@bradtaylorsf/alpha-loop 1.2.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +60 -19
- package/dist/cli.js +83 -1
- package/dist/cli.js.map +1 -1
- package/dist/commands/auth.js +1 -1
- package/dist/commands/auth.js.map +1 -1
- package/dist/commands/eval.d.ts +53 -0
- package/dist/commands/eval.js +538 -0
- package/dist/commands/eval.js.map +1 -0
- package/dist/commands/evolve.d.ts +25 -0
- package/dist/commands/evolve.js +270 -0
- package/dist/commands/evolve.js.map +1 -0
- package/dist/commands/history.d.ts +1 -1
- package/dist/commands/history.js +4 -4
- package/dist/commands/history.js.map +1 -1
- package/dist/commands/init.d.ts +14 -0
- package/dist/commands/init.js +199 -30
- package/dist/commands/init.js.map +1 -1
- package/dist/commands/resume.js +1 -0
- package/dist/commands/resume.js.map +1 -1
- package/dist/commands/run.js +170 -12
- package/dist/commands/run.js.map +1 -1
- package/dist/commands/scan.d.ts +1 -1
- package/dist/commands/scan.js +12 -9
- package/dist/commands/scan.js.map +1 -1
- package/dist/commands/sync.d.ts +5 -0
- package/dist/commands/sync.js +24 -5
- package/dist/commands/sync.js.map +1 -1
- package/dist/commands/vision.js +5 -3
- package/dist/commands/vision.js.map +1 -1
- package/dist/engine/agents.d.ts +6 -1
- package/dist/engine/agents.js +14 -12
- package/dist/engine/agents.js.map +1 -1
- package/dist/engine/prerequisites.d.ts +4 -7
- package/dist/engine/prerequisites.js +12 -36
- package/dist/engine/prerequisites.js.map +1 -1
- package/dist/lib/agent.d.ts +18 -0
- package/dist/lib/agent.js +211 -30
- package/dist/lib/agent.js.map +1 -1
- package/dist/lib/config.d.ts +25 -2
- package/dist/lib/config.js +80 -7
- package/dist/lib/config.js.map +1 -1
- package/dist/lib/eval-checks.d.ts +91 -0
- package/dist/lib/eval-checks.js +254 -0
- package/dist/lib/eval-checks.js.map +1 -0
- package/dist/lib/eval-runner.d.ts +29 -0
- package/dist/lib/eval-runner.js +439 -0
- package/dist/lib/eval-runner.js.map +1 -0
- package/dist/lib/eval.d.ts +170 -0
- package/dist/lib/eval.js +507 -0
- package/dist/lib/eval.js.map +1 -0
- package/dist/lib/learning.js +2 -2
- package/dist/lib/learning.js.map +1 -1
- package/dist/lib/pipeline.d.ts +44 -0
- package/dist/lib/pipeline.js +607 -138
- package/dist/lib/pipeline.js.map +1 -1
- package/dist/lib/prompts.d.ts +19 -0
- package/dist/lib/prompts.js +56 -5
- package/dist/lib/prompts.js.map +1 -1
- package/dist/lib/score.d.ts +80 -0
- package/dist/lib/score.js +172 -0
- package/dist/lib/score.js.map +1 -0
- package/dist/lib/session.d.ts +2 -1
- package/dist/lib/session.js +70 -19
- package/dist/lib/session.js.map +1 -1
- package/dist/lib/traces.d.ts +173 -0
- package/dist/lib/traces.js +272 -0
- package/dist/lib/traces.js.map +1 -0
- package/dist/lib/verify.d.ts +7 -1
- package/dist/lib/verify.js +109 -157
- package/dist/lib/verify.js.map +1 -1
- package/dist/lib/worktree.d.ts +1 -0
- package/dist/lib/worktree.js +9 -1
- package/dist/lib/worktree.js.map +1 -1
- package/package.json +1 -1
- package/templates/agents/implementer.md +1 -1
- package/templates/agents/reviewer.md +1 -1
- package/dist/engine/config.d.ts +0 -71
- package/dist/engine/config.js +0 -73
- package/dist/engine/config.js.map +0 -1
package/dist/lib/config.js
CHANGED
|
@@ -1,12 +1,23 @@
|
|
|
1
1
|
import { readFileSync, existsSync } from 'node:fs';
|
|
2
2
|
import { execSync } from 'node:child_process';
|
|
3
3
|
import { parse as parseYaml } from 'yaml';
|
|
4
|
+
/**
 * Estimate cost in USD from token counts and a pricing table.
 *
 * Each pricing entry is `{ input, output }`; the weighted token total is
 * divided by 1,000,000, i.e. the rates are applied per million tokens.
 *
 * @param model - Key to look up in `pricing`.
 * @param inputTokens - Number of input tokens consumed.
 * @param outputTokens - Number of output tokens produced.
 * @param pricing - Map from model name to `{ input, output }` rates.
 * @returns The estimated cost, or 0 when the pricing table is missing, the
 *   model has no entry of its own, or the entry does not yield a finite number.
 */
export function estimateCost(model, inputTokens, outputTokens, pricing) {
    // Look at own properties only: a model named like an Object.prototype
    // member ("toString", "constructor", ...) must not resolve to a function
    // and turn the estimate into NaN.
    if (pricing == null || !Object.prototype.hasOwnProperty.call(pricing, model))
        return 0;
    const p = pricing[model];
    if (!p)
        return 0;
    const cost = (inputTokens * p.input + outputTokens * p.output) / 1_000_000;
    // A malformed entry (missing or non-numeric rate) would otherwise poison
    // any running total with NaN.
    return Number.isFinite(cost) ? cost : 0;
}
|
|
4
14
|
const DEFAULTS = {
|
|
5
15
|
repo: '',
|
|
6
16
|
repoOwner: '',
|
|
7
17
|
project: 2,
|
|
8
|
-
|
|
9
|
-
|
|
18
|
+
agent: 'claude',
|
|
19
|
+
model: '',
|
|
20
|
+
reviewModel: '',
|
|
10
21
|
pollInterval: 60,
|
|
11
22
|
dryRun: false,
|
|
12
23
|
baseBranch: 'master',
|
|
@@ -15,7 +26,6 @@ const DEFAULTS = {
|
|
|
15
26
|
maxTestRetries: 3,
|
|
16
27
|
testCommand: 'pnpm test',
|
|
17
28
|
devCommand: 'pnpm dev',
|
|
18
|
-
port: 3000,
|
|
19
29
|
skipTests: false,
|
|
20
30
|
skipReview: false,
|
|
21
31
|
skipInstall: false,
|
|
@@ -32,12 +42,27 @@ const DEFAULTS = {
|
|
|
32
42
|
runFull: false,
|
|
33
43
|
verbose: false,
|
|
34
44
|
harnesses: [],
|
|
45
|
+
setupCommand: '',
|
|
46
|
+
evalDir: '.alpha-loop/evals',
|
|
47
|
+
evalModel: '',
|
|
48
|
+
skipEval: false,
|
|
49
|
+
evalTimeout: 300,
|
|
50
|
+
autoCapture: true,
|
|
51
|
+
skipPostSessionReview: false,
|
|
52
|
+
skipPostSessionSecurity: false,
|
|
53
|
+
pricing: {
|
|
54
|
+
'claude-opus-4-6': { input: 15.0, output: 75.0 },
|
|
55
|
+
'claude-sonnet-4-6': { input: 3.0, output: 15.0 },
|
|
56
|
+
'claude-haiku-4-5': { input: 0.80, output: 4.0 },
|
|
57
|
+
'codex-mini': { input: 1.50, output: 6.0 },
|
|
58
|
+
},
|
|
35
59
|
};
|
|
36
60
|
/** Map from YAML key (snake_case) to Config key (camelCase). */
|
|
37
61
|
const YAML_KEY_MAP = {
|
|
38
62
|
harnesses: 'harnesses',
|
|
39
63
|
repo: 'repo',
|
|
40
64
|
project: 'project',
|
|
65
|
+
agent: 'agent',
|
|
41
66
|
model: 'model',
|
|
42
67
|
review_model: 'reviewModel',
|
|
43
68
|
poll_interval: 'pollInterval',
|
|
@@ -48,7 +73,6 @@ const YAML_KEY_MAP = {
|
|
|
48
73
|
max_test_retries: 'maxTestRetries',
|
|
49
74
|
test_command: 'testCommand',
|
|
50
75
|
dev_command: 'devCommand',
|
|
51
|
-
port: 'port',
|
|
52
76
|
skip_tests: 'skipTests',
|
|
53
77
|
skip_review: 'skipReview',
|
|
54
78
|
skip_install: 'skipInstall',
|
|
@@ -64,11 +88,18 @@ const YAML_KEY_MAP = {
|
|
|
64
88
|
auto_cleanup: 'autoCleanup',
|
|
65
89
|
run_full: 'runFull',
|
|
66
90
|
verbose: 'verbose',
|
|
91
|
+
setup_command: 'setupCommand',
|
|
92
|
+
eval_dir: 'evalDir',
|
|
93
|
+
eval_model: 'evalModel',
|
|
94
|
+
skip_eval: 'skipEval',
|
|
95
|
+
eval_timeout: 'evalTimeout',
|
|
96
|
+
auto_capture: 'autoCapture',
|
|
67
97
|
};
|
|
68
98
|
/** Map from env var name to Config key. */
|
|
69
99
|
const ENV_KEY_MAP = {
|
|
70
100
|
REPO: 'repo',
|
|
71
|
-
|
|
101
|
+
PROJECT: 'project',
|
|
102
|
+
AGENT: 'agent',
|
|
72
103
|
MODEL: 'model',
|
|
73
104
|
REVIEW_MODEL: 'reviewModel',
|
|
74
105
|
POLL_INTERVAL: 'pollInterval',
|
|
@@ -79,7 +110,6 @@ const ENV_KEY_MAP = {
|
|
|
79
110
|
MAX_TEST_RETRIES: 'maxTestRetries',
|
|
80
111
|
TEST_COMMAND: 'testCommand',
|
|
81
112
|
DEV_COMMAND: 'devCommand',
|
|
82
|
-
PORT: 'port',
|
|
83
113
|
SKIP_TESTS: 'skipTests',
|
|
84
114
|
SKIP_REVIEW: 'skipReview',
|
|
85
115
|
SKIP_INSTALL: 'skipInstall',
|
|
@@ -95,6 +125,14 @@ const ENV_KEY_MAP = {
|
|
|
95
125
|
AUTO_CLEANUP: 'autoCleanup',
|
|
96
126
|
RUN_FULL: 'runFull',
|
|
97
127
|
VERBOSE: 'verbose',
|
|
128
|
+
SETUP_COMMAND: 'setupCommand',
|
|
129
|
+
EVAL_DIR: 'evalDir',
|
|
130
|
+
EVAL_MODEL: 'evalModel',
|
|
131
|
+
SKIP_EVAL: 'skipEval',
|
|
132
|
+
EVAL_TIMEOUT: 'evalTimeout',
|
|
133
|
+
AUTO_CAPTURE: 'autoCapture',
|
|
134
|
+
SKIP_POST_SESSION_REVIEW: 'skipPostSessionReview',
|
|
135
|
+
SKIP_POST_SESSION_SECURITY: 'skipPostSessionSecurity',
|
|
98
136
|
};
|
|
99
137
|
function coerce(value, current) {
|
|
100
138
|
if (typeof current === 'number')
|
|
@@ -103,8 +141,10 @@ function coerce(value, current) {
|
|
|
103
141
|
return value === 'true' || value === '1';
|
|
104
142
|
return value;
|
|
105
143
|
}
|
|
106
|
-
/** Validate a string contains only safe shell characters. */
|
|
144
|
+
/** Validate a string contains only safe shell characters. Empty strings are allowed (model is optional). */
|
|
107
145
|
export function assertSafeShellArg(value, name) {
|
|
146
|
+
if (value === '')
|
|
147
|
+
return value;
|
|
108
148
|
if (!/^[a-zA-Z0-9._\-/]+$/.test(value)) {
|
|
109
149
|
throw new Error(`Invalid ${name}: contains unsafe characters: ${value}`);
|
|
110
150
|
}
|
|
@@ -143,6 +183,27 @@ function loadYamlConfig(configPath) {
|
|
|
143
183
|
result[configKey] = parsed[yamlKey];
|
|
144
184
|
}
|
|
145
185
|
}
|
|
186
|
+
// Handle post_session nested config
|
|
187
|
+
if (parsed.post_session && typeof parsed.post_session === 'object') {
|
|
188
|
+
const ps = parsed.post_session;
|
|
189
|
+
if (ps.review === false)
|
|
190
|
+
result.skipPostSessionReview = true;
|
|
191
|
+
if (ps.security_scan === false)
|
|
192
|
+
result.skipPostSessionSecurity = true;
|
|
193
|
+
}
|
|
194
|
+
// Handle pricing table (nested object, not in YAML_KEY_MAP)
|
|
195
|
+
if (parsed.pricing && typeof parsed.pricing === 'object') {
|
|
196
|
+
const pricing = {};
|
|
197
|
+
for (const [model, value] of Object.entries(parsed.pricing)) {
|
|
198
|
+
const v = value;
|
|
199
|
+
if (typeof v?.input === 'number' && typeof v?.output === 'number') {
|
|
200
|
+
pricing[model] = { input: v.input, output: v.output };
|
|
201
|
+
}
|
|
202
|
+
}
|
|
203
|
+
if (Object.keys(pricing).length > 0) {
|
|
204
|
+
result.pricing = pricing;
|
|
205
|
+
}
|
|
206
|
+
}
|
|
146
207
|
return result;
|
|
147
208
|
}
|
|
148
209
|
function loadEnvConfig() {
|
|
@@ -165,13 +226,25 @@ export function loadConfig(overrides) {
|
|
|
165
226
|
autoDetect.repo = detectedRepo;
|
|
166
227
|
}
|
|
167
228
|
// Precedence: overrides (CLI flags) > env vars > config file > auto-detect > defaults
|
|
229
|
+
// Pricing is merged specially: YAML/overrides extend defaults rather than replacing
|
|
230
|
+
const mergedPricing = {
|
|
231
|
+
...DEFAULTS.pricing,
|
|
232
|
+
...yamlConfig.pricing,
|
|
233
|
+
...overrides?.pricing,
|
|
234
|
+
};
|
|
168
235
|
const merged = {
|
|
169
236
|
...DEFAULTS,
|
|
170
237
|
...autoDetect,
|
|
171
238
|
...yamlConfig,
|
|
172
239
|
...envConfig,
|
|
173
240
|
...overrides,
|
|
241
|
+
pricing: mergedPricing,
|
|
174
242
|
};
|
|
243
|
+
// Validate agent is a known value
|
|
244
|
+
const VALID_AGENTS = ['claude', 'codex', 'opencode'];
|
|
245
|
+
if (!VALID_AGENTS.includes(merged.agent)) {
|
|
246
|
+
throw new Error(`Invalid agent: "${merged.agent}". Supported agents: ${VALID_AGENTS.join(', ')}`);
|
|
247
|
+
}
|
|
175
248
|
// Derive repoOwner from repo
|
|
176
249
|
if (merged.repo) {
|
|
177
250
|
merged.repoOwner = merged.repo.split('/')[0] ?? '';
|
package/dist/lib/config.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"config.js","sourceRoot":"","sources":["../../src/lib/config.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,UAAU,EAAE,MAAM,SAAS,CAAC;AACnD,OAAO,EAAE,QAAQ,EAAE,MAAM,oBAAoB,CAAC;AAC9C,OAAO,EAAE,KAAK,IAAI,SAAS,EAAE,MAAM,MAAM,CAAC;
|
|
1
|
+
{"version":3,"file":"config.js","sourceRoot":"","sources":["../../src/lib/config.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,UAAU,EAAE,MAAM,SAAS,CAAC;AACnD,OAAO,EAAE,QAAQ,EAAE,MAAM,oBAAoB,CAAC;AAC9C,OAAO,EAAE,KAAK,IAAI,SAAS,EAAE,MAAM,MAAM,CAAC;AAQ1C;;;GAGG;AACH,MAAM,UAAU,YAAY,CAC1B,KAAa,EACb,WAAmB,EACnB,YAAoB,EACpB,OAAqC;IAErC,MAAM,CAAC,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC;IACzB,IAAI,CAAC,CAAC;QAAE,OAAO,CAAC,CAAC;IACjB,OAAO,CAAC,WAAW,GAAG,CAAC,CAAC,KAAK,GAAG,YAAY,GAAG,CAAC,CAAC,MAAM,CAAC,GAAG,SAAS,CAAC;AACvE,CAAC;AAgDD,MAAM,QAAQ,GAAW;IACvB,IAAI,EAAE,EAAE;IACR,SAAS,EAAE,EAAE;IACb,OAAO,EAAE,CAAC;IACV,KAAK,EAAE,QAAQ;IACf,KAAK,EAAE,EAAE;IACT,WAAW,EAAE,EAAE;IACf,YAAY,EAAE,EAAE;IAChB,MAAM,EAAE,KAAK;IACb,UAAU,EAAE,QAAQ;IACpB,MAAM,EAAE,MAAM;IACd,UAAU,EAAE,OAAO;IACnB,cAAc,EAAE,CAAC;IACjB,WAAW,EAAE,WAAW;IACxB,UAAU,EAAE,UAAU;IACtB,SAAS,EAAE,KAAK;IAChB,UAAU,EAAE,KAAK;IACjB,WAAW,EAAE,KAAK;IAClB,aAAa,EAAE,KAAK;IACpB,UAAU,EAAE,KAAK;IACjB,SAAS,EAAE,KAAK;IAChB,OAAO,EAAE,KAAK;IACd,SAAS,EAAE,CAAC;IACZ,kBAAkB,EAAE,CAAC;IACrB,SAAS,EAAE,EAAE;IACb,SAAS,EAAE,IAAI;IACf,OAAO,EAAE,EAAE;IACX,WAAW,EAAE,IAAI;IACjB,OAAO,EAAE,KAAK;IACd,OAAO,EAAE,KAAK;IACd,SAAS,EAAE,EAAE;IACb,YAAY,EAAE,EAAE;IAChB,OAAO,EAAE,mBAAmB;IAC5B,SAAS,EAAE,EAAE;IACb,QAAQ,EAAE,KAAK;IACf,WAAW,EAAE,GAAG;IAChB,WAAW,EAAE,IAAI;IACjB,qBAAqB,EAAE,KAAK;IAC5B,uBAAuB,EAAE,KAAK;IAC9B,OAAO,EAAE;QACP,iBAAiB,EAAE,EAAE,KAAK,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE;QAChD,mBAAmB,EAAE,EAAE,KAAK,EAAE,GAAG,EAAE,MAAM,EAAE,IAAI,EAAE;QACjD,kBAAkB,EAAE,EAAE,KAAK,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,EAAE;QAChD,YAAY,EAAE,EAAE,KAAK,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,EAAE;KAC3C;CACF,CAAC;AAEF,gEAAgE;AAChE,MAAM,YAAY,GAAiC;IACjD,SAAS,EAAE,WAAW;IACtB,IAAI,EAAE,MAAM;IACZ,OAAO,EAAE,SAAS;IAClB,KAAK,EAAE,OAAO;IACd,KAAK,EAAE,OAAO;IACd,YAAY,EAAE,aAAa;IAC3B,aAAa,EAAE,cAAc;IAC7B,OAAO,EAAE,QAAQ;IACjB,WAAW,EAAE,YAAY;IACzB,OAAO,EAAE,QAAQ;IACjB,KAAK,EAAE,YAAY;IACnB,gBAAgB,EAAE,gBAAgB;IAClC,YAAY,EAAE,aAAa;IAC3B,WAAW,EAAE,YAAY;IACzB,UAAU,EAAE,WAAW;IACvB,WAAW,EAAE,YAAY;IACzB,YAAY,EAAE,aA
Aa;IAC3B,cAAc,EAAE,eAAe;IAC/B,WAAW,EAAE,YAAY;IACzB,UAAU,EAAE,WAAW;IACvB,QAAQ,EAAE,SAAS;IACnB,UAAU,EAAE,WAAW;IACvB,oBAAoB,EAAE,oBAAoB;IAC1C,SAAS,EAAE,WAAW;IACtB,UAAU,EAAE,WAAW;IACvB,QAAQ,EAAE,SAAS;IACnB,YAAY,EAAE,aAAa;IAC3B,QAAQ,EAAE,SAAS;IACnB,OAAO,EAAE,SAAS;IAClB,aAAa,EAAE,cAAc;IAC7B,QAAQ,EAAE,SAAS;IACnB,UAAU,EAAE,WAAW;IACvB,SAAS,EAAE,UAAU;IACrB,YAAY,EAAE,aAAa;IAC3B,YAAY,EAAE,aAAa;CAC5B,CAAC;AAEF,2CAA2C;AAC3C,MAAM,WAAW,GAAiC;IAChD,IAAI,EAAE,MAAM;IACZ,OAAO,EAAE,SAAS;IAClB,KAAK,EAAE,OAAO;IACd,KAAK,EAAE,OAAO;IACd,YAAY,EAAE,aAAa;IAC3B,aAAa,EAAE,cAAc;IAC7B,OAAO,EAAE,QAAQ;IACjB,WAAW,EAAE,YAAY;IACzB,OAAO,EAAE,QAAQ;IACjB,WAAW,EAAE,YAAY;IACzB,gBAAgB,EAAE,gBAAgB;IAClC,YAAY,EAAE,aAAa;IAC3B,WAAW,EAAE,YAAY;IACzB,UAAU,EAAE,WAAW;IACvB,WAAW,EAAE,YAAY;IACzB,YAAY,EAAE,aAAa;IAC3B,cAAc,EAAE,eAAe;IAC/B,WAAW,EAAE,YAAY;IACzB,UAAU,EAAE,WAAW;IACvB,QAAQ,EAAE,SAAS;IACnB,UAAU,EAAE,WAAW;IACvB,oBAAoB,EAAE,oBAAoB;IAC1C,SAAS,EAAE,WAAW;IACtB,UAAU,EAAE,WAAW;IACvB,QAAQ,EAAE,SAAS;IACnB,YAAY,EAAE,aAAa;IAC3B,QAAQ,EAAE,SAAS;IACnB,OAAO,EAAE,SAAS;IAClB,aAAa,EAAE,cAAc;IAC7B,QAAQ,EAAE,SAAS;IACnB,UAAU,EAAE,WAAW;IACvB,SAAS,EAAE,UAAU;IACrB,YAAY,EAAE,aAAa;IAC3B,YAAY,EAAE,aAAa;IAC3B,wBAAwB,EAAE,uBAAuB;IACjD,0BAA0B,EAAE,yBAAyB;CACtD,CAAC;AAEF,SAAS,MAAM,CAAC,KAAa,EAAE,OAAgB;IAC7C,IAAI,OAAO,OAAO,KAAK,QAAQ;QAAE,OAAO,MAAM,CAAC,KAAK,CAAC,CAAC;IACtD,IAAI,OAAO,OAAO,KAAK,SAAS;QAAE,OAAO,KAAK,KAAK,MAAM,IAAI,KAAK,KAAK,GAAG,CAAC;IAC3E,OAAO,KAAK,CAAC;AACf,CAAC;AAED,4GAA4G;AAC5G,MAAM,UAAU,kBAAkB,CAAC,KAAa,EAAE,IAAY;IAC5D,IAAI,KAAK,KAAK,EAAE;QAAE,OAAO,KAAK,CAAC;IAC/B,IAAI,CAAC,qBAAqB,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;QACvC,MAAM,IAAI,KAAK,CAAC,WAAW,IAAI,iCAAiC,KAAK,EAAE,CAAC,CAAC;IAC3E,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,MAAM,UAAU,UAAU;IACxB,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,QAAQ,CAAC,2BAA2B,EAAE;YAChD,QAAQ,EAAE,OAAO;YACjB,KAAK,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC;SAChC,CAAC,CAAC,IAAI,EAAE,CAAC;QAEV,2CAA2C;QAC3C,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,gCAAgC,CAAC,CAAC;QAC1D,IAAI,KAAK;YAAE,OAAO,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,C
AAC,CAAC,CAAC,EAAE,CAAC;QAE5C,qCAAqC;QACrC,MAAM,GAAG,GAAG,GAAG,CAAC,KAAK,CAAC,+BAA+B,CAAC,CAAC;QACvD,IAAI,GAAG;YAAE,OAAO,GAAG,GAAG,CAAC,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;IACxC,CAAC;IAAC,MAAM,CAAC;QACP,8BAA8B;IAChC,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,cAAc,CAAC,UAAkB;IACxC,IAAI,CAAC,UAAU,CAAC,UAAU,CAAC;QAAE,OAAO,EAAE,CAAC;IAEvC,MAAM,GAAG,GAAG,YAAY,CAAC,UAAU,EAAE,OAAO,CAAC,CAAC;IAC9C,MAAM,MAAM,GAAG,SAAS,CAAC,GAAG,CAAmC,CAAC;IAChE,IAAI,CAAC,MAAM,IAAI,OAAO,MAAM,KAAK,QAAQ;QAAE,OAAO,EAAE,CAAC;IAErD,MAAM,MAAM,GAAoB,EAAE,CAAC;IACnC,KAAK,MAAM,CAAC,OAAO,EAAE,SAAS,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,YAAY,CAAC,EAAE,CAAC;QAChE,IAAI,OAAO,IAAI,MAAM,EAAE,CAAC;YACrB,MAAkC,CAAC,SAAS,CAAC,GAAG,MAAM,CAAC,OAAO,CAAC,CAAC;QACnE,CAAC;IACH,CAAC;IAED,oCAAoC;IACpC,IAAI,MAAM,CAAC,YAAY,IAAI,OAAO,MAAM,CAAC,YAAY,KAAK,QAAQ,EAAE,CAAC;QACnE,MAAM,EAAE,GAAG,MAAM,CAAC,YAAuC,CAAC;QAC1D,IAAI,EAAE,CAAC,MAAM,KAAK,KAAK;YAAE,MAAM,CAAC,qBAAqB,GAAG,IAAI,CAAC;QAC7D,IAAI,EAAE,CAAC,aAAa,KAAK,KAAK;YAAE,MAAM,CAAC,uBAAuB,GAAG,IAAI,CAAC;IACxE,CAAC;IAED,4DAA4D;IAC5D,IAAI,MAAM,CAAC,OAAO,IAAI,OAAO,MAAM,CAAC,OAAO,KAAK,QAAQ,EAAE,CAAC;QACzD,MAAM,OAAO,GAAsD,EAAE,CAAC;QACtE,KAAK,MAAM,CAAC,KAAK,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,OAAkC,CAAC,EAAE,CAAC;YACvF,MAAM,CAAC,GAAG,KAAgC,CAAC;YAC3C,IAAI,OAAO,CAAC,EAAE,KAAK,KAAK,QAAQ,IAAI,OAAO,CAAC,EAAE,MAAM,KAAK,QAAQ,EAAE,CAAC;gBAClE,OAAO,CAAC,KAAK,CAAC,GAAG,EAAE,KAAK,EAAE,CAAC,CAAC,KAAK,EAAE,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC;YACxD,CAAC;QACH,CAAC;QACD,IAAI,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACpC,MAAM,CAAC,OAAO,GAAG,OAAO,CAAC;QAC3B,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,SAAS,aAAa;IACpB,MAAM,MAAM,GAAoB,EAAE,CAAC;IACnC,KAAK,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,WAAW,CAAC,EAAE,CAAC;QAC9D,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;QAChC,IAAI,GAAG,KAAK,SAAS,EAAE,CAAC;YACrB,MAAkC,CAAC,SAAS,CAAC,GAAG,MAAM,CAAC,GAAG,EAAE,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC;QACpF,CAAC;IACH,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,
MAAM,UAAU,UAAU,CAAC,SAA2B;IACpD,MAAM,UAAU,GAAG,cAAc,CAAC,kBAAkB,CAAC,CAAC;IACtD,MAAM,SAAS,GAAG,aAAa,EAAE,CAAC;IAElC,4CAA4C;IAC5C,MAAM,YAAY,GAAG,UAAU,EAAE,CAAC;IAClC,MAAM,UAAU,GAAoB,EAAE,CAAC;IACvC,IAAI,YAAY,EAAE,CAAC;QACjB,UAAU,CAAC,IAAI,GAAG,YAAY,CAAC;IACjC,CAAC;IAED,sFAAsF;IACtF,oFAAoF;IACpF,MAAM,aAAa,GAAG;QACpB,GAAG,QAAQ,CAAC,OAAO;QACnB,GAAG,UAAU,CAAC,OAAO;QACrB,GAAG,SAAS,EAAE,OAAO;KACtB,CAAC;IAEF,MAAM,MAAM,GAAW;QACrB,GAAG,QAAQ;QACX,GAAG,UAAU;QACb,GAAG,UAAU;QACb,GAAG,SAAS;QACZ,GAAG,SAAS;QACZ,OAAO,EAAE,aAAa;KACvB,CAAC;IAEF,kCAAkC;IAClC,MAAM,YAAY,GAAG,CAAC,QAAQ,EAAE,OAAO,EAAE,UAAU,CAAU,CAAC;IAC9D,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC,MAAM,CAAC,KAAoC,CAAC,EAAE,CAAC;QACxE,MAAM,IAAI,KAAK,CAAC,mBAAmB,MAAM,CAAC,KAAK,wBAAwB,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACpG,CAAC;IAED,6BAA6B;IAC7B,IAAI,MAAM,CAAC,IAAI,EAAE,CAAC;QAChB,MAAM,CAAC,SAAS,GAAG,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;IACrD,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC"}
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
/**
 * Outcome of one executed check: a pass/fail flag, a numeric score and a
 * human-readable explanation.
 */
export type CheckResult = {
    passed: boolean;
    score: number;
    detail: string;
};

/** Shape shared by every check: the `type` field selects the concrete check. */
type BaseCheck = {
    type: string;
};

/** Passes when the test command exits successfully. */
export type TestPassCheck = BaseCheck & {
    type: 'test_pass';
};

/** Passes when `path` exists. */
export type FileExistsCheck = BaseCheck & {
    type: 'file_exists';
    path: string;
};

/** Passes when `pattern` is found in the contents of `file`. */
export type GrepCheck = BaseCheck & {
    type: 'grep';
    file: string;
    pattern: string;
};

/** Issues an HTTP request and compares the response with the expectations. */
export type HttpCheck = BaseCheck & {
    type: 'http';
    method: string;
    path: string;
    port?: number;
    expect_status: number;
    expect_body_contains?: string;
};

/** Caps the size of a change by file count and/or line count. */
export type DiffSizeCheck = BaseCheck & {
    type: 'diff_size';
    max_files?: number;
    max_lines?: number;
};

/** Every listed keyword has to appear in the output. */
export type KeywordPresentCheck = BaseCheck & {
    type: 'keyword_present';
    keywords: string[];
};

/** None of the listed keywords may appear in the output. */
export type KeywordAbsentCheck = BaseCheck & {
    type: 'keyword_absent';
    keywords: string[];
};

/** Scores the output with an LLM against `rubric`; passes at `min_score` or above. */
export type LlmJudgeCheck = BaseCheck & {
    type: 'llm_judge';
    model: string;
    rubric: string;
    min_score: number;
};

/** Any of the supported check shapes. */
export type CheckDefinition =
    | TestPassCheck
    | FileExistsCheck
    | GrepCheck
    | HttpCheck
    | DiffSizeCheck
    | KeywordPresentCheck
    | KeywordAbsentCheck
    | LlmJudgeCheck;

/** Inputs made available to every check runner. */
export type CheckContext = {
    /** Working directory (worktree root). */
    cwd: string;
    /** Command used to run the test suite. */
    testCommand?: string;
    /** Agent output (used by step evals). */
    output?: string;
    /** Git diff of the changes. */
    diff?: string;
    /** Paths of the changed files. */
    filesChanged?: string[];
    /** Fallback model for the LLM judge. */
    judgeModel?: string;
};

/**
 * Execute one check against the supplied context.
 */
export declare function runCheck(check: CheckDefinition, ctx: CheckContext): Promise<CheckResult>;

/**
 * Execute every check and summarise the outcome.
 */
export declare function runChecks(checks: CheckDefinition[], ctx: CheckContext): Promise<{
    results: CheckResult[];
    allPassed: boolean;
    avgScore: number;
}>;

/**
 * Build check definitions from a parsed checks.yaml object.
 */
export declare function parseChecks(raw: unknown): CheckDefinition[];
export {};
|
|
@@ -0,0 +1,254 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Eval Check Executor — runs machine-checkable acceptance criteria.
|
|
3
|
+
*
|
|
4
|
+
* Check types:
|
|
5
|
+
* E2E: test_pass, file_exists, grep, http, diff_size
|
|
6
|
+
* Step: keyword_present, keyword_absent, llm_judge
|
|
7
|
+
*
|
|
8
|
+
* Each check returns { passed, score, detail }.
|
|
9
|
+
*/
|
|
10
|
+
import { existsSync, readFileSync } from 'node:fs';
|
|
11
|
+
import { join } from 'node:path';
|
|
12
|
+
import { exec } from './shell.js';
|
|
13
|
+
import { spawnAgent } from './agent.js';
|
|
14
|
+
import { log } from './logger.js';
|
|
15
|
+
/**
 * Dispatch a check to the runner registered for its `type`.
 *
 * An unrecognised type does not throw; it yields a failed result whose
 * detail names the offending type.
 */
export async function runCheck(check, ctx) {
    // Lookup table from the check's type tag to its runner.
    const runnersByType = new Map([
        ['test_pass', runTestPassCheck],
        ['file_exists', runFileExistsCheck],
        ['grep', runGrepCheck],
        ['http', runHttpCheck],
        ['diff_size', runDiffSizeCheck],
        ['keyword_present', runKeywordPresentCheck],
        ['keyword_absent', runKeywordAbsentCheck],
        ['llm_judge', runLlmJudgeCheck],
    ]);
    const runner = runnersByType.get(check.type);
    if (runner === undefined) {
        return { passed: false, score: 0, detail: `Unknown check type: ${check.type}` };
    }
    return runner(check, ctx);
}
|
|
40
|
+
/**
 * Execute the checks one after another and summarise them.
 *
 * A check that throws is recorded as a failed result (score 0) carrying the
 * error message, so a single broken check never aborts the run.
 *
 * Returns `{ results, allPassed, avgScore }`; `avgScore` is the mean of the
 * individual scores, or 0 when no checks were given.
 */
export async function runChecks(checks, ctx) {
    const outcomes = [];
    for (const definition of checks) {
        let outcome;
        try {
            outcome = await runCheck(definition, ctx);
        }
        catch (failure) {
            const reason = failure instanceof Error ? failure.message : String(failure);
            outcome = {
                passed: false,
                score: 0,
                detail: `Check ${definition.type} threw: ${reason}`,
            };
        }
        outcomes.push(outcome);
    }
    const failedCount = outcomes.filter((entry) => !entry.passed).length;
    let scoreTotal = 0;
    for (const entry of outcomes) {
        scoreTotal = scoreTotal + entry.score;
    }
    return {
        results: outcomes,
        allPassed: failedCount === 0,
        avgScore: outcomes.length > 0 ? scoreTotal / outcomes.length : 0,
    };
}
|
|
64
|
+
// --- Individual check runners ---
|
|
65
|
+
/**
 * Run the project's test command in `ctx.cwd` and pass when it exits with 0.
 *
 * Uses `ctx.testCommand`, falling back to `npm test` when it is missing or
 * blank. The blank case matters because `??` alone would hand an empty
 * command to `exec`.
 * NOTE(review): presumably an empty shell command exits 0 and would report
 * "All tests passed" without running anything; confirm against `exec`.
 *
 * On failure the detail carries the exit code and the first 500 characters
 * of stderr.
 */
async function runTestPassCheck(_check, ctx) {
    const configured = typeof ctx.testCommand === 'string' ? ctx.testCommand.trim() : ctx.testCommand;
    const cmd = configured == null || configured === '' ? 'npm test' : ctx.testCommand;
    // 120_000 is passed through as the timeout; presumably milliseconds (confirm in shell.js).
    const result = exec(cmd, { cwd: ctx.cwd, timeout: 120_000 });
    const passed = result.exitCode === 0;
    return {
        passed,
        score: passed ? 1 : 0,
        detail: passed ? 'All tests passed' : `Tests failed (exit ${result.exitCode}): ${result.stderr.slice(0, 500)}`,
    };
}
|
|
75
|
+
/**
 * Pass when `check.path`, resolved against `ctx.cwd`, exists on disk.
 *
 * A missing or blank path is reported as a failure. Without that guard
 * `join(cwd, '')` resolves to the working directory itself, which always
 * exists, so a check that forgot its `path` would silently pass.
 */
async function runFileExistsCheck(check, ctx) {
    if (typeof check.path !== 'string' || check.path.trim() === '') {
        return { passed: false, score: 0, detail: 'file_exists check has no path' };
    }
    const fullPath = join(ctx.cwd, check.path);
    const passed = existsSync(fullPath);
    return {
        passed,
        score: passed ? 1 : 0,
        detail: passed ? `File exists: ${check.path}` : `File not found: ${check.path}`,
    };
}
|
|
84
|
+
/**
 * Pass when the regular expression `check.pattern` matches the contents of
 * `check.file` (resolved against `ctx.cwd`, read as UTF-8).
 *
 * Every failure mode is reported as a failed result rather than thrown:
 * a missing file, a pattern that is not a valid regular expression, and a
 * target that exists but cannot be read as a file (for example an empty
 * `file` value, which resolves to the working directory).
 */
async function runGrepCheck(check, ctx) {
    const describe = (err) => (err instanceof Error ? err.message : String(err));
    const fullPath = join(ctx.cwd, check.file);
    if (!existsSync(fullPath)) {
        return { passed: false, score: 0, detail: `File not found: ${check.file}` };
    }
    let content;
    try {
        content = readFileSync(fullPath, 'utf-8');
    }
    catch (err) {
        return { passed: false, score: 0, detail: `Cannot read ${check.file}: ${describe(err)}` };
    }
    let regex;
    try {
        regex = new RegExp(check.pattern);
    }
    catch (err) {
        return { passed: false, score: 0, detail: `Invalid pattern "${check.pattern}": ${describe(err)}` };
    }
    const passed = regex.test(content);
    return {
        passed,
        score: passed ? 1 : 0,
        detail: passed ? `Pattern "${check.pattern}" found in ${check.file}` : `Pattern "${check.pattern}" not found in ${check.file}`,
    };
}
|
|
98
|
+
/**
 * Send `check.method` to `http://localhost:<port><path>` and compare the
 * response with the expectations.
 *
 * The port defaults to 3000 and the request is aborted after 10 seconds.
 * The check passes when the status equals `expect_status` and, if
 * `expect_body_contains` is set, the body includes that text. Any error
 * raised along the way (connection refused, timeout, ...) is turned into a
 * failed result.
 */
async function runHttpCheck(check, ctx) {
    try {
        const port = check.port ?? 3000;
        const target = `http://localhost:${port}${check.path}`;
        const requestOptions = { method: check.method, signal: AbortSignal.timeout(10_000) };
        const response = await fetch(target, requestOptions);
        const body = await response.text();
        const statusMatch = response.status === check.expect_status;
        // No body expectation means the body always counts as matching.
        const bodyMatch = !check.expect_body_contains || body.includes(check.expect_body_contains);
        if (statusMatch && bodyMatch) {
            return {
                passed: true,
                score: 1,
                detail: `HTTP ${check.method} ${check.path} returned ${response.status}`,
            };
        }
        return {
            passed: false,
            score: 0,
            detail: `HTTP check failed: status=${response.status} (expected ${check.expect_status}), body match=${bodyMatch}`,
        };
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        return {
            passed: false,
            score: 0,
            detail: `HTTP check failed: ${reason}`,
        };
    }
}
|
|
122
|
+
/**
 * Pass when the change stays within the optional `max_files` / `max_lines`
 * limits.
 *
 * The file count comes from `ctx.filesChanged`; the line count is the number
 * of added/removed rows in `ctx.diff`. The `--- old` / `+++ new` pair that
 * introduces each file in a unified diff is skipped: those rows also start
 * with `-`/`+` and would otherwise add two phantom lines per changed file.
 * A limit that is not set is not enforced.
 */
async function runDiffSizeCheck(check, ctx) {
    const filesCount = ctx.filesChanged?.length ?? 0;
    const diffLines = (ctx.diff ?? '').split('\n');
    let lineCount = 0;
    for (let i = 0; i < diffLines.length; i++) {
        const line = diffLines[i];
        // File header: "--- a/x" immediately followed by "+++ b/x".
        if (line.startsWith('--- ') && diffLines[i + 1]?.startsWith('+++ ')) {
            i++;
            continue;
        }
        if (line.startsWith('+') || line.startsWith('-')) {
            lineCount++;
        }
    }
    const fileOk = check.max_files == null || filesCount <= check.max_files;
    const lineOk = check.max_lines == null || lineCount <= check.max_lines;
    const passed = fileOk && lineOk;
    return {
        passed,
        score: passed ? 1 : 0,
        detail: `Diff: ${filesCount} files, ${lineCount} lines` +
            (!fileOk ? ` (max ${check.max_files} files exceeded)` : '') +
            (!lineOk ? ` (max ${check.max_lines} lines exceeded)` : ''),
    };
}
|
|
137
|
+
/**
 * Require every keyword to occur in `ctx.output` (plain substring match; a
 * missing output is treated as an empty string).
 *
 * The score is the fraction of keywords found, or 1 when the keyword list is
 * empty. The check only passes when nothing is missing.
 */
async function runKeywordPresentCheck(check, ctx) {
    const text = ctx.output ?? '';
    const absent = [];
    let hits = 0;
    check.keywords.forEach((keyword) => {
        if (text.includes(keyword)) {
            hits += 1;
        }
        else {
            absent.push(keyword);
        }
    });
    const total = check.keywords.length;
    if (absent.length > 0) {
        return {
            passed: false,
            score: total > 0 ? hits / total : 1,
            detail: `Missing keywords: ${absent.join(', ')}`,
        };
    }
    return {
        passed: true,
        score: total > 0 ? hits / total : 1,
        detail: `All keywords found: ${check.keywords.join(', ')}`,
    };
}
|
|
150
|
+
/**
 * Require that none of the keywords occurs in `ctx.output` (plain substring
 * match; a missing output is treated as an empty string).
 *
 * Scoring is all-or-nothing: 1 when no keyword is present, otherwise 0 with
 * the offending keywords listed in the detail.
 */
async function runKeywordAbsentCheck(check, ctx) {
    const text = ctx.output ?? '';
    const offenders = check.keywords.filter((keyword) => text.includes(keyword));
    if (offenders.length > 0) {
        return {
            passed: false,
            score: 0,
            detail: `Forbidden keywords present: ${offenders.join(', ')}`,
        };
    }
    return {
        passed: true,
        score: 1,
        detail: `No forbidden keywords found`,
    };
}
|
|
162
|
+
/**
 * Have an LLM grade `ctx.output` against `check.rubric` on a 1-5 scale.
 *
 * The model is taken from the check, then from `ctx.judgeModel`, then
 * defaults to 'claude-haiku-4-5'. Only the first 8000 characters of the
 * output are sent. The score is the first digit found at the start of a
 * line in the reply (0 when there is none), clamped to 0-5; the check
 * passes when it reaches `check.min_score`, and the reported score is the
 * clamped value divided by 5. If the agent call fails, the error is logged
 * as a warning and a failed result is returned.
 */
async function runLlmJudgeCheck(check, ctx) {
    const judgeModel = check.model || ctx.judgeModel || 'claude-haiku-4-5';
    const prompt = `You are an evaluation judge. Score the following output on a scale of 1-5 based on this rubric.

## Rubric
${check.rubric}

## Output to evaluate
${(ctx.output ?? '').slice(0, 8000)}

Respond with ONLY a single number (1-5) on the first line, followed by a brief explanation.`;
    try {
        const reply = await spawnAgent({
            agent: 'claude',
            model: judgeModel,
            prompt,
            cwd: ctx.cwd,
            timeout: 60_000,
            maxTurns: 1,
        });
        // First digit that opens any line of the reply.
        const digit = reply.output.match(/^(\d)/m);
        let rawScore = 0;
        if (digit) {
            rawScore = parseInt(digit[1], 10);
        }
        const normalizedScore = Math.min(5, Math.max(0, rawScore));
        return {
            passed: normalizedScore >= check.min_score,
            score: normalizedScore / 5,
            detail: `LLM judge score: ${normalizedScore}/5 (min: ${check.min_score}). ${reply.output.slice(0, 200)}`,
        };
    }
    catch (err) {
        log.warn(`LLM judge check failed: ${err instanceof Error ? err.message : String(err)}`);
        return {
            passed: false,
            score: 0,
            detail: `LLM judge failed: ${err instanceof Error ? err.message : String(err)}`,
        };
    }
}
|
|
201
|
+
/**
 * Parse check definitions from a checks.yaml object.
 *
 * Reads the `checks` array of `raw` and normalises each entry into the shape
 * its runner expects, filling in defaults (`GET`, `/`, status 200, minimum
 * judge score 3, empty strings / empty keyword lists).
 *
 * - Returns `[]` when `raw` is not an object or has no `checks` array.
 * - An http entry's `port` is carried over when present, so the HTTP runner
 *   can target something other than its default port.
 * - A `null`/`undefined` entry is treated like an empty one instead of
 *   throwing; it comes back as `{ type: '' }`, the same as any entry with an
 *   unrecognised shape.
 * - Entries with an unknown `type` are returned as `{ type }` only.
 */
export function parseChecks(raw) {
    if (!raw || typeof raw !== 'object')
        return [];
    const obj = raw;
    const checks = Array.isArray(obj.checks) ? obj.checks : [];
    return checks.map((entry) => {
        const c = entry ?? {};
        const type = String(c.type ?? '');
        switch (type) {
            case 'test_pass':
                return { type: 'test_pass' };
            case 'file_exists':
                return { type: 'file_exists', path: String(c.path ?? '') };
            case 'grep':
                return { type: 'grep', file: String(c.file ?? ''), pattern: String(c.pattern ?? '') };
            case 'http':
                return {
                    type: 'http',
                    method: String(c.method ?? 'GET'),
                    path: String(c.path ?? '/'),
                    expect_status: Number(c.expect_status ?? 200),
                    expect_body_contains: c.expect_body_contains ? String(c.expect_body_contains) : undefined,
                    // Only emitted when configured, so entries without a port keep their previous shape.
                    ...(c.port != null ? { port: Number(c.port) } : {}),
                };
            case 'diff_size':
                return {
                    type: 'diff_size',
                    max_files: c.max_files != null ? Number(c.max_files) : undefined,
                    max_lines: c.max_lines != null ? Number(c.max_lines) : undefined,
                };
            case 'keyword_present':
                return {
                    type: 'keyword_present',
                    keywords: Array.isArray(c.keywords) ? c.keywords.map(String) : [],
                };
            case 'keyword_absent':
                return {
                    type: 'keyword_absent',
                    keywords: Array.isArray(c.keywords) ? c.keywords.map(String) : [],
                };
            case 'llm_judge':
                return {
                    type: 'llm_judge',
                    model: String(c.model ?? ''),
                    rubric: String(c.rubric ?? ''),
                    min_score: Number(c.min_score ?? 3),
                };
            default:
                return { type };
        }
    });
}
|
|
254
|
+
//# sourceMappingURL=eval-checks.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"eval-checks.js","sourceRoot":"","sources":["../../src/lib/eval-checks.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AACH,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACnD,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAClC,OAAO,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AACxC,OAAO,EAAE,GAAG,EAAE,MAAM,aAAa,CAAC;AA8FlC;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,QAAQ,CAAC,KAAsB,EAAE,GAAiB;IACtE,QAAQ,KAAK,CAAC,IAAI,EAAE,CAAC;QACnB,KAAK,WAAW;YACd,OAAO,gBAAgB,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;QACtC,KAAK,aAAa;YAChB,OAAO,kBAAkB,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;QACxC,KAAK,MAAM;YACT,OAAO,YAAY,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;QAClC,KAAK,MAAM;YACT,OAAO,YAAY,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;QAClC,KAAK,WAAW;YACd,OAAO,gBAAgB,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;QACtC,KAAK,iBAAiB;YACpB,OAAO,sBAAsB,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;QAC5C,KAAK,gBAAgB;YACnB,OAAO,qBAAqB,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;QAC3C,KAAK,WAAW;YACd,OAAO,gBAAgB,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;QACtC;YACE,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,EAAE,MAAM,EAAE,uBAAwB,KAAmB,CAAC,IAAI,EAAE,EAAE,CAAC;IACnG,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,MAAyB,EAAE,GAAiB;IAK1E,MAAM,OAAO,GAAkB,EAAE,CAAC;IAClC,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;YAC1C,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACvB,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,OAAO,CAAC,IAAI,CAAC;gBACX,MAAM,EAAE,KAAK;gBACb,KAAK,EAAE,CAAC;gBACR,MAAM,EAAE,SAAS,KAAK,CAAC,IAAI,WAAW,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE;aACzF,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,MAAM,SAAS,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;IACjD,MAAM,QAAQ,GAAG,OAAO,CAAC,MAAM,GAAG,CAAC;QACjC,CAAC,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,GAAG,OAAO,CAAC,MAAM;QAC/D,CAAC,CAAC,CAAC,CAAC;IAEN,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,CAAC;AAC1C,CAAC;AAED,mCAAmC;AAEnC,KAAK,UAAU,gBAAgB,CAAC,
MAAqB,EAAE,GAAiB;IACtE,MAAM,GAAG,GAAG,GAAG,CAAC,WAAW,IAAI,UAAU,CAAC;IAC1C,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,EAAE,EAAE,GAAG,EAAE,GAAG,CAAC,GAAG,EAAE,OAAO,EAAE,OAAO,EAAE,CAAC,CAAC;IAC7D,MAAM,MAAM,GAAG,MAAM,CAAC,QAAQ,KAAK,CAAC,CAAC;IACrC,OAAO;QACL,MAAM;QACN,KAAK,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACrB,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC,kBAAkB,CAAC,CAAC,CAAC,sBAAsB,MAAM,CAAC,QAAQ,MAAM,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE;KAC/G,CAAC;AACJ,CAAC;AAED,KAAK,UAAU,kBAAkB,CAAC,KAAsB,EAAE,GAAiB;IACzE,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,CAAC,CAAC;IAC3C,MAAM,MAAM,GAAG,UAAU,CAAC,QAAQ,CAAC,CAAC;IACpC,OAAO;QACL,MAAM;QACN,KAAK,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACrB,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC,gBAAgB,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,mBAAmB,KAAK,CAAC,IAAI,EAAE;KAChF,CAAC;AACJ,CAAC;AAED,KAAK,UAAU,YAAY,CAAC,KAAgB,EAAE,GAAiB;IAC7D,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,CAAC,CAAC;IAC3C,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;QAC1B,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,EAAE,MAAM,EAAE,mBAAmB,KAAK,CAAC,IAAI,EAAE,EAAE,CAAC;IAC9E,CAAC;IACD,MAAM,OAAO,GAAG,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IAChD,MAAM,KAAK,GAAG,IAAI,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IACxC,MAAM,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IACnC,OAAO;QACL,MAAM;QACN,KAAK,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACrB,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC,YAAY,KAAK,CAAC,OAAO,cAAc,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,YAAY,KAAK,CAAC,OAAO,kBAAkB,KAAK,CAAC,IAAI,EAAE;KAC/H,CAAC;AACJ,CAAC;AAED,KAAK,UAAU,YAAY,CAAC,KAAgB,EAAE,GAAiB;IAC7D,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,oBAAoB,KAAK,CAAC,IAAI,IAAI,IAAI,GAAG,KAAK,CAAC,IAAI,EAAE,CAAC;QAClE,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE,EAAE,MAAM,EAAE,KAAK,CAAC,MAAM,EAAE,MAAM,EAAE,WAAW,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QACjG,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;QACnC,MAAM,WAAW,GAAG,QAAQ,CAAC,MAAM,KAAK,KAAK,CAAC,aAAa,CAAC;QAC5D,MAAM,SAAS,GAAG,KAAK,CAAC,oBAAoB,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,KAAK
,CAAC,oBAAoB,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;QAChG,MAAM,MAAM,GAAG,WAAW,IAAI,SAAS,CAAC;QACxC,OAAO;YACL,MAAM;YACN,KAAK,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YACrB,MAAM,EAAE,MAAM;gBACZ,CAAC,CAAC,QAAQ,KAAK,CAAC,MAAM,IAAI,KAAK,CAAC,IAAI,aAAa,QAAQ,CAAC,MAAM,EAAE;gBAClE,CAAC,CAAC,6BAA6B,QAAQ,CAAC,MAAM,cAAc,KAAK,CAAC,aAAa,iBAAiB,SAAS,EAAE;SAC9G,CAAC;IACJ,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,OAAO;YACL,MAAM,EAAE,KAAK;YACb,KAAK,EAAE,CAAC;YACR,MAAM,EAAE,sBAAsB,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE;SACjF,CAAC;IACJ,CAAC;AACH,CAAC;AAED,KAAK,UAAU,gBAAgB,CAAC,KAAoB,EAAE,GAAiB;IACrE,MAAM,UAAU,GAAG,GAAG,CAAC,YAAY,EAAE,MAAM,IAAI,CAAC,CAAC;IACjD,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,IAAI,EAAE,CAAC;IAC5B,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC;IAEhG,MAAM,MAAM,GAAG,KAAK,CAAC,SAAS,IAAI,IAAI,IAAI,UAAU,IAAI,KAAK,CAAC,SAAS,CAAC;IACxE,MAAM,MAAM,GAAG,KAAK,CAAC,SAAS,IAAI,IAAI,IAAI,SAAS,IAAI,KAAK,CAAC,SAAS,CAAC;IACvE,MAAM,MAAM,GAAG,MAAM,IAAI,MAAM,CAAC;IAEhC,OAAO;QACL,MAAM;QACN,KAAK,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACrB,MAAM,EAAE,SAAS,UAAU,WAAW,SAAS,QAAQ;YACrD,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,SAAS,KAAK,CAAC,SAAS,kBAAkB,CAAC,CAAC,CAAC,EAAE,CAAC;YAC3D,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,SAAS,KAAK,CAAC,SAAS,kBAAkB,CAAC,CAAC,CAAC,EAAE,CAAC;KAC9D,CAAC;AACJ,CAAC;AAED,KAAK,UAAU,sBAAsB,CAAC,KAA0B,EAAE,GAAiB;IACjF,MAAM,MAAM,GAAG,GAAG,CAAC,MAAM,IAAI,EAAE,CAAC;IAChC,MAAM,KAAK,GAAG,KAAK,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,CAAC;IACjE,MAAM,OAAO,GAAG,KAAK,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,CAAC;IACpE,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,KAAK,CAAC,CAAC;IACpC,OAAO;QACL,MAAM;QACN,KAAK,EAAE,KAAK,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,GAAG,KAAK,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;QAC3E,MAAM,EAAE,MAAM;YACZ,CAAC,CAAC
,uBAAuB,KAAK,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE;YACpD,CAAC,CAAC,qBAAqB,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE;KAC9C,CAAC;AACJ,CAAC;AAED,KAAK,UAAU,qBAAqB,CAAC,KAAyB,EAAE,GAAiB;IAC/E,MAAM,MAAM,GAAG,GAAG,CAAC,MAAM,IAAI,EAAE,CAAC;IAChC,MAAM,OAAO,GAAG,KAAK,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,CAAC;IACnE,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,KAAK,CAAC,CAAC;IACpC,OAAO;QACL,MAAM;QACN,KAAK,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACrB,MAAM,EAAE,MAAM;YACZ,CAAC,CAAC,6BAA6B;YAC/B,CAAC,CAAC,+BAA+B,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE;KACxD,CAAC;AACJ,CAAC;AAED,KAAK,UAAU,gBAAgB,CAAC,KAAoB,EAAE,GAAiB;IACrE,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,IAAI,GAAG,CAAC,UAAU,IAAI,kBAAkB,CAAC;IAClE,MAAM,MAAM,GAAG;;;EAGf,KAAK,CAAC,MAAM;;;EAGZ,CAAC,GAAG,CAAC,MAAM,IAAI,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC;;4FAEyD,CAAC;IAE3F,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC;YAC9B,KAAK,EAAE,QAAQ;YACf,KAAK;YACL,MAAM;YACN,GAAG,EAAE,GAAG,CAAC,GAAG;YACZ,OAAO,EAAE,MAAM;YACf,QAAQ,EAAE,CAAC;SACZ,CAAC,CAAC;QAEH,MAAM,UAAU,GAAG,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;QACjD,MAAM,KAAK,GAAG,UAAU,CAAC,CAAC,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAC3D,MAAM,eAAe,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,CAAC;QACxD,MAAM,MAAM,GAAG,eAAe,IAAI,KAAK,CAAC,SAAS,CAAC;QAElD,OAAO;YACL,MAAM;YACN,KAAK,EAAE,eAAe,GAAG,CAAC;YAC1B,MAAM,EAAE,oBAAoB,eAAe,YAAY,KAAK,CAAC,SAAS,MAAM,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE;SAC1G,CAAC;IACJ,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,GAAG,CAAC,IAAI,CAAC,2BAA2B,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QACxF,OAAO;YACL,MAAM,EAAE,KAAK;YACb,KAAK,EAAE,CAAC;YACR,MAAM,EAAE,qBAAqB,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE;SAChF,CAAC;IACJ,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,WAAW,CAAC,GAAY;IACtC,IAAI,CAAC,GAAG,IAAI,OAAO,GAAG,KAAK,QAAQ;QAAE,OAAO,EAAE,CAAC;IAC/C,MAAM,GAAG,GAAG
,GAA8B,CAAC;IAC3C,MAAM,MAAM,GAAG,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC;IAE3D,OAAO,MAAM,CAAC,GAAG,CAAC,CAAC,CAA0B,EAAE,EAAE;QAC/C,MAAM,IAAI,GAAG,MAAM,CAAC,CAAC,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC;QAClC,QAAQ,IAAI,EAAE,CAAC;YACb,KAAK,WAAW;gBACd,OAAO,EAAE,IAAI,EAAE,WAAW,EAAmB,CAAC;YAChD,KAAK,aAAa;gBAChB,OAAO,EAAE,IAAI,EAAE,aAAa,EAAE,IAAI,EAAE,MAAM,CAAC,CAAC,CAAC,IAAI,IAAI,EAAE,CAAC,EAAqB,CAAC;YAChF,KAAK,MAAM;gBACT,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,CAAC,CAAC,CAAC,IAAI,IAAI,EAAE,CAAC,EAAE,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC,OAAO,IAAI,EAAE,CAAC,EAAe,CAAC;YACrG,KAAK,MAAM;gBACT,OAAO;oBACL,IAAI,EAAE,MAAM;oBACZ,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC,MAAM,IAAI,KAAK,CAAC;oBACjC,IAAI,EAAE,MAAM,CAAC,CAAC,CAAC,IAAI,IAAI,GAAG,CAAC;oBAC3B,aAAa,EAAE,MAAM,CAAC,CAAC,CAAC,aAAa,IAAI,GAAG,CAAC;oBAC7C,oBAAoB,EAAE,CAAC,CAAC,oBAAoB,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,oBAAoB,CAAC,CAAC,CAAC,CAAC,SAAS;iBAC7E,CAAC;YACjB,KAAK,WAAW;gBACd,OAAO;oBACL,IAAI,EAAE,WAAW;oBACjB,SAAS,EAAE,CAAC,CAAC,SAAS,IAAI,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,SAAS;oBAChE,SAAS,EAAE,CAAC,CAAC,SAAS,IAAI,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,SAAS;iBAChD,CAAC;YACrB,KAAK,iBAAiB;gBACpB,OAAO;oBACL,IAAI,EAAE,iBAAiB;oBACvB,QAAQ,EAAE,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE;iBAC3C,CAAC;YAC3B,KAAK,gBAAgB;gBACnB,OAAO;oBACL,IAAI,EAAE,gBAAgB;oBACtB,QAAQ,EAAE,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE;iBAC5C,CAAC;YAC1B,KAAK,WAAW;gBACd,OAAO;oBACL,IAAI,EAAE,WAAW;oBACjB,KAAK,EAAE,MAAM,CAAC,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;oBAC5B,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC,MAAM,IAAI,EAAE,CAAC;oBAC9B,SAAS,EAAE,MAAM,CAAC,CAAC,CAAC,SAAS,IAAI,CAAC,CAAC;iBACnB,CAAC;YACrB;gBACE,OAAO,EAAE,IAAI,EAAkC,CAAC;QACpD,CAAC;IACH,CAAC,CAAC,CAAC;AACL,CAAC"}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
import type { Config } from './config.js';
|
|
2
|
+
import type { CheckDefinition } from './eval-checks.js';
|
|
3
|
+
import type { EvalCase, EvalSuiteResult } from './eval.js';
|
|
4
|
+
/** Options for running the eval suite. */
|
|
5
|
+
export type EvalRunOptions = {
|
|
6
|
+
/** Only run this specific case ID (prefix match). */
|
|
7
|
+
caseId?: string;
|
|
8
|
+
/** Verbose output. */
|
|
9
|
+
verbose?: boolean;
|
|
10
|
+
};
|
|
11
|
+
/** Extended eval case with parsed check definitions. */
|
|
12
|
+
export type EvalCaseWithChecks = EvalCase & {
|
|
13
|
+
checks?: CheckDefinition[];
|
|
14
|
+
/** For step cases: raw input text. */
|
|
15
|
+
inputText?: string;
|
|
16
|
+
};
|
|
17
|
+
/**
|
|
18
|
+
* Run a full eval suite: iterate over cases, execute, collect results.
|
|
19
|
+
*/
|
|
20
|
+
export declare function runEvalSuite(cases: EvalCaseWithChecks[], config: Config, options?: EvalRunOptions): Promise<EvalSuiteResult>;
|
|
21
|
+
/**
|
|
22
|
+
* Prepare a fixture directory for an e2e eval case.
|
|
23
|
+
* Clones the repo at the specified ref into a worktree.
|
|
24
|
+
*/
|
|
25
|
+
export declare function prepareFixture(evalCase: EvalCase, projectDir: string): string;
|
|
26
|
+
/**
|
|
27
|
+
* Snapshot the current harness state (prompts + skills + config) as a hash.
|
|
28
|
+
*/
|
|
29
|
+
export declare function snapshotHarness(config: Config): string;
|