@bradtaylorsf/alpha-loop 1.2.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. package/README.md +60 -19
  2. package/dist/cli.js +83 -1
  3. package/dist/cli.js.map +1 -1
  4. package/dist/commands/auth.js +1 -1
  5. package/dist/commands/auth.js.map +1 -1
  6. package/dist/commands/eval.d.ts +53 -0
  7. package/dist/commands/eval.js +538 -0
  8. package/dist/commands/eval.js.map +1 -0
  9. package/dist/commands/evolve.d.ts +25 -0
  10. package/dist/commands/evolve.js +270 -0
  11. package/dist/commands/evolve.js.map +1 -0
  12. package/dist/commands/history.d.ts +1 -1
  13. package/dist/commands/history.js +4 -4
  14. package/dist/commands/history.js.map +1 -1
  15. package/dist/commands/init.d.ts +14 -0
  16. package/dist/commands/init.js +199 -30
  17. package/dist/commands/init.js.map +1 -1
  18. package/dist/commands/resume.js +1 -0
  19. package/dist/commands/resume.js.map +1 -1
  20. package/dist/commands/run.js +170 -12
  21. package/dist/commands/run.js.map +1 -1
  22. package/dist/commands/scan.d.ts +1 -1
  23. package/dist/commands/scan.js +12 -9
  24. package/dist/commands/scan.js.map +1 -1
  25. package/dist/commands/sync.d.ts +5 -0
  26. package/dist/commands/sync.js +24 -5
  27. package/dist/commands/sync.js.map +1 -1
  28. package/dist/commands/vision.js +5 -3
  29. package/dist/commands/vision.js.map +1 -1
  30. package/dist/engine/agents.d.ts +6 -1
  31. package/dist/engine/agents.js +14 -12
  32. package/dist/engine/agents.js.map +1 -1
  33. package/dist/engine/prerequisites.d.ts +4 -7
  34. package/dist/engine/prerequisites.js +12 -36
  35. package/dist/engine/prerequisites.js.map +1 -1
  36. package/dist/lib/agent.d.ts +18 -0
  37. package/dist/lib/agent.js +211 -30
  38. package/dist/lib/agent.js.map +1 -1
  39. package/dist/lib/config.d.ts +25 -2
  40. package/dist/lib/config.js +80 -7
  41. package/dist/lib/config.js.map +1 -1
  42. package/dist/lib/eval-checks.d.ts +91 -0
  43. package/dist/lib/eval-checks.js +254 -0
  44. package/dist/lib/eval-checks.js.map +1 -0
  45. package/dist/lib/eval-runner.d.ts +29 -0
  46. package/dist/lib/eval-runner.js +439 -0
  47. package/dist/lib/eval-runner.js.map +1 -0
  48. package/dist/lib/eval.d.ts +170 -0
  49. package/dist/lib/eval.js +507 -0
  50. package/dist/lib/eval.js.map +1 -0
  51. package/dist/lib/learning.js +2 -2
  52. package/dist/lib/learning.js.map +1 -1
  53. package/dist/lib/pipeline.d.ts +44 -0
  54. package/dist/lib/pipeline.js +607 -138
  55. package/dist/lib/pipeline.js.map +1 -1
  56. package/dist/lib/prompts.d.ts +19 -0
  57. package/dist/lib/prompts.js +56 -5
  58. package/dist/lib/prompts.js.map +1 -1
  59. package/dist/lib/score.d.ts +80 -0
  60. package/dist/lib/score.js +172 -0
  61. package/dist/lib/score.js.map +1 -0
  62. package/dist/lib/session.d.ts +2 -1
  63. package/dist/lib/session.js +70 -19
  64. package/dist/lib/session.js.map +1 -1
  65. package/dist/lib/traces.d.ts +173 -0
  66. package/dist/lib/traces.js +272 -0
  67. package/dist/lib/traces.js.map +1 -0
  68. package/dist/lib/verify.d.ts +7 -1
  69. package/dist/lib/verify.js +109 -157
  70. package/dist/lib/verify.js.map +1 -1
  71. package/dist/lib/worktree.d.ts +1 -0
  72. package/dist/lib/worktree.js +9 -1
  73. package/dist/lib/worktree.js.map +1 -1
  74. package/package.json +1 -1
  75. package/templates/agents/implementer.md +1 -1
  76. package/templates/agents/reviewer.md +1 -1
  77. package/dist/engine/config.d.ts +0 -71
  78. package/dist/engine/config.js +0 -73
  79. package/dist/engine/config.js.map +0 -1
@@ -1,12 +1,23 @@
1
1
  import { readFileSync, existsSync } from 'node:fs';
2
2
  import { execSync } from 'node:child_process';
3
3
  import { parse as parseYaml } from 'yaml';
4
/**
 * Estimate cost in USD from token counts and a pricing table.
 *
 * The table maps a model name to `{ input, output }` rates; the weighted token
 * total is divided by 1,000,000, so rates are expressed per million tokens.
 *
 * @param model - Model name used as the key into `pricing`.
 * @param inputTokens - Number of input tokens consumed.
 * @param outputTokens - Number of output tokens produced.
 * @param pricing - Pricing table keyed by model name.
 * @returns The estimated cost, or 0 if the model is not in the pricing table
 *          (or no table was supplied).
 */
export function estimateCost(model, inputTokens, outputTokens, pricing) {
    // Own-property lookup only: a plain `pricing[model]` would resolve inherited
    // names such as "constructor" or "toString" to Object.prototype members,
    // pass the truthiness test, and yield NaN instead of the documented 0.
    if (pricing == null || !Object.prototype.hasOwnProperty.call(pricing, model))
        return 0;
    const rates = pricing[model];
    if (!rates)
        return 0;
    return (inputTokens * rates.input + outputTokens * rates.output) / 1_000_000;
}
4
14
  const DEFAULTS = {
5
15
  repo: '',
6
16
  repoOwner: '',
7
17
  project: 2,
8
- model: 'opus',
9
- reviewModel: 'opus',
18
+ agent: 'claude',
19
+ model: '',
20
+ reviewModel: '',
10
21
  pollInterval: 60,
11
22
  dryRun: false,
12
23
  baseBranch: 'master',
@@ -15,7 +26,6 @@ const DEFAULTS = {
15
26
  maxTestRetries: 3,
16
27
  testCommand: 'pnpm test',
17
28
  devCommand: 'pnpm dev',
18
- port: 3000,
19
29
  skipTests: false,
20
30
  skipReview: false,
21
31
  skipInstall: false,
@@ -32,12 +42,27 @@ const DEFAULTS = {
32
42
  runFull: false,
33
43
  verbose: false,
34
44
  harnesses: [],
45
+ setupCommand: '',
46
+ evalDir: '.alpha-loop/evals',
47
+ evalModel: '',
48
+ skipEval: false,
49
+ evalTimeout: 300,
50
+ autoCapture: true,
51
+ skipPostSessionReview: false,
52
+ skipPostSessionSecurity: false,
53
+ pricing: {
54
+ 'claude-opus-4-6': { input: 15.0, output: 75.0 },
55
+ 'claude-sonnet-4-6': { input: 3.0, output: 15.0 },
56
+ 'claude-haiku-4-5': { input: 0.80, output: 4.0 },
57
+ 'codex-mini': { input: 1.50, output: 6.0 },
58
+ },
35
59
  };
36
60
  /** Map from YAML key (snake_case) to Config key (camelCase). */
37
61
  const YAML_KEY_MAP = {
38
62
  harnesses: 'harnesses',
39
63
  repo: 'repo',
40
64
  project: 'project',
65
+ agent: 'agent',
41
66
  model: 'model',
42
67
  review_model: 'reviewModel',
43
68
  poll_interval: 'pollInterval',
@@ -48,7 +73,6 @@ const YAML_KEY_MAP = {
48
73
  max_test_retries: 'maxTestRetries',
49
74
  test_command: 'testCommand',
50
75
  dev_command: 'devCommand',
51
- port: 'port',
52
76
  skip_tests: 'skipTests',
53
77
  skip_review: 'skipReview',
54
78
  skip_install: 'skipInstall',
@@ -64,11 +88,18 @@ const YAML_KEY_MAP = {
64
88
  auto_cleanup: 'autoCleanup',
65
89
  run_full: 'runFull',
66
90
  verbose: 'verbose',
91
+ setup_command: 'setupCommand',
92
+ eval_dir: 'evalDir',
93
+ eval_model: 'evalModel',
94
+ skip_eval: 'skipEval',
95
+ eval_timeout: 'evalTimeout',
96
+ auto_capture: 'autoCapture',
67
97
  };
68
98
  /** Map from env var name to Config key. */
69
99
  const ENV_KEY_MAP = {
70
100
  REPO: 'repo',
71
- PROJECT_NUM: 'project',
101
+ PROJECT: 'project',
102
+ AGENT: 'agent',
72
103
  MODEL: 'model',
73
104
  REVIEW_MODEL: 'reviewModel',
74
105
  POLL_INTERVAL: 'pollInterval',
@@ -79,7 +110,6 @@ const ENV_KEY_MAP = {
79
110
  MAX_TEST_RETRIES: 'maxTestRetries',
80
111
  TEST_COMMAND: 'testCommand',
81
112
  DEV_COMMAND: 'devCommand',
82
- PORT: 'port',
83
113
  SKIP_TESTS: 'skipTests',
84
114
  SKIP_REVIEW: 'skipReview',
85
115
  SKIP_INSTALL: 'skipInstall',
@@ -95,6 +125,14 @@ const ENV_KEY_MAP = {
95
125
  AUTO_CLEANUP: 'autoCleanup',
96
126
  RUN_FULL: 'runFull',
97
127
  VERBOSE: 'verbose',
128
+ SETUP_COMMAND: 'setupCommand',
129
+ EVAL_DIR: 'evalDir',
130
+ EVAL_MODEL: 'evalModel',
131
+ SKIP_EVAL: 'skipEval',
132
+ EVAL_TIMEOUT: 'evalTimeout',
133
+ AUTO_CAPTURE: 'autoCapture',
134
+ SKIP_POST_SESSION_REVIEW: 'skipPostSessionReview',
135
+ SKIP_POST_SESSION_SECURITY: 'skipPostSessionSecurity',
98
136
  };
99
137
  function coerce(value, current) {
100
138
  if (typeof current === 'number')
@@ -103,8 +141,10 @@ function coerce(value, current) {
103
141
  return value === 'true' || value === '1';
104
142
  return value;
105
143
  }
106
- /** Validate a string contains only safe shell characters. */
144
+ /** Validate a string contains only safe shell characters. Empty strings are allowed (model is optional). */
107
145
  export function assertSafeShellArg(value, name) {
146
+ if (value === '')
147
+ return value;
108
148
  if (!/^[a-zA-Z0-9._\-/]+$/.test(value)) {
109
149
  throw new Error(`Invalid ${name}: contains unsafe characters: ${value}`);
110
150
  }
@@ -143,6 +183,27 @@ function loadYamlConfig(configPath) {
143
183
  result[configKey] = parsed[yamlKey];
144
184
  }
145
185
  }
186
+ // Handle post_session nested config
187
+ if (parsed.post_session && typeof parsed.post_session === 'object') {
188
+ const ps = parsed.post_session;
189
+ if (ps.review === false)
190
+ result.skipPostSessionReview = true;
191
+ if (ps.security_scan === false)
192
+ result.skipPostSessionSecurity = true;
193
+ }
194
+ // Handle pricing table (nested object, not in YAML_KEY_MAP)
195
+ if (parsed.pricing && typeof parsed.pricing === 'object') {
196
+ const pricing = {};
197
+ for (const [model, value] of Object.entries(parsed.pricing)) {
198
+ const v = value;
199
+ if (typeof v?.input === 'number' && typeof v?.output === 'number') {
200
+ pricing[model] = { input: v.input, output: v.output };
201
+ }
202
+ }
203
+ if (Object.keys(pricing).length > 0) {
204
+ result.pricing = pricing;
205
+ }
206
+ }
146
207
  return result;
147
208
  }
148
209
  function loadEnvConfig() {
@@ -165,13 +226,25 @@ export function loadConfig(overrides) {
165
226
  autoDetect.repo = detectedRepo;
166
227
  }
167
228
  // Precedence: overrides (CLI flags) > env vars > config file > auto-detect > defaults
229
+ // Pricing is merged specially: YAML/overrides extend defaults rather than replacing
230
+ const mergedPricing = {
231
+ ...DEFAULTS.pricing,
232
+ ...yamlConfig.pricing,
233
+ ...overrides?.pricing,
234
+ };
168
235
  const merged = {
169
236
  ...DEFAULTS,
170
237
  ...autoDetect,
171
238
  ...yamlConfig,
172
239
  ...envConfig,
173
240
  ...overrides,
241
+ pricing: mergedPricing,
174
242
  };
243
+ // Validate agent is a known value
244
+ const VALID_AGENTS = ['claude', 'codex', 'opencode'];
245
+ if (!VALID_AGENTS.includes(merged.agent)) {
246
+ throw new Error(`Invalid agent: "${merged.agent}". Supported agents: ${VALID_AGENTS.join(', ')}`);
247
+ }
175
248
  // Derive repoOwner from repo
176
249
  if (merged.repo) {
177
250
  merged.repoOwner = merged.repo.split('/')[0] ?? '';
@@ -1 +1 @@
1
- {"version":3,"file":"config.js","sourceRoot":"","sources":["../../src/lib/config.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,UAAU,EAAE,MAAM,SAAS,CAAC;AACnD,OAAO,EAAE,QAAQ,EAAE,MAAM,oBAAoB,CAAC;AAC9C,OAAO,EAAE,KAAK,IAAI,SAAS,EAAE,MAAM,MAAM,CAAC;AAmC1C,MAAM,QAAQ,GAAW;IACvB,IAAI,EAAE,EAAE;IACR,SAAS,EAAE,EAAE;IACb,OAAO,EAAE,CAAC;IACV,KAAK,EAAE,MAAM;IACb,WAAW,EAAE,MAAM;IACnB,YAAY,EAAE,EAAE;IAChB,MAAM,EAAE,KAAK;IACb,UAAU,EAAE,QAAQ;IACpB,MAAM,EAAE,MAAM;IACd,UAAU,EAAE,OAAO;IACnB,cAAc,EAAE,CAAC;IACjB,WAAW,EAAE,WAAW;IACxB,UAAU,EAAE,UAAU;IACtB,IAAI,EAAE,IAAI;IACV,SAAS,EAAE,KAAK;IAChB,UAAU,EAAE,KAAK;IACjB,WAAW,EAAE,KAAK;IAClB,aAAa,EAAE,KAAK;IACpB,UAAU,EAAE,KAAK;IACjB,SAAS,EAAE,KAAK;IAChB,OAAO,EAAE,KAAK;IACd,SAAS,EAAE,CAAC;IACZ,kBAAkB,EAAE,CAAC;IACrB,SAAS,EAAE,EAAE;IACb,SAAS,EAAE,IAAI;IACf,OAAO,EAAE,EAAE;IACX,WAAW,EAAE,IAAI;IACjB,OAAO,EAAE,KAAK;IACd,OAAO,EAAE,KAAK;IACd,SAAS,EAAE,EAAE;CACd,CAAC;AAEF,gEAAgE;AAChE,MAAM,YAAY,GAAiC;IACjD,SAAS,EAAE,WAAW;IACtB,IAAI,EAAE,MAAM;IACZ,OAAO,EAAE,SAAS;IAClB,KAAK,EAAE,OAAO;IACd,YAAY,EAAE,aAAa;IAC3B,aAAa,EAAE,cAAc;IAC7B,OAAO,EAAE,QAAQ;IACjB,WAAW,EAAE,YAAY;IACzB,OAAO,EAAE,QAAQ;IACjB,KAAK,EAAE,YAAY;IACnB,gBAAgB,EAAE,gBAAgB;IAClC,YAAY,EAAE,aAAa;IAC3B,WAAW,EAAE,YAAY;IACzB,IAAI,EAAE,MAAM;IACZ,UAAU,EAAE,WAAW;IACvB,WAAW,EAAE,YAAY;IACzB,YAAY,EAAE,aAAa;IAC3B,cAAc,EAAE,eAAe;IAC/B,WAAW,EAAE,YAAY;IACzB,UAAU,EAAE,WAAW;IACvB,QAAQ,EAAE,SAAS;IACnB,UAAU,EAAE,WAAW;IACvB,oBAAoB,EAAE,oBAAoB;IAC1C,SAAS,EAAE,WAAW;IACtB,UAAU,EAAE,WAAW;IACvB,QAAQ,EAAE,SAAS;IACnB,YAAY,EAAE,aAAa;IAC3B,QAAQ,EAAE,SAAS;IACnB,OAAO,EAAE,SAAS;CACnB,CAAC;AAEF,2CAA2C;AAC3C,MAAM,WAAW,GAAiC;IAChD,IAAI,EAAE,MAAM;IACZ,WAAW,EAAE,SAAS;IACtB,KAAK,EAAE,OAAO;IACd,YAAY,EAAE,aAAa;IAC3B,aAAa,EAAE,cAAc;IAC7B,OAAO,EAAE,QAAQ;IACjB,WAAW,EAAE,YAAY;IACzB,OAAO,EAAE,QAAQ;IACjB,WAAW,EAAE,YAAY;IACzB,gBAAgB,EAAE,gBAAgB;IAClC,YAAY,EAAE,aAAa;IAC3B,WAAW,EAAE,YAAY;IACzB,IAAI,EAAE,MAAM;IACZ,UAAU,EAAE,WAAW;IACvB,WAAW,EAAE,YAAY;IACzB,YAAY,EAAE,aAAa;IAC3B,cAAc,EAAE,eAAe;IAC/B,WAAW,EAAE,YAAY;IACzB,UAAU,EAAE,WAAW;IA
CvB,QAAQ,EAAE,SAAS;IACnB,UAAU,EAAE,WAAW;IACvB,oBAAoB,EAAE,oBAAoB;IAC1C,SAAS,EAAE,WAAW;IACtB,UAAU,EAAE,WAAW;IACvB,QAAQ,EAAE,SAAS;IACnB,YAAY,EAAE,aAAa;IAC3B,QAAQ,EAAE,SAAS;IACnB,OAAO,EAAE,SAAS;CACnB,CAAC;AAEF,SAAS,MAAM,CAAC,KAAa,EAAE,OAAgB;IAC7C,IAAI,OAAO,OAAO,KAAK,QAAQ;QAAE,OAAO,MAAM,CAAC,KAAK,CAAC,CAAC;IACtD,IAAI,OAAO,OAAO,KAAK,SAAS;QAAE,OAAO,KAAK,KAAK,MAAM,IAAI,KAAK,KAAK,GAAG,CAAC;IAC3E,OAAO,KAAK,CAAC;AACf,CAAC;AAED,6DAA6D;AAC7D,MAAM,UAAU,kBAAkB,CAAC,KAAa,EAAE,IAAY;IAC5D,IAAI,CAAC,qBAAqB,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;QACvC,MAAM,IAAI,KAAK,CAAC,WAAW,IAAI,iCAAiC,KAAK,EAAE,CAAC,CAAC;IAC3E,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,MAAM,UAAU,UAAU;IACxB,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,QAAQ,CAAC,2BAA2B,EAAE;YAChD,QAAQ,EAAE,OAAO;YACjB,KAAK,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC;SAChC,CAAC,CAAC,IAAI,EAAE,CAAC;QAEV,2CAA2C;QAC3C,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,gCAAgC,CAAC,CAAC;QAC1D,IAAI,KAAK;YAAE,OAAO,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC;QAE5C,qCAAqC;QACrC,MAAM,GAAG,GAAG,GAAG,CAAC,KAAK,CAAC,+BAA+B,CAAC,CAAC;QACvD,IAAI,GAAG;YAAE,OAAO,GAAG,GAAG,CAAC,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;IACxC,CAAC;IAAC,MAAM,CAAC;QACP,8BAA8B;IAChC,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,cAAc,CAAC,UAAkB;IACxC,IAAI,CAAC,UAAU,CAAC,UAAU,CAAC;QAAE,OAAO,EAAE,CAAC;IAEvC,MAAM,GAAG,GAAG,YAAY,CAAC,UAAU,EAAE,OAAO,CAAC,CAAC;IAC9C,MAAM,MAAM,GAAG,SAAS,CAAC,GAAG,CAAmC,CAAC;IAChE,IAAI,CAAC,MAAM,IAAI,OAAO,MAAM,KAAK,QAAQ;QAAE,OAAO,EAAE,CAAC;IAErD,MAAM,MAAM,GAAoB,EAAE,CAAC;IACnC,KAAK,MAAM,CAAC,OAAO,EAAE,SAAS,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,YAAY,CAAC,EAAE,CAAC;QAChE,IAAI,OAAO,IAAI,MAAM,EAAE,CAAC;YACrB,MAAkC,CAAC,SAAS,CAAC,GAAG,MAAM,CAAC,OAAO,CAAC,CAAC;QACnE,CAAC;IACH,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,SAAS,aAAa;IACpB,MAAM,MAAM,GAAoB,EAAE,CAAC;IACnC,KAAK,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,WAAW,CAAC,EAAE,CAAC;QAC9D,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;QAChC,IAAI,GAAG,KAAK,SAAS,EAAE,CAAC;YACrB,MAAkC,CAAC,SAAS,CAAC,GAAG,MAAM,CAAC,GAAG,EAAE,QAA
Q,CAAC,SAAS,CAAC,CAAC,CAAC;QACpF,CAAC;IACH,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,MAAM,UAAU,UAAU,CAAC,SAA2B;IACpD,MAAM,UAAU,GAAG,cAAc,CAAC,kBAAkB,CAAC,CAAC;IACtD,MAAM,SAAS,GAAG,aAAa,EAAE,CAAC;IAElC,4CAA4C;IAC5C,MAAM,YAAY,GAAG,UAAU,EAAE,CAAC;IAClC,MAAM,UAAU,GAAoB,EAAE,CAAC;IACvC,IAAI,YAAY,EAAE,CAAC;QACjB,UAAU,CAAC,IAAI,GAAG,YAAY,CAAC;IACjC,CAAC;IAED,sFAAsF;IACtF,MAAM,MAAM,GAAW;QACrB,GAAG,QAAQ;QACX,GAAG,UAAU;QACb,GAAG,UAAU;QACb,GAAG,SAAS;QACZ,GAAG,SAAS;KACb,CAAC;IAEF,6BAA6B;IAC7B,IAAI,MAAM,CAAC,IAAI,EAAE,CAAC;QAChB,MAAM,CAAC,SAAS,GAAG,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;IACrD,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC"}
1
+ {"version":3,"file":"config.js","sourceRoot":"","sources":["../../src/lib/config.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,UAAU,EAAE,MAAM,SAAS,CAAC;AACnD,OAAO,EAAE,QAAQ,EAAE,MAAM,oBAAoB,CAAC;AAC9C,OAAO,EAAE,KAAK,IAAI,SAAS,EAAE,MAAM,MAAM,CAAC;AAQ1C;;;GAGG;AACH,MAAM,UAAU,YAAY,CAC1B,KAAa,EACb,WAAmB,EACnB,YAAoB,EACpB,OAAqC;IAErC,MAAM,CAAC,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC;IACzB,IAAI,CAAC,CAAC;QAAE,OAAO,CAAC,CAAC;IACjB,OAAO,CAAC,WAAW,GAAG,CAAC,CAAC,KAAK,GAAG,YAAY,GAAG,CAAC,CAAC,MAAM,CAAC,GAAG,SAAS,CAAC;AACvE,CAAC;AAgDD,MAAM,QAAQ,GAAW;IACvB,IAAI,EAAE,EAAE;IACR,SAAS,EAAE,EAAE;IACb,OAAO,EAAE,CAAC;IACV,KAAK,EAAE,QAAQ;IACf,KAAK,EAAE,EAAE;IACT,WAAW,EAAE,EAAE;IACf,YAAY,EAAE,EAAE;IAChB,MAAM,EAAE,KAAK;IACb,UAAU,EAAE,QAAQ;IACpB,MAAM,EAAE,MAAM;IACd,UAAU,EAAE,OAAO;IACnB,cAAc,EAAE,CAAC;IACjB,WAAW,EAAE,WAAW;IACxB,UAAU,EAAE,UAAU;IACtB,SAAS,EAAE,KAAK;IAChB,UAAU,EAAE,KAAK;IACjB,WAAW,EAAE,KAAK;IAClB,aAAa,EAAE,KAAK;IACpB,UAAU,EAAE,KAAK;IACjB,SAAS,EAAE,KAAK;IAChB,OAAO,EAAE,KAAK;IACd,SAAS,EAAE,CAAC;IACZ,kBAAkB,EAAE,CAAC;IACrB,SAAS,EAAE,EAAE;IACb,SAAS,EAAE,IAAI;IACf,OAAO,EAAE,EAAE;IACX,WAAW,EAAE,IAAI;IACjB,OAAO,EAAE,KAAK;IACd,OAAO,EAAE,KAAK;IACd,SAAS,EAAE,EAAE;IACb,YAAY,EAAE,EAAE;IAChB,OAAO,EAAE,mBAAmB;IAC5B,SAAS,EAAE,EAAE;IACb,QAAQ,EAAE,KAAK;IACf,WAAW,EAAE,GAAG;IAChB,WAAW,EAAE,IAAI;IACjB,qBAAqB,EAAE,KAAK;IAC5B,uBAAuB,EAAE,KAAK;IAC9B,OAAO,EAAE;QACP,iBAAiB,EAAE,EAAE,KAAK,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE;QAChD,mBAAmB,EAAE,EAAE,KAAK,EAAE,GAAG,EAAE,MAAM,EAAE,IAAI,EAAE;QACjD,kBAAkB,EAAE,EAAE,KAAK,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,EAAE;QAChD,YAAY,EAAE,EAAE,KAAK,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,EAAE;KAC3C;CACF,CAAC;AAEF,gEAAgE;AAChE,MAAM,YAAY,GAAiC;IACjD,SAAS,EAAE,WAAW;IACtB,IAAI,EAAE,MAAM;IACZ,OAAO,EAAE,SAAS;IAClB,KAAK,EAAE,OAAO;IACd,KAAK,EAAE,OAAO;IACd,YAAY,EAAE,aAAa;IAC3B,aAAa,EAAE,cAAc;IAC7B,OAAO,EAAE,QAAQ;IACjB,WAAW,EAAE,YAAY;IACzB,OAAO,EAAE,QAAQ;IACjB,KAAK,EAAE,YAAY;IACnB,gBAAgB,EAAE,gBAAgB;IAClC,YAAY,EAAE,aAAa;IAC3B,WAAW,EAAE,YAAY;IACzB,UAAU,EAAE,WAAW;IACvB,WAAW,EAAE,YAAY;IACzB,YAAY,EAAE,
aAAa;IAC3B,cAAc,EAAE,eAAe;IAC/B,WAAW,EAAE,YAAY;IACzB,UAAU,EAAE,WAAW;IACvB,QAAQ,EAAE,SAAS;IACnB,UAAU,EAAE,WAAW;IACvB,oBAAoB,EAAE,oBAAoB;IAC1C,SAAS,EAAE,WAAW;IACtB,UAAU,EAAE,WAAW;IACvB,QAAQ,EAAE,SAAS;IACnB,YAAY,EAAE,aAAa;IAC3B,QAAQ,EAAE,SAAS;IACnB,OAAO,EAAE,SAAS;IAClB,aAAa,EAAE,cAAc;IAC7B,QAAQ,EAAE,SAAS;IACnB,UAAU,EAAE,WAAW;IACvB,SAAS,EAAE,UAAU;IACrB,YAAY,EAAE,aAAa;IAC3B,YAAY,EAAE,aAAa;CAC5B,CAAC;AAEF,2CAA2C;AAC3C,MAAM,WAAW,GAAiC;IAChD,IAAI,EAAE,MAAM;IACZ,OAAO,EAAE,SAAS;IAClB,KAAK,EAAE,OAAO;IACd,KAAK,EAAE,OAAO;IACd,YAAY,EAAE,aAAa;IAC3B,aAAa,EAAE,cAAc;IAC7B,OAAO,EAAE,QAAQ;IACjB,WAAW,EAAE,YAAY;IACzB,OAAO,EAAE,QAAQ;IACjB,WAAW,EAAE,YAAY;IACzB,gBAAgB,EAAE,gBAAgB;IAClC,YAAY,EAAE,aAAa;IAC3B,WAAW,EAAE,YAAY;IACzB,UAAU,EAAE,WAAW;IACvB,WAAW,EAAE,YAAY;IACzB,YAAY,EAAE,aAAa;IAC3B,cAAc,EAAE,eAAe;IAC/B,WAAW,EAAE,YAAY;IACzB,UAAU,EAAE,WAAW;IACvB,QAAQ,EAAE,SAAS;IACnB,UAAU,EAAE,WAAW;IACvB,oBAAoB,EAAE,oBAAoB;IAC1C,SAAS,EAAE,WAAW;IACtB,UAAU,EAAE,WAAW;IACvB,QAAQ,EAAE,SAAS;IACnB,YAAY,EAAE,aAAa;IAC3B,QAAQ,EAAE,SAAS;IACnB,OAAO,EAAE,SAAS;IAClB,aAAa,EAAE,cAAc;IAC7B,QAAQ,EAAE,SAAS;IACnB,UAAU,EAAE,WAAW;IACvB,SAAS,EAAE,UAAU;IACrB,YAAY,EAAE,aAAa;IAC3B,YAAY,EAAE,aAAa;IAC3B,wBAAwB,EAAE,uBAAuB;IACjD,0BAA0B,EAAE,yBAAyB;CACtD,CAAC;AAEF,SAAS,MAAM,CAAC,KAAa,EAAE,OAAgB;IAC7C,IAAI,OAAO,OAAO,KAAK,QAAQ;QAAE,OAAO,MAAM,CAAC,KAAK,CAAC,CAAC;IACtD,IAAI,OAAO,OAAO,KAAK,SAAS;QAAE,OAAO,KAAK,KAAK,MAAM,IAAI,KAAK,KAAK,GAAG,CAAC;IAC3E,OAAO,KAAK,CAAC;AACf,CAAC;AAED,4GAA4G;AAC5G,MAAM,UAAU,kBAAkB,CAAC,KAAa,EAAE,IAAY;IAC5D,IAAI,KAAK,KAAK,EAAE;QAAE,OAAO,KAAK,CAAC;IAC/B,IAAI,CAAC,qBAAqB,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;QACvC,MAAM,IAAI,KAAK,CAAC,WAAW,IAAI,iCAAiC,KAAK,EAAE,CAAC,CAAC;IAC3E,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,MAAM,UAAU,UAAU;IACxB,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,QAAQ,CAAC,2BAA2B,EAAE;YAChD,QAAQ,EAAE,OAAO;YACjB,KAAK,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC;SAChC,CAAC,CAAC,IAAI,EAAE,CAAC;QAEV,2CAA2C;QAC3C,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,gCAAgC,CAAC,CAAC;QAC1D,IAAI,KAAK;YAAE,OAAO,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK
,CAAC,CAAC,CAAC,EAAE,CAAC;QAE5C,qCAAqC;QACrC,MAAM,GAAG,GAAG,GAAG,CAAC,KAAK,CAAC,+BAA+B,CAAC,CAAC;QACvD,IAAI,GAAG;YAAE,OAAO,GAAG,GAAG,CAAC,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;IACxC,CAAC;IAAC,MAAM,CAAC;QACP,8BAA8B;IAChC,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,cAAc,CAAC,UAAkB;IACxC,IAAI,CAAC,UAAU,CAAC,UAAU,CAAC;QAAE,OAAO,EAAE,CAAC;IAEvC,MAAM,GAAG,GAAG,YAAY,CAAC,UAAU,EAAE,OAAO,CAAC,CAAC;IAC9C,MAAM,MAAM,GAAG,SAAS,CAAC,GAAG,CAAmC,CAAC;IAChE,IAAI,CAAC,MAAM,IAAI,OAAO,MAAM,KAAK,QAAQ;QAAE,OAAO,EAAE,CAAC;IAErD,MAAM,MAAM,GAAoB,EAAE,CAAC;IACnC,KAAK,MAAM,CAAC,OAAO,EAAE,SAAS,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,YAAY,CAAC,EAAE,CAAC;QAChE,IAAI,OAAO,IAAI,MAAM,EAAE,CAAC;YACrB,MAAkC,CAAC,SAAS,CAAC,GAAG,MAAM,CAAC,OAAO,CAAC,CAAC;QACnE,CAAC;IACH,CAAC;IAED,oCAAoC;IACpC,IAAI,MAAM,CAAC,YAAY,IAAI,OAAO,MAAM,CAAC,YAAY,KAAK,QAAQ,EAAE,CAAC;QACnE,MAAM,EAAE,GAAG,MAAM,CAAC,YAAuC,CAAC;QAC1D,IAAI,EAAE,CAAC,MAAM,KAAK,KAAK;YAAE,MAAM,CAAC,qBAAqB,GAAG,IAAI,CAAC;QAC7D,IAAI,EAAE,CAAC,aAAa,KAAK,KAAK;YAAE,MAAM,CAAC,uBAAuB,GAAG,IAAI,CAAC;IACxE,CAAC;IAED,4DAA4D;IAC5D,IAAI,MAAM,CAAC,OAAO,IAAI,OAAO,MAAM,CAAC,OAAO,KAAK,QAAQ,EAAE,CAAC;QACzD,MAAM,OAAO,GAAsD,EAAE,CAAC;QACtE,KAAK,MAAM,CAAC,KAAK,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,OAAkC,CAAC,EAAE,CAAC;YACvF,MAAM,CAAC,GAAG,KAAgC,CAAC;YAC3C,IAAI,OAAO,CAAC,EAAE,KAAK,KAAK,QAAQ,IAAI,OAAO,CAAC,EAAE,MAAM,KAAK,QAAQ,EAAE,CAAC;gBAClE,OAAO,CAAC,KAAK,CAAC,GAAG,EAAE,KAAK,EAAE,CAAC,CAAC,KAAK,EAAE,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC;YACxD,CAAC;QACH,CAAC;QACD,IAAI,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACpC,MAAM,CAAC,OAAO,GAAG,OAAO,CAAC;QAC3B,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,SAAS,aAAa;IACpB,MAAM,MAAM,GAAoB,EAAE,CAAC;IACnC,KAAK,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,WAAW,CAAC,EAAE,CAAC;QAC9D,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;QAChC,IAAI,GAAG,KAAK,SAAS,EAAE,CAAC;YACrB,MAAkC,CAAC,SAAS,CAAC,GAAG,MAAM,CAAC,GAAG,EAAE,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC;QACpF,CAAC;IACH,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC;AAE
D,MAAM,UAAU,UAAU,CAAC,SAA2B;IACpD,MAAM,UAAU,GAAG,cAAc,CAAC,kBAAkB,CAAC,CAAC;IACtD,MAAM,SAAS,GAAG,aAAa,EAAE,CAAC;IAElC,4CAA4C;IAC5C,MAAM,YAAY,GAAG,UAAU,EAAE,CAAC;IAClC,MAAM,UAAU,GAAoB,EAAE,CAAC;IACvC,IAAI,YAAY,EAAE,CAAC;QACjB,UAAU,CAAC,IAAI,GAAG,YAAY,CAAC;IACjC,CAAC;IAED,sFAAsF;IACtF,oFAAoF;IACpF,MAAM,aAAa,GAAG;QACpB,GAAG,QAAQ,CAAC,OAAO;QACnB,GAAG,UAAU,CAAC,OAAO;QACrB,GAAG,SAAS,EAAE,OAAO;KACtB,CAAC;IAEF,MAAM,MAAM,GAAW;QACrB,GAAG,QAAQ;QACX,GAAG,UAAU;QACb,GAAG,UAAU;QACb,GAAG,SAAS;QACZ,GAAG,SAAS;QACZ,OAAO,EAAE,aAAa;KACvB,CAAC;IAEF,kCAAkC;IAClC,MAAM,YAAY,GAAG,CAAC,QAAQ,EAAE,OAAO,EAAE,UAAU,CAAU,CAAC;IAC9D,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC,MAAM,CAAC,KAAoC,CAAC,EAAE,CAAC;QACxE,MAAM,IAAI,KAAK,CAAC,mBAAmB,MAAM,CAAC,KAAK,wBAAwB,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACpG,CAAC;IAED,6BAA6B;IAC7B,IAAI,MAAM,CAAC,IAAI,EAAE,CAAC;QAChB,MAAM,CAAC,SAAS,GAAG,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;IACrD,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC"}
@@ -0,0 +1,91 @@
1
+ /** Result of running a single check. */
2
+ export type CheckResult = {
3
+ passed: boolean;
4
+ score: number;
5
+ detail: string;
6
+ };
7
+ /** Base check with a type discriminator. */
8
+ type BaseCheck = {
9
+ type: string;
10
+ };
11
+ /** All tests must pass. */
12
+ export type TestPassCheck = BaseCheck & {
13
+ type: 'test_pass';
14
+ };
15
+ /** A specific file must exist. */
16
+ export type FileExistsCheck = BaseCheck & {
17
+ type: 'file_exists';
18
+ path: string;
19
+ };
20
+ /** A pattern must exist in a file. */
21
+ export type GrepCheck = BaseCheck & {
22
+ type: 'grep';
23
+ file: string;
24
+ pattern: string;
25
+ };
26
+ /** HTTP endpoint check. */
27
+ export type HttpCheck = BaseCheck & {
28
+ type: 'http';
29
+ method: string;
30
+ path: string;
31
+ port?: number;
32
+ expect_status: number;
33
+ expect_body_contains?: string;
34
+ };
35
+ /** Diff size limit check. */
36
+ export type DiffSizeCheck = BaseCheck & {
37
+ type: 'diff_size';
38
+ max_files?: number;
39
+ max_lines?: number;
40
+ };
41
+ /** Keywords that must be present in output. */
42
+ export type KeywordPresentCheck = BaseCheck & {
43
+ type: 'keyword_present';
44
+ keywords: string[];
45
+ };
46
+ /** Keywords that must be absent from output. */
47
+ export type KeywordAbsentCheck = BaseCheck & {
48
+ type: 'keyword_absent';
49
+ keywords: string[];
50
+ };
51
+ /** LLM-judge evaluation. */
52
+ export type LlmJudgeCheck = BaseCheck & {
53
+ type: 'llm_judge';
54
+ model: string;
55
+ rubric: string;
56
+ min_score: number;
57
+ };
58
+ /** Union of all check types. */
59
+ export type CheckDefinition = TestPassCheck | FileExistsCheck | GrepCheck | HttpCheck | DiffSizeCheck | KeywordPresentCheck | KeywordAbsentCheck | LlmJudgeCheck;
60
+ /** Context passed to check runners. */
61
+ export type CheckContext = {
62
+ /** Working directory (worktree root). */
63
+ cwd: string;
64
+ /** Test command to run. */
65
+ testCommand?: string;
66
+ /** Agent output (for step evals). */
67
+ output?: string;
68
+ /** Git diff of changes. */
69
+ diff?: string;
70
+ /** List of changed files. */
71
+ filesChanged?: string[];
72
+ /** Model to use for LLM judge (fallback). */
73
+ judgeModel?: string;
74
+ };
75
+ /**
76
+ * Run a single check against the given context.
77
+ */
78
+ export declare function runCheck(check: CheckDefinition, ctx: CheckContext): Promise<CheckResult>;
79
+ /**
80
+ * Run all checks and return aggregate results.
81
+ */
82
+ export declare function runChecks(checks: CheckDefinition[], ctx: CheckContext): Promise<{
83
+ results: CheckResult[];
84
+ allPassed: boolean;
85
+ avgScore: number;
86
+ }>;
87
+ /**
88
+ * Parse check definitions from a checks.yaml object.
89
+ */
90
+ export declare function parseChecks(raw: unknown): CheckDefinition[];
91
+ export {};
@@ -0,0 +1,254 @@
1
+ /**
2
+ * Eval Check Executor — runs machine-checkable acceptance criteria.
3
+ *
4
+ * Check types:
5
+ * E2E: test_pass, file_exists, grep, http, diff_size
6
+ * Step: keyword_present, keyword_absent, llm_judge
7
+ *
8
+ * Each check returns { passed, score, detail }.
9
+ */
10
+ import { existsSync, readFileSync } from 'node:fs';
11
+ import { join } from 'node:path';
12
+ import { exec } from './shell.js';
13
+ import { spawnAgent } from './agent.js';
14
+ import { log } from './logger.js';
15
/**
 * Route one check definition to the runner matching its `type` tag.
 *
 * A `type` that matches none of the known runners does not throw; it resolves
 * to a failed result with score 0 whose detail names the unknown type.
 */
export async function runCheck(check, ctx) {
    const kind = check.type;
    // E2E-style checks.
    if (kind === 'test_pass')
        return runTestPassCheck(check, ctx);
    if (kind === 'file_exists')
        return runFileExistsCheck(check, ctx);
    if (kind === 'grep')
        return runGrepCheck(check, ctx);
    if (kind === 'http')
        return runHttpCheck(check, ctx);
    if (kind === 'diff_size')
        return runDiffSizeCheck(check, ctx);
    // Step-style checks that inspect the agent output.
    if (kind === 'keyword_present')
        return runKeywordPresentCheck(check, ctx);
    if (kind === 'keyword_absent')
        return runKeywordAbsentCheck(check, ctx);
    if (kind === 'llm_judge')
        return runLlmJudgeCheck(check, ctx);
    return { passed: false, score: 0, detail: `Unknown check type: ${check.type}` };
}
40
/**
 * Execute every check in order and summarise the outcome.
 *
 * Checks are awaited one at a time. A check that throws is recorded as a
 * failed result (score 0) instead of aborting the remaining checks.
 *
 * @returns `results` (one entry per check, same order), `allPassed` (true when
 *          no result failed, including for an empty list) and `avgScore` (mean
 *          of the scores, 0 for an empty list).
 */
export async function runChecks(checks, ctx) {
    const results = [];
    for (const check of checks) {
        let outcome;
        try {
            outcome = await runCheck(check, ctx);
        }
        catch (err) {
            outcome = {
                passed: false,
                score: 0,
                detail: `Check ${check.type} threw: ${err instanceof Error ? err.message : String(err)}`,
            };
        }
        results.push(outcome);
    }
    // Single pass over the collected results for both aggregates.
    let scoreTotal = 0;
    let failureCount = 0;
    for (const entry of results) {
        scoreTotal = scoreTotal + entry.score;
        if (!entry.passed)
            failureCount += 1;
    }
    const allPassed = failureCount === 0;
    const avgScore = results.length > 0 ? scoreTotal / results.length : 0;
    return { results, allPassed, avgScore };
}
64
+ // --- Individual check runners ---
65
/**
 * Run the test command in the working directory and pass only on exit code 0.
 *
 * Uses `ctx.testCommand` when it is set and falls back to `npm test` otherwise.
 * On failure the detail carries the exit code and the first 500 characters of
 * stderr. The check definition itself carries no options and is ignored.
 */
async function runTestPassCheck(_check, ctx) {
    const command = ctx.testCommand ?? 'npm test';
    // NOTE(review): `exec` is used without await, so it is presumably synchronous,
    // and the timeout is presumably in milliseconds; confirm against ./shell.js.
    const run = exec(command, { cwd: ctx.cwd, timeout: 120_000 });
    if (run.exitCode === 0) {
        return { passed: true, score: 1, detail: 'All tests passed' };
    }
    return {
        passed: false,
        score: 0,
        detail: `Tests failed (exit ${run.exitCode}): ${run.stderr.slice(0, 500)}`,
    };
}
75
/**
 * Pass when `check.path`, resolved against `ctx.cwd`, exists on disk.
 */
async function runFileExistsCheck(check, ctx) {
    const target = join(ctx.cwd, check.path);
    if (existsSync(target)) {
        return { passed: true, score: 1, detail: `File exists: ${check.path}` };
    }
    return { passed: false, score: 0, detail: `File not found: ${check.path}` };
}
84
/**
 * Pass when the regular expression `check.pattern` matches somewhere in the
 * file `check.file` (resolved against `ctx.cwd`, read as UTF-8).
 *
 * Every failure mode is reported as a failed result (score 0) rather than a
 * thrown error, matching the other runners in this module:
 *  - the file does not exist;
 *  - the pattern is empty (it would match any content, so the check would
 *    always pass and verify nothing);
 *  - the pattern is not a valid regular expression;
 *  - the path exists but cannot be read as a file (e.g. it is a directory).
 */
async function runGrepCheck(check, ctx) {
    const fullPath = join(ctx.cwd, check.file);
    if (!existsSync(fullPath)) {
        return { passed: false, score: 0, detail: `File not found: ${check.file}` };
    }
    if (check.pattern === '') {
        return { passed: false, score: 0, detail: `Empty pattern for ${check.file}: nothing to match` };
    }
    let regex;
    try {
        regex = new RegExp(check.pattern);
    }
    catch (err) {
        return {
            passed: false,
            score: 0,
            detail: `Invalid pattern "${check.pattern}": ${err instanceof Error ? err.message : String(err)}`,
        };
    }
    let content;
    try {
        content = readFileSync(fullPath, 'utf-8');
    }
    catch (err) {
        return {
            passed: false,
            score: 0,
            detail: `Could not read ${check.file}: ${err instanceof Error ? err.message : String(err)}`,
        };
    }
    const passed = regex.test(content);
    return {
        passed,
        score: passed ? 1 : 0,
        detail: passed ? `Pattern "${check.pattern}" found in ${check.file}` : `Pattern "${check.pattern}" not found in ${check.file}`,
    };
}
98
/**
 * Issue an HTTP request against `localhost` and compare the response with the
 * expectations in the check.
 *
 * The request goes to `http://localhost:<port><path>` using `check.method`,
 * where the port defaults to 3000 when `check.port` is not set, and is aborted
 * after 10 seconds. The check passes when the status equals
 * `check.expect_status` and, if `check.expect_body_contains` is set, the
 * response body contains that string.
 *
 * Any error (connection failure, timeout, malformed URL) is reported as a
 * failed result rather than thrown. `ctx` is not used by this runner.
 */
async function runHttpCheck(check, ctx) {
    try {
        // A path written without its leading "/" would otherwise be glued onto the
        // port ("http://localhost:3000health") and produce a malformed URL.
        const rawPath = check.path ?? '';
        const requestPath = rawPath.startsWith('/') ? rawPath : `/${rawPath}`;
        const url = `http://localhost:${check.port ?? 3000}${requestPath}`;
        const response = await fetch(url, { method: check.method, signal: AbortSignal.timeout(10_000) });
        const body = await response.text();
        const statusMatch = response.status === check.expect_status;
        // With no body expectation configured, the body always counts as matching.
        const bodyMatch = check.expect_body_contains ? body.includes(check.expect_body_contains) : true;
        const passed = statusMatch && bodyMatch;
        return {
            passed,
            score: passed ? 1 : 0,
            detail: passed
                ? `HTTP ${check.method} ${check.path} returned ${response.status}`
                : `HTTP check failed: status=${response.status} (expected ${check.expect_status}), body match=${bodyMatch}`,
        };
    }
    catch (err) {
        return {
            passed: false,
            score: 0,
            detail: `HTTP check failed: ${err instanceof Error ? err.message : String(err)}`,
        };
    }
}
122
/**
 * Tell whether `lines[index]` is a unified-diff file header ("--- old" or
 * "+++ new") rather than a removed or added content line.
 *
 * A header is recognised by its position: a "--- " line directly followed by a
 * "+++ " line, which is directly followed by a hunk marker ("@@").
 */
function isDiffFileHeaderLine(lines, index) {
    const line = lines[index];
    const next = lines[index + 1] ?? '';
    if (line.startsWith('--- ')) {
        return next.startsWith('+++ ') && (lines[index + 2] ?? '').startsWith('@@');
    }
    if (line.startsWith('+++ ')) {
        return index > 0 && lines[index - 1].startsWith('--- ') && next.startsWith('@@');
    }
    return false;
}
/**
 * Enforce optional limits on how large a change is.
 *
 * The file count is the length of `ctx.filesChanged` (0 when absent). The line
 * count is the number of lines in `ctx.diff` that start with "+" or "-",
 * excluding the "---"/"+++" file-header pair that precedes each file's first
 * hunk, so headers do not inflate the count by two per file. A limit that is
 * not set (`null`/`undefined`) is not enforced.
 *
 * The detail always reports both counts and names whichever limit was exceeded.
 */
async function runDiffSizeCheck(check, ctx) {
    const filesCount = ctx.filesChanged?.length ?? 0;
    const lines = (ctx.diff ?? '').split('\n');
    let lineCount = 0;
    for (let i = 0; i < lines.length; i += 1) {
        const line = lines[i];
        if (!line.startsWith('+') && !line.startsWith('-'))
            continue;
        if (isDiffFileHeaderLine(lines, i))
            continue;
        lineCount += 1;
    }
    const fileOk = check.max_files == null || filesCount <= check.max_files;
    const lineOk = check.max_lines == null || lineCount <= check.max_lines;
    const passed = fileOk && lineOk;
    return {
        passed,
        score: passed ? 1 : 0,
        detail: `Diff: ${filesCount} files, ${lineCount} lines` +
            (!fileOk ? ` (max ${check.max_files} files exceeded)` : '') +
            (!lineOk ? ` (max ${check.max_lines} lines exceeded)` : ''),
    };
}
137
/**
 * Require every keyword to appear (as a plain substring) in `ctx.output`.
 *
 * The score is the fraction of keywords that were found, or 1 when the keyword
 * list is empty. The check passes only when no keyword is missing.
 */
async function runKeywordPresentCheck(check, ctx) {
    const text = ctx.output ?? '';
    const absent = [];
    for (const keyword of check.keywords) {
        if (!text.includes(keyword)) {
            absent.push(keyword);
        }
    }
    const total = check.keywords.length;
    const hits = total - absent.length;
    if (absent.length === 0) {
        return {
            passed: true,
            score: total > 0 ? hits / total : 1,
            detail: `All keywords found: ${check.keywords.join(', ')}`,
        };
    }
    return {
        passed: false,
        score: total > 0 ? hits / total : 1,
        detail: `Missing keywords: ${absent.join(', ')}`,
    };
}
150
/**
 * Require that none of the keywords appears (as a plain substring) in
 * `ctx.output`. Scoring is all-or-nothing: 1 when clean, 0 otherwise.
 */
async function runKeywordAbsentCheck(check, ctx) {
    const text = ctx.output ?? '';
    const offenders = [];
    for (const keyword of check.keywords) {
        if (text.includes(keyword)) {
            offenders.push(keyword);
        }
    }
    if (offenders.length > 0) {
        return {
            passed: false,
            score: 0,
            detail: `Forbidden keywords present: ${offenders.join(', ')}`,
        };
    }
    return { passed: true, score: 1, detail: `No forbidden keywords found` };
}
162
/**
 * Ask an LLM to grade `ctx.output` against `check.rubric` on a 1-5 scale.
 *
 * Model selection: `check.model`, else `ctx.judgeModel`, else
 * 'claude-haiku-4-5'. Only the first 8000 characters of the output are sent.
 * The rating is the first single digit found at the start of a line in the
 * reply (0 when there is none), clamped to 0..5. The check passes when the
 * rating is at least `check.min_score`; the reported score is rating / 5.
 *
 * Any error while running the judge or reading its reply is logged as a
 * warning and reported as a failed result rather than thrown.
 */
async function runLlmJudgeCheck(check, ctx) {
    const judgeModel = check.model || ctx.judgeModel || 'claude-haiku-4-5';
    const excerpt = (ctx.output ?? '').slice(0, 8000);
    const prompt = `You are an evaluation judge. Score the following output on a scale of 1-5 based on this rubric.

## Rubric
${check.rubric}

## Output to evaluate
${excerpt}

Respond with ONLY a single number (1-5) on the first line, followed by a brief explanation.`;
    try {
        // NOTE(review): the judge always runs through the 'claude' agent regardless
        // of the configured agent; confirm this is intended.
        const reply = await spawnAgent({
            agent: 'claude',
            model: judgeModel,
            prompt,
            cwd: ctx.cwd,
            timeout: 60_000,
            maxTurns: 1,
        });
        const leadingDigit = reply.output.match(/^(\d)/m);
        let rating = 0;
        if (leadingDigit) {
            rating = parseInt(leadingDigit[1], 10);
        }
        const normalizedScore = Math.max(0, Math.min(5, rating));
        return {
            passed: normalizedScore >= check.min_score,
            score: normalizedScore / 5,
            detail: `LLM judge score: ${normalizedScore}/5 (min: ${check.min_score}). ${reply.output.slice(0, 200)}`,
        };
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        log.warn(`LLM judge check failed: ${reason}`);
        return { passed: false, score: 0, detail: `LLM judge failed: ${reason}` };
    }
}
201
/**
 * Parse check definitions from a checks.yaml object.
 *
 * Reads the `checks` array of `raw` and normalises each entry into a check
 * definition, coercing fields to the expected primitive types and filling in
 * defaults (HTTP method 'GET', path '/', expected status 200; LLM judge
 * minimum score 3).
 *
 * @param raw - Parsed YAML document; anything that is not an object, or has no
 *              `checks` array, yields an empty list.
 * @returns One definition per entry, in order. Entries with an unrecognised
 *          type, and entries that are not objects at all, become `{ type }`
 *          with only the (possibly empty) type string, which `runCheck`
 *          reports as an unknown check type.
 */
export function parseChecks(raw) {
    if (!raw || typeof raw !== 'object')
        return [];
    const checks = Array.isArray(raw.checks) ? raw.checks : [];
    return checks.map((entry) => {
        // A null entry (e.g. an empty "-" item in the YAML list) must not crash
        // parsing; treat it like any other entry without a type.
        const c = entry !== null && typeof entry === 'object' ? entry : {};
        const type = String(c.type ?? '');
        switch (type) {
            case 'test_pass':
                return { type: 'test_pass' };
            case 'file_exists':
                return { type: 'file_exists', path: String(c.path ?? '') };
            case 'grep':
                return { type: 'grep', file: String(c.file ?? ''), pattern: String(c.pattern ?? '') };
            case 'http':
                return {
                    type: 'http',
                    method: String(c.method ?? 'GET'),
                    path: String(c.path ?? '/'),
                    // Carry the configured port through; without this the HTTP runner
                    // never sees it and always falls back to its default port.
                    port: c.port != null ? Number(c.port) : undefined,
                    expect_status: Number(c.expect_status ?? 200),
                    expect_body_contains: c.expect_body_contains ? String(c.expect_body_contains) : undefined,
                };
            case 'diff_size':
                return {
                    type: 'diff_size',
                    max_files: c.max_files != null ? Number(c.max_files) : undefined,
                    max_lines: c.max_lines != null ? Number(c.max_lines) : undefined,
                };
            case 'keyword_present':
                return {
                    type: 'keyword_present',
                    keywords: Array.isArray(c.keywords) ? c.keywords.map(String) : [],
                };
            case 'keyword_absent':
                return {
                    type: 'keyword_absent',
                    keywords: Array.isArray(c.keywords) ? c.keywords.map(String) : [],
                };
            case 'llm_judge':
                return {
                    type: 'llm_judge',
                    model: String(c.model ?? ''),
                    rubric: String(c.rubric ?? ''),
                    min_score: Number(c.min_score ?? 3),
                };
            default:
                return { type };
        }
    });
}
254
+ //# sourceMappingURL=eval-checks.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"eval-checks.js","sourceRoot":"","sources":["../../src/lib/eval-checks.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AACH,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACnD,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAClC,OAAO,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AACxC,OAAO,EAAE,GAAG,EAAE,MAAM,aAAa,CAAC;AA8FlC;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,QAAQ,CAAC,KAAsB,EAAE,GAAiB;IACtE,QAAQ,KAAK,CAAC,IAAI,EAAE,CAAC;QACnB,KAAK,WAAW;YACd,OAAO,gBAAgB,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;QACtC,KAAK,aAAa;YAChB,OAAO,kBAAkB,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;QACxC,KAAK,MAAM;YACT,OAAO,YAAY,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;QAClC,KAAK,MAAM;YACT,OAAO,YAAY,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;QAClC,KAAK,WAAW;YACd,OAAO,gBAAgB,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;QACtC,KAAK,iBAAiB;YACpB,OAAO,sBAAsB,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;QAC5C,KAAK,gBAAgB;YACnB,OAAO,qBAAqB,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;QAC3C,KAAK,WAAW;YACd,OAAO,gBAAgB,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;QACtC;YACE,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,EAAE,MAAM,EAAE,uBAAwB,KAAmB,CAAC,IAAI,EAAE,EAAE,CAAC;IACnG,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,MAAyB,EAAE,GAAiB;IAK1E,MAAM,OAAO,GAAkB,EAAE,CAAC;IAClC,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;YAC1C,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACvB,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,OAAO,CAAC,IAAI,CAAC;gBACX,MAAM,EAAE,KAAK;gBACb,KAAK,EAAE,CAAC;gBACR,MAAM,EAAE,SAAS,KAAK,CAAC,IAAI,WAAW,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE;aACzF,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,MAAM,SAAS,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;IACjD,MAAM,QAAQ,GAAG,OAAO,CAAC,MAAM,GAAG,CAAC;QACjC,CAAC,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,GAAG,OAAO,CAAC,MAAM;QAC/D,CAAC,CAAC,CAAC,CAAC;IAEN,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,CAAC;AAC1C,CAAC;AAED,mCAAmC;AAEnC,KAAK,UAAU,gBAAgB,CAA
C,MAAqB,EAAE,GAAiB;IACtE,MAAM,GAAG,GAAG,GAAG,CAAC,WAAW,IAAI,UAAU,CAAC;IAC1C,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,EAAE,EAAE,GAAG,EAAE,GAAG,CAAC,GAAG,EAAE,OAAO,EAAE,OAAO,EAAE,CAAC,CAAC;IAC7D,MAAM,MAAM,GAAG,MAAM,CAAC,QAAQ,KAAK,CAAC,CAAC;IACrC,OAAO;QACL,MAAM;QACN,KAAK,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACrB,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC,kBAAkB,CAAC,CAAC,CAAC,sBAAsB,MAAM,CAAC,QAAQ,MAAM,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE;KAC/G,CAAC;AACJ,CAAC;AAED,KAAK,UAAU,kBAAkB,CAAC,KAAsB,EAAE,GAAiB;IACzE,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,CAAC,CAAC;IAC3C,MAAM,MAAM,GAAG,UAAU,CAAC,QAAQ,CAAC,CAAC;IACpC,OAAO;QACL,MAAM;QACN,KAAK,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACrB,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC,gBAAgB,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,mBAAmB,KAAK,CAAC,IAAI,EAAE;KAChF,CAAC;AACJ,CAAC;AAED,KAAK,UAAU,YAAY,CAAC,KAAgB,EAAE,GAAiB;IAC7D,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,CAAC,CAAC;IAC3C,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;QAC1B,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,EAAE,MAAM,EAAE,mBAAmB,KAAK,CAAC,IAAI,EAAE,EAAE,CAAC;IAC9E,CAAC;IACD,MAAM,OAAO,GAAG,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IAChD,MAAM,KAAK,GAAG,IAAI,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IACxC,MAAM,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IACnC,OAAO;QACL,MAAM;QACN,KAAK,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACrB,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC,YAAY,KAAK,CAAC,OAAO,cAAc,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,YAAY,KAAK,CAAC,OAAO,kBAAkB,KAAK,CAAC,IAAI,EAAE;KAC/H,CAAC;AACJ,CAAC;AAED,KAAK,UAAU,YAAY,CAAC,KAAgB,EAAE,GAAiB;IAC7D,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,oBAAoB,KAAK,CAAC,IAAI,IAAI,IAAI,GAAG,KAAK,CAAC,IAAI,EAAE,CAAC;QAClE,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE,EAAE,MAAM,EAAE,KAAK,CAAC,MAAM,EAAE,MAAM,EAAE,WAAW,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QACjG,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;QACnC,MAAM,WAAW,GAAG,QAAQ,CAAC,MAAM,KAAK,KAAK,CAAC,aAAa,CAAC;QAC5D,MAAM,SAAS,GAAG,KAAK,CAAC,oBAAoB,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,KA
AK,CAAC,oBAAoB,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;QAChG,MAAM,MAAM,GAAG,WAAW,IAAI,SAAS,CAAC;QACxC,OAAO;YACL,MAAM;YACN,KAAK,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YACrB,MAAM,EAAE,MAAM;gBACZ,CAAC,CAAC,QAAQ,KAAK,CAAC,MAAM,IAAI,KAAK,CAAC,IAAI,aAAa,QAAQ,CAAC,MAAM,EAAE;gBAClE,CAAC,CAAC,6BAA6B,QAAQ,CAAC,MAAM,cAAc,KAAK,CAAC,aAAa,iBAAiB,SAAS,EAAE;SAC9G,CAAC;IACJ,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,OAAO;YACL,MAAM,EAAE,KAAK;YACb,KAAK,EAAE,CAAC;YACR,MAAM,EAAE,sBAAsB,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE;SACjF,CAAC;IACJ,CAAC;AACH,CAAC;AAED,KAAK,UAAU,gBAAgB,CAAC,KAAoB,EAAE,GAAiB;IACrE,MAAM,UAAU,GAAG,GAAG,CAAC,YAAY,EAAE,MAAM,IAAI,CAAC,CAAC;IACjD,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,IAAI,EAAE,CAAC;IAC5B,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC;IAEhG,MAAM,MAAM,GAAG,KAAK,CAAC,SAAS,IAAI,IAAI,IAAI,UAAU,IAAI,KAAK,CAAC,SAAS,CAAC;IACxE,MAAM,MAAM,GAAG,KAAK,CAAC,SAAS,IAAI,IAAI,IAAI,SAAS,IAAI,KAAK,CAAC,SAAS,CAAC;IACvE,MAAM,MAAM,GAAG,MAAM,IAAI,MAAM,CAAC;IAEhC,OAAO;QACL,MAAM;QACN,KAAK,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACrB,MAAM,EAAE,SAAS,UAAU,WAAW,SAAS,QAAQ;YACrD,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,SAAS,KAAK,CAAC,SAAS,kBAAkB,CAAC,CAAC,CAAC,EAAE,CAAC;YAC3D,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,SAAS,KAAK,CAAC,SAAS,kBAAkB,CAAC,CAAC,CAAC,EAAE,CAAC;KAC9D,CAAC;AACJ,CAAC;AAED,KAAK,UAAU,sBAAsB,CAAC,KAA0B,EAAE,GAAiB;IACjF,MAAM,MAAM,GAAG,GAAG,CAAC,MAAM,IAAI,EAAE,CAAC;IAChC,MAAM,KAAK,GAAG,KAAK,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,CAAC;IACjE,MAAM,OAAO,GAAG,KAAK,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,CAAC;IACpE,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,KAAK,CAAC,CAAC;IACpC,OAAO;QACL,MAAM;QACN,KAAK,EAAE,KAAK,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,GAAG,KAAK,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;QAC3E,MAAM,EAAE,MAAM;YACZ,CAAC,CA
AC,uBAAuB,KAAK,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE;YACpD,CAAC,CAAC,qBAAqB,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE;KAC9C,CAAC;AACJ,CAAC;AAED,KAAK,UAAU,qBAAqB,CAAC,KAAyB,EAAE,GAAiB;IAC/E,MAAM,MAAM,GAAG,GAAG,CAAC,MAAM,IAAI,EAAE,CAAC;IAChC,MAAM,OAAO,GAAG,KAAK,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,CAAC;IACnE,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,KAAK,CAAC,CAAC;IACpC,OAAO;QACL,MAAM;QACN,KAAK,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACrB,MAAM,EAAE,MAAM;YACZ,CAAC,CAAC,6BAA6B;YAC/B,CAAC,CAAC,+BAA+B,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE;KACxD,CAAC;AACJ,CAAC;AAED,KAAK,UAAU,gBAAgB,CAAC,KAAoB,EAAE,GAAiB;IACrE,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,IAAI,GAAG,CAAC,UAAU,IAAI,kBAAkB,CAAC;IAClE,MAAM,MAAM,GAAG;;;EAGf,KAAK,CAAC,MAAM;;;EAGZ,CAAC,GAAG,CAAC,MAAM,IAAI,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC;;4FAEyD,CAAC;IAE3F,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC;YAC9B,KAAK,EAAE,QAAQ;YACf,KAAK;YACL,MAAM;YACN,GAAG,EAAE,GAAG,CAAC,GAAG;YACZ,OAAO,EAAE,MAAM;YACf,QAAQ,EAAE,CAAC;SACZ,CAAC,CAAC;QAEH,MAAM,UAAU,GAAG,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;QACjD,MAAM,KAAK,GAAG,UAAU,CAAC,CAAC,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAC3D,MAAM,eAAe,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,CAAC;QACxD,MAAM,MAAM,GAAG,eAAe,IAAI,KAAK,CAAC,SAAS,CAAC;QAElD,OAAO;YACL,MAAM;YACN,KAAK,EAAE,eAAe,GAAG,CAAC;YAC1B,MAAM,EAAE,oBAAoB,eAAe,YAAY,KAAK,CAAC,SAAS,MAAM,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE;SAC1G,CAAC;IACJ,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,GAAG,CAAC,IAAI,CAAC,2BAA2B,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QACxF,OAAO;YACL,MAAM,EAAE,KAAK;YACb,KAAK,EAAE,CAAC;YACR,MAAM,EAAE,qBAAqB,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE;SAChF,CAAC;IACJ,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,WAAW,CAAC,GAAY;IACtC,IAAI,CAAC,GAAG,IAAI,OAAO,GAAG,KAAK,QAAQ;QAAE,OAAO,EAAE,CAAC;IAC/C,MAAM,GAAG,GA
AG,GAA8B,CAAC;IAC3C,MAAM,MAAM,GAAG,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC;IAE3D,OAAO,MAAM,CAAC,GAAG,CAAC,CAAC,CAA0B,EAAE,EAAE;QAC/C,MAAM,IAAI,GAAG,MAAM,CAAC,CAAC,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC;QAClC,QAAQ,IAAI,EAAE,CAAC;YACb,KAAK,WAAW;gBACd,OAAO,EAAE,IAAI,EAAE,WAAW,EAAmB,CAAC;YAChD,KAAK,aAAa;gBAChB,OAAO,EAAE,IAAI,EAAE,aAAa,EAAE,IAAI,EAAE,MAAM,CAAC,CAAC,CAAC,IAAI,IAAI,EAAE,CAAC,EAAqB,CAAC;YAChF,KAAK,MAAM;gBACT,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,CAAC,CAAC,CAAC,IAAI,IAAI,EAAE,CAAC,EAAE,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC,OAAO,IAAI,EAAE,CAAC,EAAe,CAAC;YACrG,KAAK,MAAM;gBACT,OAAO;oBACL,IAAI,EAAE,MAAM;oBACZ,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC,MAAM,IAAI,KAAK,CAAC;oBACjC,IAAI,EAAE,MAAM,CAAC,CAAC,CAAC,IAAI,IAAI,GAAG,CAAC;oBAC3B,aAAa,EAAE,MAAM,CAAC,CAAC,CAAC,aAAa,IAAI,GAAG,CAAC;oBAC7C,oBAAoB,EAAE,CAAC,CAAC,oBAAoB,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,oBAAoB,CAAC,CAAC,CAAC,CAAC,SAAS;iBAC7E,CAAC;YACjB,KAAK,WAAW;gBACd,OAAO;oBACL,IAAI,EAAE,WAAW;oBACjB,SAAS,EAAE,CAAC,CAAC,SAAS,IAAI,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,SAAS;oBAChE,SAAS,EAAE,CAAC,CAAC,SAAS,IAAI,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,SAAS;iBAChD,CAAC;YACrB,KAAK,iBAAiB;gBACpB,OAAO;oBACL,IAAI,EAAE,iBAAiB;oBACvB,QAAQ,EAAE,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE;iBAC3C,CAAC;YAC3B,KAAK,gBAAgB;gBACnB,OAAO;oBACL,IAAI,EAAE,gBAAgB;oBACtB,QAAQ,EAAE,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE;iBAC5C,CAAC;YAC1B,KAAK,WAAW;gBACd,OAAO;oBACL,IAAI,EAAE,WAAW;oBACjB,KAAK,EAAE,MAAM,CAAC,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;oBAC5B,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC,MAAM,IAAI,EAAE,CAAC;oBAC9B,SAAS,EAAE,MAAM,CAAC,CAAC,CAAC,SAAS,IAAI,CAAC,CAAC;iBACnB,CAAC;YACrB;gBACE,OAAO,EAAE,IAAI,EAAkC,CAAC;QACpD,CAAC;IACH,CAAC,CAAC,CAAC;AACL,CAAC"}
@@ -0,0 +1,29 @@
1
+ import type { Config } from './config.js';
2
+ import type { CheckDefinition } from './eval-checks.js';
3
+ import type { EvalCase, EvalSuiteResult } from './eval.js';
4
+ /** Options for running the eval suite (passed as the optional `options` argument of runEvalSuite). Every field is optional. */
5
+ export type EvalRunOptions = {
6
+ /** Only run the case whose ID matches this value (prefix match). */
7
+ caseId?: string;
8
+ /** Enable verbose output. */
9
+ verbose?: boolean;
10
+ };
11
+ /** Extended eval case: an EvalCase plus optional parsed check definitions and optional raw input text. */
12
+ export type EvalCaseWithChecks = EvalCase & {
13
+ checks?: CheckDefinition[];
14
+ /** For step cases: raw input text. */
15
+ inputText?: string;
16
+ };
17
+ /**
18
+ * Run a full eval suite: iterate over cases, execute, collect results.
+ *
+ * @param cases Eval cases to run; each may carry parsed check definitions.
+ * @param config Project configuration (the Config type from ./config.js).
+ * @param options Optional run settings: a case ID prefix filter and a verbosity flag.
+ * @returns A promise that resolves to the suite result.
19
+ */
20
+ export declare function runEvalSuite(cases: EvalCaseWithChecks[], config: Config, options?: EvalRunOptions): Promise<EvalSuiteResult>;
21
+ /**
22
+ * Prepare a fixture directory for an e2e eval case.
23
+ * Clones the repo at the specified ref into a worktree.
+ *
+ * @param evalCase The eval case to prepare a fixture for.
+ * @param projectDir Project directory path. NOTE(review): presumably the repository the worktree is created from; confirm against the implementation.
+ * @returns A string. NOTE(review): presumably the path of the prepared fixture directory; confirm against the implementation.
24
+ */
25
+ export declare function prepareFixture(evalCase: EvalCase, projectDir: string): string;
26
+ /**
27
+ * Snapshot the current harness state (prompts + skills + config) as a hash.
+ *
+ * @param config Project configuration (the Config type from ./config.js).
+ * @returns The snapshot hash, as a string.
28
+ */
29
+ export declare function snapshotHarness(config: Config): string;