evalbuff 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. package/README.md +79 -0
  2. package/dist/carve-features.d.ts +42 -0
  3. package/dist/carve-features.d.ts.map +1 -0
  4. package/dist/carve-features.js +305 -0
  5. package/dist/carve-features.js.map +1 -0
  6. package/dist/cli.d.ts +3 -0
  7. package/dist/cli.d.ts.map +1 -0
  8. package/dist/cli.js +42 -0
  9. package/dist/cli.js.map +1 -0
  10. package/dist/docs-refactor.d.ts +4 -0
  11. package/dist/docs-refactor.d.ts.map +1 -0
  12. package/dist/docs-refactor.js +122 -0
  13. package/dist/docs-refactor.js.map +1 -0
  14. package/dist/docs-writer.d.ts +4 -0
  15. package/dist/docs-writer.d.ts.map +1 -0
  16. package/dist/docs-writer.js +122 -0
  17. package/dist/docs-writer.js.map +1 -0
  18. package/dist/eval-helpers.d.ts +19 -0
  19. package/dist/eval-helpers.d.ts.map +1 -0
  20. package/dist/eval-helpers.js +327 -0
  21. package/dist/eval-helpers.js.map +1 -0
  22. package/dist/eval-runner.d.ts +42 -0
  23. package/dist/eval-runner.d.ts.map +1 -0
  24. package/dist/eval-runner.js +193 -0
  25. package/dist/eval-runner.js.map +1 -0
  26. package/dist/judge.d.ts +22 -0
  27. package/dist/judge.d.ts.map +1 -0
  28. package/dist/judge.js +284 -0
  29. package/dist/judge.js.map +1 -0
  30. package/dist/perfect-feature.d.ts +2 -0
  31. package/dist/perfect-feature.d.ts.map +1 -0
  32. package/dist/perfect-feature.js +666 -0
  33. package/dist/perfect-feature.js.map +1 -0
  34. package/dist/report.d.ts +31 -0
  35. package/dist/report.d.ts.map +1 -0
  36. package/dist/report.js +249 -0
  37. package/dist/report.js.map +1 -0
  38. package/dist/run-evalbuff.d.ts +12 -0
  39. package/dist/run-evalbuff.d.ts.map +1 -0
  40. package/dist/run-evalbuff.js +383 -0
  41. package/dist/run-evalbuff.js.map +1 -0
  42. package/dist/runners/claude.d.ts +10 -0
  43. package/dist/runners/claude.d.ts.map +1 -0
  44. package/dist/runners/claude.js +80 -0
  45. package/dist/runners/claude.js.map +1 -0
  46. package/dist/runners/codebuff.d.ts +24 -0
  47. package/dist/runners/codebuff.d.ts.map +1 -0
  48. package/dist/runners/codebuff.js +88 -0
  49. package/dist/runners/codebuff.js.map +1 -0
  50. package/dist/runners/codex.d.ts +8 -0
  51. package/dist/runners/codex.d.ts.map +1 -0
  52. package/dist/runners/codex.js +131 -0
  53. package/dist/runners/codex.js.map +1 -0
  54. package/dist/runners/index.d.ts +5 -0
  55. package/dist/runners/index.d.ts.map +1 -0
  56. package/dist/runners/index.js +4 -0
  57. package/dist/runners/index.js.map +1 -0
  58. package/dist/runners/runner.d.ts +11 -0
  59. package/dist/runners/runner.d.ts.map +1 -0
  60. package/dist/runners/runner.js +2 -0
  61. package/dist/runners/runner.js.map +1 -0
  62. package/dist/test-repo-utils.d.ts +21 -0
  63. package/dist/test-repo-utils.d.ts.map +1 -0
  64. package/dist/test-repo-utils.js +109 -0
  65. package/dist/test-repo-utils.js.map +1 -0
  66. package/dist/trace-compressor.d.ts +130 -0
  67. package/dist/trace-compressor.d.ts.map +1 -0
  68. package/dist/trace-compressor.js +680 -0
  69. package/dist/trace-compressor.js.map +1 -0
  70. package/dist/tui/data.d.ts +84 -0
  71. package/dist/tui/data.d.ts.map +1 -0
  72. package/dist/tui/data.js +80 -0
  73. package/dist/tui/data.js.map +1 -0
  74. package/dist/tui/events.d.ts +86 -0
  75. package/dist/tui/events.d.ts.map +1 -0
  76. package/dist/tui/events.js +52 -0
  77. package/dist/tui/events.js.map +1 -0
  78. package/dist/vendor/error.d.ts +18 -0
  79. package/dist/vendor/error.d.ts.map +1 -0
  80. package/dist/vendor/error.js +64 -0
  81. package/dist/vendor/error.js.map +1 -0
  82. package/dist/vendor/print-mode.d.ts +75 -0
  83. package/dist/vendor/print-mode.d.ts.map +1 -0
  84. package/dist/vendor/print-mode.js +2 -0
  85. package/dist/vendor/print-mode.js.map +1 -0
  86. package/package.json +46 -0
@@ -0,0 +1,383 @@
1
+ /**
2
+ * Evalbuff — iterative documentation optimization through feature carving.
3
+ *
4
+ * Pipeline:
5
+ * 1. Plan features to carve (GPT-5.4 via Codex SDK)
6
+ * 2. Carve a random subset of n features
7
+ * 3. Baseline: rebuild each in parallel (Claude Code), judge (Codex), get scores + doc suggestions
8
+ * 4. Loop N times:
9
+ * a. Docs writer agent reads judge suggestions and edits all docs holistically
10
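+ * b. Re-eval: rebuild in parallel, judge, get new scores + doc suggestions
+ * c. Re-judge the stored baseline diffs against the updated docs, to tell judge recalibration apart from real agent improvement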
11
+ *
12
+ * Usage:
13
+ * bun run src/run-evalbuff.ts --repo /path/to/repo [--n 5] [--parallelism 10] [--loops 3] [--init-command "npm install"] [--coding-model sonnet] [--docs-model opus] [--cached-features /path/to/features.json]
14
+ */
15
+ import fs from 'fs';
16
+ import os from 'os';
17
+ import path from 'path';
18
+ import { planFeatures, carveFeature } from './carve-features';
19
+ import { collectDocSuggestions, runDocsWriterAgent } from './docs-writer';
20
+ import { selectRandom, getGroundTruthDiff, getDocsSnapshot, computeDocsDiffText } from './eval-helpers';
21
+ import { runAgentOnCarve, rejudgeBaselineWithCurrentDocs } from './eval-runner';
22
+ import { saveRoundResults, saveBaselineRejudgeResults, saveSummary } from './report';
23
+ import { events } from './tui/events';
24
+ // --- Eval round ---
25
/**
 * Runs one evaluation round: every feature is handed to `runAgentOnCarve`
 * with bounded concurrency, and the per-feature results are aggregated.
 *
 * A feature whose run throws is recorded as a failed task (score -1) instead
 * of aborting the round. Failed tasks are excluded from the average score but
 * still appear in `tasks` and in the emitted `round_complete` scores.
 *
 * @param {Array<{id: string, prompt: string}>} features - Features to evaluate.
 * @param {Map<string, string>} groundTruthDiffs - Ground-truth diff per feature id.
 * @param {object} opts - Run options; reads `parallelism`, `repoPath`,
 *   `initCommand` and `codingModel`.
 * @param {number} round - Round number used for logging and in the result.
 * @returns {Promise<{round: number, tasks: object[], avgScore: number, totalCost: number}>}
 */
async function runEvalRound(features, groundTruthDiffs, opts, round) {
    console.log(`\n${'='.repeat(60)}`);
    console.log(`ROUND ${round} — Evaluating ${features.length} features (parallelism=${opts.parallelism})`);
    console.log(`${'='.repeat(60)}`);
    // Run features with bounded concurrency
    const results = [];
    const queue = features.map((feature, i) => ({ feature, i }));
    let next = 0;
    async function worker() {
        while (next < queue.length) {
            const { feature, i } = queue[next++];
            try {
                events.send({ type: 'feature_status', featureId: feature.id, status: 'agent_running' });
                const result = await runAgentOnCarve({
                    idx: i,
                    total: features.length,
                    repoPath: opts.repoPath,
                    feature,
                    initCommand: opts.initCommand,
                    model: opts.codingModel,
                    groundTruthDiff: groundTruthDiffs.get(feature.id) ?? '',
                    docsSourcePath: opts.repoPath,
                });
                results[i] = result;
                events.send({ type: 'feature_status', featureId: feature.id, status: 'scored', score: result.score, cost: result.costEstimate });
            }
            catch (error) {
                const msg = error instanceof Error ? error.message : String(error);
                // Record a placeholder task so the round keeps one entry per feature.
                results[i] = {
                    featureId: feature.id,
                    prompt: feature.prompt,
                    score: -1,
                    diff: '',
                    trace: `Agent error: ${msg}`,
                    judging: {
                        analysis: `Agent failed: ${msg.slice(0, 500)}`,
                        strengths: [],
                        weaknesses: ['Agent failed due to infrastructure error'],
                        e2eTestsPerformed: [],
                        completionScore: -1,
                        codeQualityScore: -1,
                        e2eScore: -1,
                        overallScore: -1,
                    },
                    costEstimate: 0,
                    docsRead: [],
                };
                events.send({ type: 'feature_status', featureId: feature.id, status: 'eval_failed', detail: msg.slice(0, 200) });
            }
        }
    }
    // Always start at least one worker: a parallelism of 0, a negative number
    // or NaN would otherwise spawn none and silently skip every feature.
    const requested = Math.floor(Number(opts.parallelism));
    const workerCount = Math.max(1, Math.min(Number.isNaN(requested) ? 1 : requested, features.length));
    await Promise.all(Array.from({ length: workerCount }, () => worker()));
    // Failed tasks carry score -1 and are left out of the average.
    const valid = results.filter((r) => r.score >= 0);
    const avgScore = valid.length > 0
        ? valid.reduce((a, r) => a + r.score, 0) / valid.length
        : 0;
    const totalCost = results.reduce((a, r) => a + r.costEstimate, 0);
    console.log(`\nRound ${round} results:`);
    for (const r of results) {
        const status = r.score >= 0 ? `${r.score.toFixed(1)}/10` : 'FAILED';
        console.log(`  ${r.featureId}: ${status}`);
    }
    console.log(`  Average: ${avgScore.toFixed(1)}/10 (${valid.length}/${results.length} succeeded)`);
    console.log(`  Cost: $${totalCost.toFixed(2)}`);
    events.send({
        type: 'round_complete',
        round,
        avgScore,
        totalCost,
        scores: Object.fromEntries(results.map((r) => [r.featureId, r.score])),
    });
    return { round, tasks: results, avgScore, totalCost };
}
98
+ // --- Baseline rejudge round ---
99
+ //
100
+ // Re-runs the judge on the baseline's stored diffs/traces after docs have been
101
+ // updated. The agent's work is fixed — only the docs given to the judge change.
102
+ // This lets us see whether score changes over loops reflect real agent
103
+ // improvement or merely judge recalibration from better docs.
104
/**
 * Re-scores the baseline round's stored diffs via
 * `rejudgeBaselineWithCurrentDocs`, with bounded concurrency.
 *
 * Baseline tasks that already failed (score < 0), or whose feature is not in
 * `features`, are carried forward unchanged. A rejudge that throws produces a
 * failed task (score -1) for that feature only.
 *
 * @param {{tasks: object[], avgScore: number}} baseline - Baseline round result.
 * @param {Array<{id: string}>} features - Features evaluated in the run.
 * @param {Map<string, string>} groundTruthDiffs - Ground-truth diff per feature id.
 * @param {object} opts - Run options; reads `parallelism`, `repoPath` and
 *   `initCommand`.
 * @param {number} loop - Improvement-loop number, used for logging and as the
 *   `round` of the returned result.
 * @returns {Promise<{round: number, tasks: object[], avgScore: number, totalCost: number}>}
 *   `totalCost` is always 0 here.
 */
async function runBaselineRejudgeRound(baseline, features, groundTruthDiffs, opts, loop) {
    console.log(`\n${'-'.repeat(60)}`);
    console.log(`BASELINE REJUDGE (loop ${loop}) — Re-scoring ${baseline.tasks.length} baseline diffs with current docs`);
    console.log(`${'-'.repeat(60)}`);
    const featureById = new Map(features.map((f) => [f.id, f]));
    const results = [];
    const queue = baseline.tasks.map((baselineTask, i) => ({ baselineTask, i }));
    let next = 0;
    async function worker() {
        while (next < queue.length) {
            const { baselineTask, i } = queue[next++];
            const feature = featureById.get(baselineTask.featureId);
            // If baseline task itself failed (infra error) or we can't find the feature,
            // carry the failure forward unchanged.
            if (!feature || baselineTask.score < 0) {
                results[i] = baselineTask;
                continue;
            }
            try {
                const judging = await rejudgeBaselineWithCurrentDocs({
                    idx: i,
                    total: queue.length,
                    repoPath: opts.repoPath,
                    feature,
                    baselineDiff: baselineTask.diff,
                    groundTruthDiff: groundTruthDiffs.get(feature.id) ?? '',
                    initCommand: opts.initCommand,
                    docsSourcePath: opts.repoPath,
                });
                results[i] = {
                    ...baselineTask,
                    score: judging.overallScore,
                    judging,
                    costEstimate: 0, // rejudge cost is tracked separately in the judge process
                };
            }
            catch (error) {
                const msg = error instanceof Error ? error.message : String(error);
                console.warn(`  [Rejudge] ${baselineTask.featureId} failed: ${msg.slice(0, 200)}`);
                results[i] = {
                    ...baselineTask,
                    score: -1,
                    judging: {
                        analysis: `Rejudge failed: ${msg.slice(0, 500)}`,
                        strengths: [],
                        weaknesses: ['Rejudge failed'],
                        e2eTestsPerformed: [],
                        completionScore: -1,
                        codeQualityScore: -1,
                        e2eScore: -1,
                        overallScore: -1,
                    },
                };
            }
        }
    }
    // Always start at least one worker: a parallelism of 0, a negative number
    // or NaN would otherwise spawn none and silently skip every baseline task.
    const requested = Math.floor(Number(opts.parallelism));
    const workerCount = Math.max(1, Math.min(Number.isNaN(requested) ? 1 : requested, queue.length));
    await Promise.all(Array.from({ length: workerCount }, () => worker()));
    // Failed tasks carry score -1 and are left out of the average.
    const valid = results.filter((r) => r.score >= 0);
    const avgScore = valid.length > 0
        ? valid.reduce((a, r) => a + r.score, 0) / valid.length
        : 0;
    console.log(`\nBaseline rejudge (loop ${loop}) results:`);
    for (const r of results) {
        const status = r.score >= 0 ? `${r.score.toFixed(1)}/10` : 'FAILED';
        console.log(`  ${r.featureId}: ${status}`);
    }
    console.log(`  Average: ${avgScore.toFixed(1)}/10 (vs baseline ${baseline.avgScore.toFixed(1)}/10)`);
    return { round: loop, tasks: results, avgScore, totalCost: 0 };
}
173
+ // --- Main orchestrator ---
174
/**
 * Main orchestrator for an evalbuff run.
 *
 * Steps, as implemented below:
 *   1. Obtain features: either load them from `opts.cachedFeatures`, or plan
 *      candidates with `planFeatures` and carve a random subset.
 *   2. Run a baseline evaluation round (round 0).
 *   3. For each improvement loop: collect judge suggestions, run the docs
 *      writer agent, re-evaluate, then re-judge the baseline diffs.
 *   4. Save a summary and emit `run_complete`.
 *
 * Artifacts are written into a fresh directory under the OS temp dir.
 * If no features are available the run aborts early and returns.
 *
 * @param {object} opts - Run options; reads `repoPath`, `n`, `loops`,
 *   `parallelism`, `codingModel`, `docsModel`, `cachedFeatures`
 *   (`initCommand` is read by the evaluation rounds).
 * @returns {Promise<void>}
 */
export async function runEvalbuff(opts) {
    const startTime = new Date().toISOString();
    const logDir = path.join(os.tmpdir(), `evalbuff-run-${new Date().toISOString().slice(0, 19).replace(/:/g, '-')}`);
    fs.mkdirSync(logDir, { recursive: true });
    events.initLog(logDir);
    events.send({
        type: 'run_start',
        repoPath: opts.repoPath,
        n: opts.n,
        loops: opts.loops,
        parallelism: opts.parallelism,
        codingModel: opts.codingModel,
        docsModel: opts.docsModel,
        logDir,
    });
    console.log(`\nEvalbuff Run`);
    console.log(`  Repo: ${opts.repoPath}`);
    console.log(`  Improvement loops: ${opts.loops}`);
    console.log(`  Coding model: ${opts.codingModel}`);
    console.log(`  Docs model: ${opts.docsModel}`);
    console.log(`  Log dir: ${logDir}`);
    let features;
    if (opts.cachedFeatures) {
        // --- Load cached features ---
        console.log(`\nLoading cached features from ${opts.cachedFeatures}`);
        const cached = JSON.parse(fs.readFileSync(opts.cachedFeatures, 'utf-8'));
        features = selectRandom(cached, opts.n);
        console.log(`  Loaded ${cached.length} features, selected ${features.length}: ${features.map((f) => f.id).join(', ')}`);
        events.send({ type: 'feature_planned', totalCandidates: cached.length, selectedIds: features.map((f) => f.id) });
        fs.writeFileSync(path.join(logDir, 'features.json'), JSON.stringify(features, null, 2));
    }
    else {
        // --- Step 1: Plan features ---
        console.log(`  Features to carve: ${opts.n}`);
        events.send({ type: 'phase_change', phase: 'planning', detail: 'Analyzing codebase...' });
        console.log(`\n${'='.repeat(60)}`);
        console.log('STEP 1: Planning features to carve...');
        console.log(`${'='.repeat(60)}`);
        const plan = await planFeatures(opts.repoPath);
        console.log(`\nIdentified ${plan.candidates.length} candidates. Reasoning:\n${plan.reasoning.slice(0, 500)}`);
        fs.writeFileSync(path.join(logDir, 'plan.json'), JSON.stringify(plan, null, 2));
        // --- Step 2: Select random subset and carve ---
        console.log(`\n${'='.repeat(60)}`);
        console.log(`STEP 2: Selecting ${opts.n} random features and carving...`);
        console.log(`${'='.repeat(60)}`);
        const selected = selectRandom(plan.candidates, opts.n);
        console.log(`Selected: ${selected.map((c) => c.id).join(', ')}`);
        events.send({ type: 'feature_planned', totalCandidates: plan.candidates.length, selectedIds: selected.map((c) => c.id) });
        events.send({ type: 'phase_change', phase: 'carving', detail: `Carving ${selected.length} features...` });
        const carveQueue = [...selected];
        let carveNext = 0;
        // Indexed by position in `selected`, so the final order is stable
        // regardless of which carve finishes first; null marks "not carved".
        const carveResults = new Array(carveQueue.length).fill(null);
        const carveWorker = async () => {
            while (carveNext < carveQueue.length) {
                const idx = carveNext++;
                const candidate = carveQueue[idx];
                try {
                    events.send({ type: 'feature_status', featureId: candidate.id, status: 'carving' });
                    const carved = await carveFeature(opts.repoPath, candidate);
                    if (carved) {
                        carveResults[idx] = carved;
                        events.send({ type: 'feature_status', featureId: candidate.id, status: 'carved', detail: `${carved.operations.length} file operations` });
                        console.log(`  Carved: ${carved.id} — ${carved.operations.length} file operations`);
                    }
                }
                catch (error) {
                    const msg = error instanceof Error ? error.message : String(error);
                    events.send({ type: 'feature_status', featureId: candidate.id, status: 'carve_failed', detail: msg.slice(0, 200) });
                    console.error(`  Failed to carve ${candidate.id}: ${msg.slice(0, 200)}`);
                }
            }
        };
        // Always start at least one worker: a parallelism of 0, a negative
        // number or NaN would otherwise carve nothing and abort the run.
        const requested = Math.floor(Number(opts.parallelism));
        const workerCount = Math.max(1, Math.min(Number.isNaN(requested) ? 1 : requested, carveQueue.length));
        await Promise.all(Array.from({ length: workerCount }, () => carveWorker()));
        features = carveResults.filter(Boolean);
    }
    if (features.length === 0) {
        console.error('No features were successfully carved. Aborting.');
        // The event log was opened by initLog above; close it on this early
        // exit just as the success path does.
        events.close();
        return;
    }
    // Pre-compute ground truth diffs
    const groundTruthDiffs = new Map();
    for (const feature of features) {
        groundTruthDiffs.set(feature.id, getGroundTruthDiff(feature));
    }
    fs.writeFileSync(path.join(logDir, 'features.json'), JSON.stringify(features, null, 2));
    // --- Step 3: Baseline evaluation ---
    events.send({ type: 'phase_change', phase: 'evaluating', round: 0, detail: 'Baseline' });
    console.log(`\n${'='.repeat(60)}`);
    console.log('STEP 3: Baseline evaluation');
    console.log(`${'='.repeat(60)}`);
    const baseline = await runEvalRound(features, groundTruthDiffs, opts, 0);
    saveRoundResults(logDir, baseline);
    let totalCost = baseline.totalCost;
    const roundResults = [baseline];
    const baselineRejudgeResults = [];
    let previousResults = baseline;
    // --- Step 4: Improvement loops ---
    for (let loop = 1; loop <= opts.loops; loop++) {
        console.log(`\n${'*'.repeat(60)}`);
        console.log(`IMPROVEMENT LOOP ${loop}/${opts.loops}`);
        console.log(`${'*'.repeat(60)}`);
        // 4a: Collect judge suggestions and run docs writer agent
        const validTasks = previousResults.tasks.filter((t) => t.score >= 0);
        const judgeSuggestions = collectDocSuggestions(validTasks);
        events.send({ type: 'phase_change', phase: 'docs_writer', loop });
        events.send({ type: 'docs_writer', action: 'start', loop, suggestionCount: judgeSuggestions.split('\n').filter((l) => l.startsWith('-')).length });
        console.log(`\n--- Step 4a: Docs writer with judge suggestions ---`);
        const docsSnapshotBefore = getDocsSnapshot(opts.repoPath);
        fs.writeFileSync(path.join(logDir, `judge-suggestions-loop-${loop}.txt`), judgeSuggestions);
        await runDocsWriterAgent(opts.repoPath, judgeSuggestions, opts.docsModel);
        events.send({ type: 'docs_writer', action: 'complete', loop });
        // Save docs state and diff for this loop
        const docsAfterRefactor = getDocsSnapshot(opts.repoPath);
        const docsDiffText = computeDocsDiffText(docsSnapshotBefore, docsAfterRefactor);
        fs.writeFileSync(path.join(logDir, `docs-diff-loop-${loop}.txt`), docsDiffText);
        fs.writeFileSync(path.join(logDir, `docs-state-loop-${loop}.json`), JSON.stringify(docsAfterRefactor, null, 2));
        // 4b: Re-eval with updated docs
        events.send({ type: 'phase_change', phase: 'evaluating', round: loop, loop, detail: 'Re-eval with updated docs' });
        console.log(`\n--- Step 4b: Re-evaluation with updated docs ---`);
        const results = await runEvalRound(features, groundTruthDiffs, opts, loop);
        saveRoundResults(logDir, results);
        totalCost += results.totalCost;
        roundResults.push(results);
        previousResults = results;
        // 4c: Re-judge the BASELINE diffs against the current docs. This tells us
        // whether judge scores are drifting because of docs-informed recalibration
        // rather than real agent improvement.
        console.log(`\n--- Step 4c: Re-judging baseline with current docs ---`);
        const rejudged = await runBaselineRejudgeRound(baseline, features, groundTruthDiffs, opts, loop);
        saveBaselineRejudgeResults(logDir, rejudged);
        baselineRejudgeResults.push(rejudged);
        console.log(`\n  Loop ${loop} complete. Score: ${baseline.avgScore.toFixed(1)} → ${results.avgScore.toFixed(1)}` +
            ` (baseline rejudged: ${baseline.avgScore.toFixed(1)} → ${rejudged.avgScore.toFixed(1)})`);
    }
    // --- Summary ---
    const endTime = new Date().toISOString();
    const summary = {
        repoPath: opts.repoPath,
        startTime,
        endTime,
        featuresCarved: features.length,
        rounds: roundResults.map((r) => ({
            round: r.round,
            avgScore: r.avgScore,
            scores: Object.fromEntries(r.tasks.map((t) => [t.featureId, t.score])),
            totalCost: r.totalCost,
        })),
        totalCost,
        scoreProgression: roundResults.map((r) => r.avgScore),
        baselineRejudgeProgression: baselineRejudgeResults.map((r) => r.avgScore),
    };
    saveSummary(logDir, summary, roundResults, opts, baselineRejudgeResults);
    events.send({
        type: 'run_complete',
        scoreProgression: summary.scoreProgression,
        totalCost,
        duration: `${startTime} → ${endTime}`,
    });
    events.close();
    console.log(`\n${'='.repeat(60)}`);
    console.log('EVALBUFF RUN COMPLETE');
    console.log(`${'='.repeat(60)}`);
    console.log(`  Duration: ${startTime} → ${endTime}`);
    console.log(`  Features: ${features.length}`);
    console.log(`  Total cost: $${totalCost.toFixed(2)}`);
    console.log(`  Score progression: ${summary.scoreProgression.map((s) => s.toFixed(1)).join(' → ')}`);
    console.log(`  Logs: ${logDir}`);
    console.log(`  Report: ${path.join(logDir, 'report.md')}`);
}
349
+ // --- CLI entry point ---
350
// Runs only when this module is the program entry point. Parses `--flag value`
// pairs from argv, validates the numeric options and starts the run.
if (import.meta.main) {
    const args = process.argv.slice(2);
    // Returns the token following `--name`, else `defaultValue`; throws when
    // the flag has no value and no default was given.
    const getArg = (name, defaultValue) => {
        const idx = args.indexOf(`--${name}`);
        if (idx >= 0 && idx + 1 < args.length)
            return args[idx + 1];
        if (defaultValue !== undefined)
            return defaultValue;
        throw new Error(`Missing required argument: --${name}`);
    };
    const hasArg = (name) => args.includes(`--${name}`);
    // Parses an integer option in base 10 and rejects NaN or out-of-range
    // values, so a typo such as `--parallelism --loops` fails loudly instead
    // of propagating NaN into the run.
    const getIntArg = (name, defaultValue, min) => {
        const raw = getArg(name, defaultValue);
        const value = Number.parseInt(raw, 10);
        if (!Number.isInteger(value) || value < min) {
            throw new Error(`Invalid value for --${name}: expected an integer >= ${min}, got "${raw}"`);
        }
        return value;
    };
    const repoPath = getArg('repo');
    const n = getIntArg('n', '20', 1);
    const parallelism = getIntArg('parallelism', '10', 1);
    // 0 loops is allowed: the improvement loop simply does not execute.
    const loops = getIntArg('loops', '3', 0);
    const initCommand = hasArg('init-command') ? getArg('init-command') : undefined;
    const codingModel = getArg('coding-model', 'sonnet');
    const docsModel = getArg('docs-model', 'opus');
    const cachedFeatures = hasArg('cached-features') ? getArg('cached-features') : undefined;
    runEvalbuff({
        repoPath,
        n,
        parallelism,
        loops,
        initCommand,
        codingModel,
        docsModel,
        cachedFeatures,
    }).catch((error) => {
        console.error('Evalbuff run failed:', error);
        process.exit(1);
    });
}
383
+ //# sourceMappingURL=run-evalbuff.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"run-evalbuff.js","sourceRoot":"","sources":["../src/run-evalbuff.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AACH,OAAO,EAAE,MAAM,IAAI,CAAA;AACnB,OAAO,EAAE,MAAM,IAAI,CAAA;AACnB,OAAO,IAAI,MAAM,MAAM,CAAA;AAEvB,OAAO,EAAE,YAAY,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAA;AAC7D,OAAO,EAAE,qBAAqB,EAAE,kBAAkB,EAAE,MAAM,eAAe,CAAA;AACzE,OAAO,EAAE,YAAY,EAAE,kBAAkB,EAAE,eAAe,EAAE,mBAAmB,EAAE,MAAM,gBAAgB,CAAA;AACvG,OAAO,EAAE,eAAe,EAAE,8BAA8B,EAAE,MAAM,eAAe,CAAA;AAC/E,OAAO,EAAE,gBAAgB,EAAE,0BAA0B,EAAE,WAAW,EAAE,MAAM,UAAU,CAAA;AACpF,OAAO,EAAE,MAAM,EAAE,MAAM,cAAc,CAAA;AAmBrC,qBAAqB;AAErB,KAAK,UAAU,YAAY,CACzB,QAAyB,EACzB,gBAAqC,EACrC,IAAqB,EACrB,KAAa;IAEb,OAAO,CAAC,GAAG,CAAC,KAAK,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,CAAC,CAAA;IAClC,OAAO,CAAC,GAAG,CAAC,SAAS,KAAK,iBAAiB,QAAQ,CAAC,MAAM,0BAA0B,IAAI,CAAC,WAAW,GAAG,CAAC,CAAA;IACxG,OAAO,CAAC,GAAG,CAAC,GAAG,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,CAAC,CAAA;IAEhC,wCAAwC;IACxC,MAAM,OAAO,GAAiB,EAAE,CAAA;IAChC,MAAM,KAAK,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,OAAO,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,OAAO,EAAE,CAAC,EAAE,CAAC,CAAC,CAAA;IAC5D,IAAI,IAAI,GAAG,CAAC,CAAA;IAEZ,KAAK,UAAU,MAAM;QACnB,OAAO,IAAI,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC;YAC3B,MAAM,EAAE,OAAO,EAAE,CAAC,EAAE,GAAG,KAAK,CAAC,IAAI,EAAE,CAAC,CAAA;YACpC,IAAI,CAAC;gBACH,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,gBAAgB,EAAE,SAAS,EAAE,OAAO,CAAC,EAAE,EAAE,MAAM,EAAE,eAAe,EAAE,CAAC,CAAA;gBACvF,MAAM,MAAM,GAAG,MAAM,eAAe,CAAC;oBACnC,GAAG,EAAE,CAAC;oBACN,KAAK,EAAE,QAAQ,CAAC,MAAM;oBACtB,QAAQ,EAAE,IAAI,CAAC,QAAQ;oBACvB,OAAO;oBACP,WAAW,EAAE,IAAI,CAAC,WAAW;oBAC7B,KAAK,EAAE,IAAI,CAAC,WAAW;oBACvB,eAAe,EAAE,gBAAgB,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC,IAAI,EAAE;oBACvD,cAAc,EAAE,IAAI,CAAC,QAAQ;iBAC9B,CAAC,CAAA;gBACF,OAAO,CAAC,CAAC,CAAC,GAAG,MAAM,CAAA;gBACnB,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,gBAAgB,EAAE,SAAS,EAAE,OAAO,CAAC,EAAE,EAAE,MAAM,EAAE,QAAQ,EAAE,KAAK,EAAE,MAAM,CAAC,KAAK,EAAE,IAAI,EAAE,MAAM,CAAC,YAAY,EAAE,CAAC,CAAA;YAClI,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,MAAM,GAAG,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CA
AC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAA;gBAClE,OAAO,CAAC,CAAC,CAAC,GAAG;oBACX,SAAS,EAAE,OAAO,CAAC,EAAE;oBACrB,MAAM,EAAE,OAAO,CAAC,MAAM;oBACtB,KAAK,EAAE,CAAC,CAAC;oBACT,IAAI,EAAE,EAAE;oBACR,KAAK,EAAE,gBAAgB,GAAG,EAAE;oBAC5B,OAAO,EAAE;wBACP,QAAQ,EAAE,iBAAiB,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE;wBAC9C,SAAS,EAAE,EAAE;wBACb,UAAU,EAAE,CAAC,0CAA0C,CAAC;wBACxD,iBAAiB,EAAE,EAAE;wBACrB,eAAe,EAAE,CAAC,CAAC;wBACnB,gBAAgB,EAAE,CAAC,CAAC;wBACpB,QAAQ,EAAE,CAAC,CAAC;wBACZ,YAAY,EAAE,CAAC,CAAC;qBACjB;oBACD,YAAY,EAAE,CAAC;oBACf,QAAQ,EAAE,EAAE;iBACb,CAAA;gBACD,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,gBAAgB,EAAE,SAAS,EAAE,OAAO,CAAC,EAAE,EAAE,MAAM,EAAE,aAAa,EAAE,MAAM,EAAE,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC,CAAA;YAClH,CAAC;QACH,CAAC;IACH,CAAC;IAED,MAAM,OAAO,CAAC,GAAG,CACf,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,WAAW,EAAE,QAAQ,CAAC,MAAM,CAAC,EAAE,EAAE,GAAG,EAAE,CAAC,MAAM,EAAE,CAAC,CACpF,CAAA;IAED,MAAM,KAAK,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC,CAAA;IACjD,MAAM,QAAQ,GAAG,KAAK,CAAC,MAAM,GAAG,CAAC;QAC/B,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,GAAG,KAAK,CAAC,MAAM;QACvD,CAAC,CAAC,CAAC,CAAA;IACL,MAAM,SAAS,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,YAAY,EAAE,CAAC,CAAC,CAAA;IAEjE,OAAO,CAAC,GAAG,CAAC,WAAW,KAAK,WAAW,CAAC,CAAA;IACxC,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;QACxB,MAAM,MAAM,GAAG,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,QAAQ,CAAA;QACnE,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,SAAS,KAAK,MAAM,EAAE,CAAC,CAAA;IAC5C,CAAC;IACD,OAAO,CAAC,GAAG,CAAC,cAAc,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,QAAQ,KAAK,CAAC,MAAM,IAAI,OAAO,CAAC,MAAM,aAAa,CAAC,CAAA;IACjG,OAAO,CAAC,GAAG,CAAC,YAAY,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,CAAA;IAE/C,MAAM,CAAC,IAAI,CAAC;QACV,IAAI,EAAE,gBAAgB;QACtB,KAAK;QACL,QAAQ;QACR,SAAS;QACT,MAAM,EAAE,MAAM,CAAC,WAAW,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,
CAAC,CAAC,CAAC,SAAS,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC;KACrE,CAAC,CAAA;IAEF,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,CAAA;AACvD,CAAC;AAED,iCAAiC;AACjC,EAAE;AACF,+EAA+E;AAC/E,gFAAgF;AAChF,uEAAuE;AACvE,8DAA8D;AAE9D,KAAK,UAAU,uBAAuB,CACpC,QAAqB,EACrB,QAAyB,EACzB,gBAAqC,EACrC,IAAqB,EACrB,IAAY;IAEZ,OAAO,CAAC,GAAG,CAAC,KAAK,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,CAAC,CAAA;IAClC,OAAO,CAAC,GAAG,CAAC,0BAA0B,IAAI,kBAAkB,QAAQ,CAAC,KAAK,CAAC,MAAM,mCAAmC,CAAC,CAAA;IACrH,OAAO,CAAC,GAAG,CAAC,GAAG,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,CAAC,CAAA;IAEhC,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAA;IACzD,MAAM,OAAO,GAAiB,EAAE,CAAA;IAChC,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,YAAY,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,YAAY,EAAE,CAAC,EAAE,CAAC,CAAC,CAAA;IAC5E,IAAI,IAAI,GAAG,CAAC,CAAA;IAEZ,KAAK,UAAU,MAAM;QACnB,OAAO,IAAI,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC;YAC3B,MAAM,EAAE,YAAY,EAAE,CAAC,EAAE,GAAG,KAAK,CAAC,IAAI,EAAE,CAAC,CAAA;YACzC,MAAM,OAAO,GAAG,WAAW,CAAC,GAAG,CAAC,YAAY,CAAC,SAAS,CAAC,CAAA;YAEvD,6EAA6E;YAC7E,uCAAuC;YACvC,IAAI,CAAC,OAAO,IAAI,YAAY,CAAC,KAAK,GAAG,CAAC,EAAE,CAAC;gBACvC,OAAO,CAAC,CAAC,CAAC,GAAG,YAAY,CAAA;gBACzB,SAAQ;YACV,CAAC;YAED,IAAI,CAAC;gBACH,MAAM,OAAO,GAAG,MAAM,8BAA8B,CAAC;oBACnD,GAAG,EAAE,CAAC;oBACN,KAAK,EAAE,KAAK,CAAC,MAAM;oBACnB,QAAQ,EAAE,IAAI,CAAC,QAAQ;oBACvB,OAAO;oBACP,YAAY,EAAE,YAAY,CAAC,IAAI;oBAC/B,eAAe,EAAE,gBAAgB,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC,IAAI,EAAE;oBACvD,WAAW,EAAE,IAAI,CAAC,WAAW;oBAC7B,cAAc,EAAE,IAAI,CAAC,QAAQ;iBAC9B,CAAC,CAAA;gBACF,OAAO,CAAC,CAAC,CAAC,GAAG;oBACX,GAAG,YAAY;oBACf,KAAK,EAAE,OAAO,CAAC,YAAY;oBAC3B,OAAO;oBACP,YAAY,EAAE,CAAC,EAAE,0DAA0D;iBAC5E,CAAA;YACH,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,MAAM,GAAG,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAA;gBAClE,OAAO,CAAC,IAAI,CAAC,eAAe,YAAY,CAAC,SAAS,YAAY,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC,CAAA;gBAClF,OAAO,CAAC,CAAC,CAAC,GAAG;oBACX,GAAG,YAAY;oBACf,KAAK,EAAE,C
AAC,CAAC;oBACT,OAAO,EAAE;wBACP,QAAQ,EAAE,mBAAmB,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE;wBAChD,SAAS,EAAE,EAAE;wBACb,UAAU,EAAE,CAAC,gBAAgB,CAAC;wBAC9B,iBAAiB,EAAE,EAAE;wBACrB,eAAe,EAAE,CAAC,CAAC;wBACnB,gBAAgB,EAAE,CAAC,CAAC;wBACpB,QAAQ,EAAE,CAAC,CAAC;wBACZ,YAAY,EAAE,CAAC,CAAC;qBACjB;iBACF,CAAA;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,MAAM,OAAO,CAAC,GAAG,CACf,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,WAAW,EAAE,KAAK,CAAC,MAAM,CAAC,EAAE,EAAE,GAAG,EAAE,CAAC,MAAM,EAAE,CAAC,CACjF,CAAA;IAED,MAAM,KAAK,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC,CAAA;IACjD,MAAM,QAAQ,GAAG,KAAK,CAAC,MAAM,GAAG,CAAC;QAC/B,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,GAAG,KAAK,CAAC,MAAM;QACvD,CAAC,CAAC,CAAC,CAAA;IAEL,OAAO,CAAC,GAAG,CAAC,4BAA4B,IAAI,YAAY,CAAC,CAAA;IACzD,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;QACxB,MAAM,MAAM,GAAG,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,QAAQ,CAAA;QACnE,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,SAAS,KAAK,MAAM,EAAE,CAAC,CAAA;IAC5C,CAAC;IACD,OAAO,CAAC,GAAG,CAAC,cAAc,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,oBAAoB,QAAQ,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAA;IAEpG,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,CAAC,EAAE,CAAA;AAChE,CAAC;AAED,4BAA4B;AAE5B,MAAM,CAAC,KAAK,UAAU,WAAW,CAAC,IAAqB;IACrD,MAAM,SAAS,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAA;IAC1C,MAAM,MAAM,GAAG,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,MAAM,EAAE,EAAE,gBAAgB,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,GAAG,CAAC,EAAE,CAAC,CAAA;IACjH,EAAE,CAAC,SAAS,CAAC,MAAM,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAA;IAEzC,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAA;IACtB,MAAM,CAAC,IAAI,CAAC;QACV,IAAI,EAAE,WAAW;QACjB,QAAQ,EAAE,IAAI,CAAC,QAAQ;QACvB,CAAC,EAAE,IAAI,CAAC,CAAC;QACT,KAAK,EAAE,IAAI,CAAC,KAAK;QACjB,WAAW,EAAE,IAAI,CAAC,WAAW;QAC7B,WAAW,EAAE,IAAI,CAAC,WAAW;QAC7B,SAAS,EAAE,IAAI,CAAC,SAAS;QACzB,MAAM;KACP,
CAAC,CAAA;IAEF,OAAO,CAAC,GAAG,CAAC,gBAAgB,CAAC,CAAA;IAC7B,OAAO,CAAC,GAAG,CAAC,WAAW,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAA;IACvC,OAAO,CAAC,GAAG,CAAC,wBAAwB,IAAI,CAAC,KAAK,EAAE,CAAC,CAAA;IACjD,OAAO,CAAC,GAAG,CAAC,mBAAmB,IAAI,CAAC,WAAW,EAAE,CAAC,CAAA;IAClD,OAAO,CAAC,GAAG,CAAC,iBAAiB,IAAI,CAAC,SAAS,EAAE,CAAC,CAAA;IAC9C,OAAO,CAAC,GAAG,CAAC,cAAc,MAAM,EAAE,CAAC,CAAA;IAEnC,IAAI,QAAyB,CAAA;IAE7B,IAAI,IAAI,CAAC,cAAc,EAAE,CAAC;QACxB,+BAA+B;QAC/B,OAAO,CAAC,GAAG,CAAC,kCAAkC,IAAI,CAAC,cAAc,EAAE,CAAC,CAAA;QACpE,MAAM,MAAM,GAAoB,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,YAAY,CAAC,IAAI,CAAC,cAAc,EAAE,OAAO,CAAC,CAAC,CAAA;QACzF,QAAQ,GAAG,YAAY,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC,CAAC,CAAA;QACvC,OAAO,CAAC,GAAG,CAAC,YAAY,MAAM,CAAC,MAAM,uBAAuB,QAAQ,CAAC,MAAM,KAAK,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAA;QAErH,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,iBAAiB,EAAE,eAAe,EAAE,MAAM,CAAC,MAAM,EAAE,WAAW,EAAE,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,CAAA;QAC9G,EAAE,CAAC,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,eAAe,CAAC,EAAE,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAA;IACzF,CAAC;SAAM,CAAC;QACN,gCAAgC;QAChC,OAAO,CAAC,GAAG,CAAC,wBAAwB,IAAI,CAAC,CAAC,EAAE,CAAC,CAAA;QAC7C,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,cAAc,EAAE,KAAK,EAAE,UAAU,EAAE,MAAM,EAAE,uBAAuB,EAAE,CAAC,CAAA;QACzF,OAAO,CAAC,GAAG,CAAC,KAAK,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,CAAC,CAAA;QAClC,OAAO,CAAC,GAAG,CAAC,uCAAuC,CAAC,CAAA;QACpD,OAAO,CAAC,GAAG,CAAC,GAAG,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,CAAC,CAAA;QAEhC,MAAM,IAAI,GAAG,MAAM,YAAY,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAA;QAC9C,OAAO,CAAC,GAAG,CAAC,gBAAgB,IAAI,CAAC,UAAU,CAAC,MAAM,4BAA4B,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC,CAAA;QAE7G,EAAE,CAAC,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,WAAW,CAAC,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAA;QAE/E,iDAAiD;QACjD,OAAO,CAAC,GAAG,CAAC,KAAK,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,CAAC,CAAA;QAClC,OAAO,CAAC,GAAG,CAAC,qBAAqB,IAAI,CAAC,CAAC,iCAAiC,CAAC,CAAA;QACzE,OA
AO,CAAC,GAAG,CAAC,GAAG,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,CAAC,CAAA;QAEhC,MAAM,QAAQ,GAAG,YAAY,CAAC,IAAI,CAAC,UAAU,EAAE,IAAI,CAAC,CAAC,CAAC,CAAA;QACtD,OAAO,CAAC,GAAG,CAAC,aAAa,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAA;QAEhE,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,iBAAiB,EAAE,eAAe,EAAE,IAAI,CAAC,UAAU,CAAC,MAAM,EAAE,WAAW,EAAE,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,CAAA;QACvH,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,cAAc,EAAE,KAAK,EAAE,SAAS,EAAE,MAAM,EAAE,WAAW,QAAQ,CAAC,MAAM,cAAc,EAAE,CAAC,CAAA;QAEzG,QAAQ,GAAG,EAAE,CAAA;QACb,CAAC;YACC,MAAM,UAAU,GAAG,CAAC,GAAG,QAAQ,CAAC,CAAA;YAChC,IAAI,SAAS,GAAG,CAAC,CAAA;YACjB,MAAM,YAAY,GAA6B,IAAI,KAAK,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;YAEtF,KAAK,UAAU,WAAW;gBACxB,OAAO,SAAS,GAAG,UAAU,CAAC,MAAM,EAAE,CAAC;oBACrC,MAAM,GAAG,GAAG,SAAS,EAAE,CAAA;oBACvB,MAAM,SAAS,GAAG,UAAU,CAAC,GAAG,CAAC,CAAA;oBACjC,IAAI,CAAC;wBACH,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,gBAAgB,EAAE,SAAS,EAAE,SAAS,CAAC,EAAE,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC,CAAA;wBACnF,MAAM,MAAM,GAAG,MAAM,YAAY,CAAC,IAAI,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAA;wBAC3D,IAAI,MAAM,EAAE,CAAC;4BACX,YAAY,CAAC,GAAG,CAAC,GAAG,MAAM,CAAA;4BAC1B,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,gBAAgB,EAAE,SAAS,EAAE,SAAS,CAAC,EAAE,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,GAAG,MAAM,CAAC,UAAU,CAAC,MAAM,kBAAkB,EAAE,CAAC,CAAA;4BACzI,OAAO,CAAC,GAAG,CAAC,aAAa,MAAM,CAAC,EAAE,MAAM,MAAM,CAAC,UAAU,CAAC,MAAM,kBAAkB,CAAC,CAAA;wBACrF,CAAC;oBACH,CAAC;oBAAC,OAAO,KAAK,EAAE,CAAC;wBACf,MAAM,GAAG,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAA;wBAClE,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,gBAAgB,EAAE,SAAS,EAAE,SAAS,CAAC,EAAE,EAAE,MAAM,EAAE,cAAc,EAAE,MAAM,EAAE,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC,CAAA;wBACnH,OAAO,CAAC,KAAK,CAAC,qBAAqB,SAAS,CAAC,EAAE,KAAK,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC,CAAA;oBAC1E,CAAC;gBACH,CAAC;YACH,CAAC;YAED,MAAM,OAAO,CAAC,GAAG,CACf,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,IAAI,CAAC
,GAAG,CAAC,IAAI,CAAC,WAAW,EAAE,UAAU,CAAC,MAAM,CAAC,EAAE,EAAE,GAAG,EAAE,CAAC,WAAW,EAAE,CAAC,CAC3F,CAAA;YACD,KAAK,MAAM,MAAM,IAAI,YAAY,EAAE,CAAC;gBAClC,IAAI,MAAM;oBAAE,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;YACnC,CAAC;QACH,CAAC;IACH,CAAC;IAED,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC1B,OAAO,CAAC,KAAK,CAAC,iDAAiD,CAAC,CAAA;QAChE,OAAM;IACR,CAAC;IAED,iCAAiC;IACjC,MAAM,gBAAgB,GAAG,IAAI,GAAG,EAAkB,CAAA;IAClD,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;QAC/B,gBAAgB,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,EAAE,kBAAkB,CAAC,OAAO,CAAC,CAAC,CAAA;IAC/D,CAAC;IAED,EAAE,CAAC,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,eAAe,CAAC,EAAE,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAA;IAEvF,sCAAsC;IACtC,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,cAAc,EAAE,KAAK,EAAE,YAAY,EAAE,KAAK,EAAE,CAAC,EAAE,MAAM,EAAE,UAAU,EAAE,CAAC,CAAA;IACxF,OAAO,CAAC,GAAG,CAAC,KAAK,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,CAAC,CAAA;IAClC,OAAO,CAAC,GAAG,CAAC,6BAA6B,CAAC,CAAA;IAC1C,OAAO,CAAC,GAAG,CAAC,GAAG,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,CAAC,CAAA;IAEhC,MAAM,QAAQ,GAAG,MAAM,YAAY,CAAC,QAAQ,EAAE,gBAAgB,EAAE,IAAI,EAAE,CAAC,CAAC,CAAA;IACxE,gBAAgB,CAAC,MAAM,EAAE,QAAQ,CAAC,CAAA;IAElC,IAAI,SAAS,GAAG,QAAQ,CAAC,SAAS,CAAA;IAClC,MAAM,YAAY,GAAkB,CAAC,QAAQ,CAAC,CAAA;IAC9C,MAAM,sBAAsB,GAAkB,EAAE,CAAA;IAChD,IAAI,eAAe,GAAG,QAAQ,CAAA;IAE9B,oCAAoC;IACpC,KAAK,IAAI,IAAI,GAAG,CAAC,EAAE,IAAI,IAAI,IAAI,CAAC,KAAK,EAAE,IAAI,EAAE,EAAE,CAAC;QAC9C,OAAO,CAAC,GAAG,CAAC,KAAK,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,CAAC,CAAA;QAClC,OAAO,CAAC,GAAG,CAAC,oBAAoB,IAAI,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC,CAAA;QACrD,OAAO,CAAC,GAAG,CAAC,GAAG,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,CAAC,CAAA;QAEhC,0DAA0D;QAC1D,MAAM,UAAU,GAAG,eAAe,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC,CAAA;QACpE,MAAM,gBAAgB,GAAG,qBAAqB,CAAC,UAAU,CAAC,CAAA;QAE1D,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,cAAc,EAAE,KAAK,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,CAAA;QACjE,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,aAAa,EAAE,MAAM,EAAE,OAAO,EAAE,IAAI,EAAE,eAAe,EAAE,gBAAgB,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EA
AE,CAAC,CAAC,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAA;QAEhJ,OAAO,CAAC,GAAG,CAAC,uDAAuD,CAAC,CAAA;QACpE,MAAM,kBAAkB,GAAG,eAAe,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAA;QAEzD,EAAE,CAAC,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,0BAA0B,IAAI,MAAM,CAAC,EAAE,gBAAgB,CAAC,CAAA;QAE3F,MAAM,kBAAkB,CAAC,IAAI,CAAC,QAAQ,EAAE,gBAAgB,EAAE,IAAI,CAAC,SAAS,CAAC,CAAA;QACzE,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,aAAa,EAAE,MAAM,EAAE,UAAU,EAAE,IAAI,EAAE,CAAC,CAAA;QAE9D,yCAAyC;QACzC,MAAM,iBAAiB,GAAG,eAAe,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAA;QACxD,MAAM,YAAY,GAAG,mBAAmB,CAAC,kBAAkB,EAAE,iBAAiB,CAAC,CAAA;QAC/E,EAAE,CAAC,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,kBAAkB,IAAI,MAAM,CAAC,EAAE,YAAY,CAAC,CAAA;QAC/E,EAAE,CAAC,aAAa,CACd,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,mBAAmB,IAAI,OAAO,CAAC,EACjD,IAAI,CAAC,SAAS,CAAC,iBAAiB,EAAE,IAAI,EAAE,CAAC,CAAC,CAC3C,CAAA;QAED,gCAAgC;QAChC,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,cAAc,EAAE,KAAK,EAAE,YAAY,EAAE,KAAK,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,2BAA2B,EAAE,CAAC,CAAA;QAClH,OAAO,CAAC,GAAG,CAAC,oDAAoD,CAAC,CAAA;QACjE,MAAM,OAAO,GAAG,MAAM,YAAY,CAAC,QAAQ,EAAE,gBAAgB,EAAE,IAAI,EAAE,IAAI,CAAC,CAAA;QAC1E,gBAAgB,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;QAEjC,SAAS,IAAI,OAAO,CAAC,SAAS,CAAA;QAC9B,YAAY,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;QAC1B,eAAe,GAAG,OAAO,CAAA;QAEzB,0EAA0E;QAC1E,2EAA2E;QAC3E,sCAAsC;QACtC,OAAO,CAAC,GAAG,CAAC,0DAA0D,CAAC,CAAA;QACvE,MAAM,QAAQ,GAAG,MAAM,uBAAuB,CAAC,QAAQ,EAAE,QAAQ,EAAE,gBAAgB,EAAE,IAAI,EAAE,IAAI,CAAC,CAAA;QAChG,0BAA0B,CAAC,MAAM,EAAE,QAAQ,CAAC,CAAA;QAC5C,sBAAsB,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAA;QAErC,OAAO,CAAC,GAAG,CACT,YAAY,IAAI,qBAAqB,QAAQ,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE;YACpG,wBAAwB,QAAQ,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,QAAQ,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAC1F,CAAA;IACH,CAAC;IAED,kBAAkB;IAClB,MAAM,OAAO,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAA;IAExC,MAAM,OAAO,GAAgB;QAC3B,QAAQ,EAAE,IAAI,CAAC,QAAQ;QACvB,SAAS;QACT,OAAO;QACP,cAAc,EAAE,QAAQ,CAAC,MAAM;QAC/B,MAAM,EAAE,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YAC/B,K
AAK,EAAE,CAAC,CAAC,KAAK;YACd,QAAQ,EAAE,CAAC,CAAC,QAAQ;YACpB,MAAM,EAAE,MAAM,CAAC,WAAW,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC;YACtE,SAAS,EAAE,CAAC,CAAC,SAAS;SACvB,CAAC,CAAC;QACH,SAAS;QACT,gBAAgB,EAAE,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC;QACrD,0BAA0B,EAAE,sBAAsB,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC;KAC1E,CAAA;IAED,WAAW,CAAC,MAAM,EAAE,OAAO,EAAE,YAAY,EAAE,IAAI,EAAE,sBAAsB,CAAC,CAAA;IAExE,MAAM,CAAC,IAAI,CAAC;QACV,IAAI,EAAE,cAAc;QACpB,gBAAgB,EAAE,OAAO,CAAC,gBAAgB;QAC1C,SAAS;QACT,QAAQ,EAAE,GAAG,SAAS,MAAM,OAAO,EAAE;KACtC,CAAC,CAAA;IACF,MAAM,CAAC,KAAK,EAAE,CAAA;IAEd,OAAO,CAAC,GAAG,CAAC,KAAK,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,CAAC,CAAA;IAClC,OAAO,CAAC,GAAG,CAAC,uBAAuB,CAAC,CAAA;IACpC,OAAO,CAAC,GAAG,CAAC,GAAG,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,CAAC,CAAA;IAChC,OAAO,CAAC,GAAG,CAAC,eAAe,SAAS,MAAM,OAAO,EAAE,CAAC,CAAA;IACpD,OAAO,CAAC,GAAG,CAAC,eAAe,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAA;IAC7C,OAAO,CAAC,GAAG,CAAC,kBAAkB,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,CAAA;IACrD,OAAO,CAAC,GAAG,CAAC,wBAAwB,OAAO,CAAC,gBAAgB,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,CAAA;IACpG,OAAO,CAAC,GAAG,CAAC,WAAW,MAAM,EAAE,CAAC,CAAA;IAChC,OAAO,CAAC,GAAG,CAAC,aAAa,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,WAAW,CAAC,EAAE,CAAC,CAAA;AAC5D,CAAC;AAED,0BAA0B;AAE1B,IAAI,MAAM,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;IACrB,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAA;IAElC,MAAM,MAAM,GAAG,CAAC,IAAY,EAAE,YAAqB,EAAU,EAAE;QAC7D,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,KAAK,IAAI,EAAE,CAAC,CAAA;QACrC,IAAI,GAAG,IAAI,CAAC,IAAI,GAAG,GAAG,CAAC,GAAG,IAAI,CAAC,MAAM;YAAE,OAAO,IAAI,CAAC,GAAG,GAAG,CAAC,CAAC,CAAA;QAC3D,IAAI,YAAY,KAAK,SAAS;YAAE,OAAO,YAAY,CAAA;QACnD,MAAM,IAAI,KAAK,CAAC,gCAAgC,IAAI,EAAE,CAAC,CAAA;IACzD,CAAC,CAAA;IACD,MAAM,MAAM,GAAG,CAAC,IAAY,EAAW,EAAE,CAAC,IAAI,CAAC,QAAQ,CAAC,KAAK,IAAI,EAAE,CAAC,CAAA;IAEpE,MAAM,QAAQ,GAAG,MAAM,CAAC,MAAM,CAAC,CAAA;IAC/
B,MAAM,CAAC,GAAG,QAAQ,CAAC,MAAM,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC,CAAA;IACrC,MAAM,WAAW,GAAG,QAAQ,CAAC,MAAM,CAAC,aAAa,EAAE,IAAI,CAAC,CAAC,CAAA;IACzD,MAAM,KAAK,GAAG,QAAQ,CAAC,MAAM,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC,CAAA;IAC5C,MAAM,WAAW,GAAG,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,SAAS,CAAA;IAC/E,MAAM,WAAW,GAAG,MAAM,CAAC,cAAc,EAAE,QAAQ,CAAC,CAAA;IACpD,MAAM,SAAS,GAAG,MAAM,CAAC,YAAY,EAAE,MAAM,CAAC,CAAA;IAC9C,MAAM,cAAc,GAAG,MAAM,CAAC,iBAAiB,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,iBAAiB,CAAC,CAAC,CAAC,CAAC,SAAS,CAAA;IAExF,WAAW,CAAC;QACV,QAAQ;QACR,CAAC;QACD,WAAW;QACX,KAAK;QACL,WAAW;QACX,WAAW;QACX,SAAS;QACT,cAAc;KACf,CAAC,CAAC,KAAK,CAAC,CAAC,KAAK,EAAE,EAAE;QACjB,OAAO,CAAC,KAAK,CAAC,sBAAsB,EAAE,KAAK,CAAC,CAAA;QAC5C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACjB,CAAC,CAAC,CAAA;AACJ,CAAC"}
@@ -0,0 +1,10 @@
1
+ import type { Runner, RunnerResult } from './runner';
2
/**
 * Type declaration for ClaudeRunner, a class that implements the `Runner`
 * interface imported from './runner'. All four fields are private.
 */
+ export declare class ClaudeRunner implements Runner {
3
+ private cwd;
4
+ private env;
5
+ private model;
6
+ private effort?;
7
/** Only `cwd` is required; `env`, `model` and `effort` are optional. */
+ constructor(cwd: string, env?: Record<string, string>, model?: string, effort?: string);
8
/** Takes a prompt string and resolves to a `RunnerResult`. */
+ run(prompt: string): Promise<RunnerResult>;
9
+ }
10
+ //# sourceMappingURL=claude.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"claude.d.ts","sourceRoot":"","sources":["../../src/runners/claude.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAAE,MAAM,EAAE,YAAY,EAAa,MAAM,UAAU,CAAA;AAE/D,qBAAa,YAAa,YAAW,MAAM;IACzC,OAAO,CAAC,GAAG,CAAQ;IACnB,OAAO,CAAC,GAAG,CAAwB;IACnC,OAAO,CAAC,KAAK,CAAQ;IACrB,OAAO,CAAC,MAAM,CAAC,CAAQ;gBAGrB,GAAG,EAAE,MAAM,EACX,GAAG,GAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAM,EAChC,KAAK,GAAE,MAAmC,EAC1C,MAAM,CAAC,EAAE,MAAM;IAQX,GAAG,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC;CAmEjD"}
@@ -0,0 +1,80 @@
1
+ import { execSync } from 'child_process';
2
+ import { query } from '@anthropic-ai/claude-agent-sdk';
3
+ import { captureGitDiff } from '../eval-helpers';
4
/**
 * Translates one `assistant` stream message into trace steps.
 *
 * Text blocks become `{ type: 'text', text }` and tool-use blocks become
 * `{ type: 'tool_call', toolName, toolCallId, input }`. Any other block type
 * is ignored.
 *
 * @param {object} message - A stream message whose `type` is `'assistant'`.
 * @returns {object[]} Steps in content order; empty when
 *   `message.message.content` is not an array.
 */
function assistantMessageToSteps(message) {
    const content = message.message?.content;
    if (!Array.isArray(content)) {
        return [];
    }
    const steps = [];
    for (const block of content) {
        if (block.type === 'text') {
            steps.push({ type: 'text', text: block.text });
        }
        else if (block.type === 'tool_use') {
            steps.push({
                type: 'tool_call',
                toolName: block.name,
                toolCallId: block.id,
                // A tool call without input is recorded with an empty object.
                input: block.input || {},
            });
        }
    }
    return steps;
}

/**
 * Runner that sends a prompt through `query()` from
 * '@anthropic-ai/claude-agent-sdk' and reports the resulting steps, cost and
 * git diff.
 */
export class ClaudeRunner {
    cwd;
    env;
    model;
    effort;

    /**
     * @param {string} cwd - Directory the session runs in; also used for the git commands.
     * @param {Record<string, string>} [env] - Extra environment variables layered over `process.env`.
     * @param {string} [model] - Model identifier passed to the session.
     * @param {string} [effort] - Effort level; `run()` falls back to `'high'` when falsy.
     */
    constructor(cwd, env = {}, model = 'claude-opus-4-5-20251101', effort) {
        this.cwd = cwd;
        this.env = env;
        this.model = model;
        this.effort = effort;
    }

    /**
     * Runs one session for `prompt` and collects its trace.
     *
     * @param {string} prompt - Prompt forwarded to `query()`.
     * @returns {Promise<{steps: object[], totalCostUsd: number, diff: string}>}
     *   The recorded steps, the cost taken from the `result` message (0 when
     *   absent), and the git diff against the HEAD recorded before the run
     *   (empty string when the diff cannot be captured).
     * @throws When `git rev-parse HEAD` fails in `cwd`, or when the session stream throws.
     */
    async run(prompt) {
        const steps = [];
        let totalCostUsd = 0;

        // Record HEAD before the session starts so the diff is taken against this commit.
        const baseSha = execSync('git rev-parse HEAD', {
            cwd: this.cwd,
            encoding: 'utf-8',
            stdio: ['ignore', 'pipe', 'ignore'],
        }).trim();

        console.log(`[ClaudeRunner] Running with model ${this.model} in ${this.cwd}`);

        const session = query({
            prompt,
            options: {
                cwd: this.cwd,
                model: this.model,
                effort: this.effort || 'high',
                permissionMode: 'bypassPermissions',
                allowDangerouslySkipPermissions: true,
                persistSession: false,
                settingSources: ['project'],
                env: {
                    ...process.env,
                    ...this.env,
                    // Listed last, so it overrides any ANTHROPIC_API_KEY from the
                    // spreads above; CLAUDE_CODE_KEY wins over ANTHROPIC_API_KEY.
                    ANTHROPIC_API_KEY: process.env.CLAUDE_CODE_KEY || process.env.ANTHROPIC_API_KEY || '',
                },
            },
        });

        for await (const message of session) {
            if (message.type === 'assistant') {
                for (const step of assistantMessageToSteps(message)) {
                    steps.push(step);
                    if (step.type === 'text') {
                        // Echo assistant text as it arrives.
                        process.stdout.write(step.text);
                    }
                }
            }
            else if (message.type === 'result') {
                totalCostUsd = message.total_cost_usd || 0;
            }
        }

        // The diff is best-effort: a failure is reported but still yields an
        // empty diff, so the collected steps and cost are not lost.
        let diff = '';
        try {
            diff = captureGitDiff(this.cwd, { baseRef: baseSha });
        }
        catch (error) {
            const reason = error instanceof Error ? error.message : String(error);
            console.warn(`[ClaudeRunner] Failed to capture git diff: ${reason}`);
        }

        return {
            steps,
            totalCostUsd,
            diff,
        };
    }
}
80
+ //# sourceMappingURL=claude.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"claude.js","sourceRoot":"","sources":["../../src/runners/claude.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAA;AAExC,OAAO,EAAE,KAAK,EAAE,MAAM,gCAAgC,CAAA;AACtD,OAAO,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAA;AAIhD,MAAM,OAAO,YAAY;IACf,GAAG,CAAQ;IACX,GAAG,CAAwB;IAC3B,KAAK,CAAQ;IACb,MAAM,CAAS;IAEvB,YACE,GAAW,EACX,MAA8B,EAAE,EAChC,QAAgB,0BAA0B,EAC1C,MAAe;QAEf,IAAI,CAAC,GAAG,GAAG,GAAG,CAAA;QACd,IAAI,CAAC,GAAG,GAAG,GAAG,CAAA;QACd,IAAI,CAAC,KAAK,GAAG,KAAK,CAAA;QAClB,IAAI,CAAC,MAAM,GAAG,MAAM,CAAA;IACtB,CAAC;IAED,KAAK,CAAC,GAAG,CAAC,MAAc;QACtB,MAAM,KAAK,GAAgB,EAAE,CAAA;QAC7B,IAAI,YAAY,GAAG,CAAC,CAAA;QACpB,MAAM,OAAO,GAAG,QAAQ,CAAC,oBAAoB,EAAE;YAC7C,GAAG,EAAE,IAAI,CAAC,GAAG;YACb,QAAQ,EAAE,OAAO;YACjB,KAAK,EAAE,CAAC,QAAQ,EAAE,MAAM,EAAE,QAAQ,CAAC;SACpC,CAAC,CAAC,IAAI,EAAE,CAAA;QAET,OAAO,CAAC,GAAG,CAAC,qCAAqC,IAAI,CAAC,KAAK,OAAO,IAAI,CAAC,GAAG,EAAE,CAAC,CAAA;QAE7E,MAAM,OAAO,GAAG,KAAK,CAAC;YACpB,MAAM;YACN,OAAO,EAAE;gBACP,GAAG,EAAE,IAAI,CAAC,GAAG;gBACb,KAAK,EAAE,IAAI,CAAC,KAAK;gBACjB,MAAM,EAAG,IAAI,CAAC,MAAc,IAAI,MAAM;gBACtC,cAAc,EAAE,mBAAmB;gBACnC,+BAA+B,EAAE,IAAI;gBACrC,cAAc,EAAE,KAAK;gBACrB,cAAc,EAAE,CAAC,SAAS,CAAC;gBAC3B,GAAG,EAAE;oBACH,GAAG,OAAO,CAAC,GAA6B;oBACxC,GAAG,IAAI,CAAC,GAAG;oBACX,iBAAiB,EACf,OAAO,CAAC,GAAG,CAAC,eAAe,IAAI,OAAO,CAAC,GAAG,CAAC,iBAAiB,IAAI,EAAE;iBACrE;aACF;SACF,CAAC,CAAA;QAEF,IAAI,KAAK,EAAE,MAAM,OAAO,IAAI,OAAO,EAAE,CAAC;YACpC,IAAI,OAAO,CAAC,IAAI,KAAK,WAAW,EAAE,CAAC;gBACjC,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,EAAE,OAAO,CAAA;gBACxC,IAAI,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC;oBAC3B,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;wBAC5B,IAAI,KAAK,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;4BAC1B,KAAK,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,KAAK,CAAC,IAAI,EAAE,CAAC,CAAA;4BAC9C,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,CAAC,CAAA;wBAClC,CAAC;6BAAM,IAAI,KAAK,CAAC,IAAI,KAAK,UAAU,EAAE,CAAC;4BACrC,KAAK,CAAC,IAAI,CAAC;gCACT,IAAI,EAAE,WAAW;gCACjB,QAAQ,EAAE,KAAK,CAAC,IAAI;gCACpB,UAAU,EAAE,KAAK,CAAC,EAAE;gCACpB,KAAK,EAAG,KAAK,CAAC,KAA6B,IAAI,EAAE;6BAClD,CA
AC,CAAA;wBACJ,CAAC;oBACH,CAAC;gBACH,CAAC;YACH,CAAC;iBAAM,IAAI,OAAO,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;gBACrC,YAAY,GAAG,OAAO,CAAC,cAAc,IAAI,CAAC,CAAA;YAC5C,CAAC;QACH,CAAC;QAED,6CAA6C;QAC7C,IAAI,IAAI,GAAG,EAAE,CAAA;QACb,IAAI,CAAC;YACH,IAAI,GAAG,cAAc,CAAC,IAAI,CAAC,GAAG,EAAE,EAAE,OAAO,EAAE,OAAO,EAAE,CAAC,CAAA;QACvD,CAAC;QAAC,MAAM,CAAC;YACP,oBAAoB;QACtB,CAAC;QAED,OAAO;YACL,KAAK;YACL,YAAY;YACZ,IAAI;SACL,CAAA;IACH,CAAC;CACF"}
@@ -0,0 +1,24 @@
1
+ import type { Runner, RunnerResult } from './runner';
2
+ import type { CodebuffClient } from '@codebuff/sdk';
3
/**
 * Type declaration for CodebuffRunner, a class that implements the `Runner`
 * interface imported from './runner'. All fields are private and are
 * supplied through the single constructor options object.
 */
+ export declare class CodebuffRunner implements Runner {
4
+ private cwd;
5
+ private env?;
6
+ private client;
7
+ private agentId;
8
+ private localAgentDefinitions;
9
+ private printEvents;
10
+ private commitId;
11
+ private parentSha;
12
/** Every option except `env` is required. */
+ constructor(options: {
13
+ cwd: string;
14
+ env?: Record<string, string>;
15
+ client: CodebuffClient;
16
+ agentId: string;
17
+ localAgentDefinitions: any[];
18
+ printEvents: boolean;
19
+ commitId: string;
20
+ parentSha: string;
21
+ });
22
/** Takes a prompt string and resolves to a `RunnerResult`. */
+ run(prompt: string): Promise<RunnerResult>;
23
+ }
24
+ //# sourceMappingURL=codebuff.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"codebuff.d.ts","sourceRoot":"","sources":["../../src/runners/codebuff.ts"],"names":[],"mappings":"AAMA,OAAO,KAAK,EAAE,MAAM,EAAE,YAAY,EAAa,MAAM,UAAU,CAAA;AAC/D,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,eAAe,CAAA;AAKnD,qBAAa,cAAe,YAAW,MAAM;IAC3C,OAAO,CAAC,GAAG,CAAQ;IACnB,OAAO,CAAC,GAAG,CAAC,CAAwB;IACpC,OAAO,CAAC,MAAM,CAAgB;IAC9B,OAAO,CAAC,OAAO,CAAQ;IACvB,OAAO,CAAC,qBAAqB,CAAO;IACpC,OAAO,CAAC,WAAW,CAAS;IAC5B,OAAO,CAAC,QAAQ,CAAQ;IACxB,OAAO,CAAC,SAAS,CAAQ;gBAEb,OAAO,EAAE;QACnB,GAAG,EAAE,MAAM,CAAA;QACX,GAAG,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;QAC5B,MAAM,EAAE,cAAc,CAAA;QACtB,OAAO,EAAE,MAAM,CAAA;QACf,qBAAqB,EAAE,GAAG,EAAE,CAAA;QAC5B,WAAW,EAAE,OAAO,CAAA;QACpB,QAAQ,EAAE,MAAM,CAAA;QAChB,SAAS,EAAE,MAAM,CAAA;KAClB;IAWK,GAAG,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC;CAmGjD"}
@@ -0,0 +1,88 @@
1
+ import { execSync } from 'child_process';
2
+ import fs from 'fs';
3
+ import path from 'path';
4
+ import { captureGitDiff } from '../eval-helpers';
5
// When true, error events and failed runs are dumped to JSON files for debugging.
const DEBUG_ERROR = true;

/**
 * Writes a JSON error dump one directory above this module, named
 * `<commitId>-<agentId>-error-<random>.json`.
 *
 * The dump is a diagnostic aid, so it is best-effort: any failure (unwritable
 * directory, non-serializable payload, `__dirname` unavailable in the current
 * module system) is logged and never propagates to the caller.
 *
 * @param {string} commitId - Commit identifier used in the file name and log prefix.
 * @param {string} agentId - Agent identifier used in the file name and log prefix.
 * @param {object} payload - Data serialized into the dump file.
 * @returns {void}
 */
function writeErrorDump(commitId, agentId, payload) {
    try {
        // Short random suffix keeps several dumps for the same commit/agent apart.
        const suffix = Math.random().toString(36).substring(2, 6);
        const file = path.join(__dirname, '..', `${commitId}-${agentId}-error-${suffix}.json`);
        fs.writeFileSync(file, JSON.stringify(payload, null, 2));
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : String(error);
        console.error(`[${commitId}:${agentId}] Failed to write error dump: ${reason}`);
    }
}

/**
 * Runner that executes a prompt through a Codebuff client and reports the
 * emitted events, the cost and the resulting git diff.
 */
export class CodebuffRunner {
    cwd;
    env;
    client;
    agentId;
    localAgentDefinitions;
    printEvents;
    commitId;
    parentSha;

    /**
     * @param {object} options
     * @param {string} options.cwd - Directory the agent runs in; also used for the git commands.
     * @param {Record<string, string>} [options.env] - Environment passed through to the client.
     * @param {object} options.client - Client exposing `run()`.
     * @param {string} options.agentId - Agent to run; also used in log prefixes.
     * @param {any[]} options.localAgentDefinitions - Forwarded as `agentDefinitions`.
     * @param {boolean} options.printEvents - When true, non-error events are logged as JSON.
     * @param {string} options.commitId - Used in log prefixes and error-dump file names.
     * @param {string} options.parentSha - Stored on the instance; not read by `run()`.
     */
    constructor(options) {
        this.cwd = options.cwd;
        this.env = options.env;
        this.client = options.client;
        this.agentId = options.agentId;
        this.localAgentDefinitions = options.localAgentDefinitions;
        this.printEvents = options.printEvents;
        this.commitId = options.commitId;
        this.parentSha = options.parentSha;
    }

    /**
     * Runs the agent for `prompt` and collects its trace.
     *
     * @param {string} prompt - Prompt forwarded to the client.
     * @returns {Promise<{steps: object[], totalCostUsd: number, diff: string}>}
     *   The recorded events, the cost derived from `creditsUsed`, and the git
     *   diff against the HEAD recorded before the run (empty string when the
     *   diff cannot be captured).
     * @throws When `git rev-parse HEAD` fails in `cwd`, or when `client.run()` rejects.
     */
    async run(prompt) {
        const steps = [];
        const logPrefix = `[${this.commitId}:${this.agentId}]`;

        // Record HEAD before the agent starts so the diff is taken against this commit.
        const baseSha = execSync('git rev-parse HEAD', {
            cwd: this.cwd,
            encoding: 'utf-8',
            stdio: ['ignore', 'pipe', 'ignore'],
        }).trim();

        const maxAgentSteps = 40;
        const result = await this.client.run({
            agent: this.agentId,
            prompt,
            agentDefinitions: this.localAgentDefinitions,
            cwd: this.cwd,
            env: this.env,
            maxAgentSteps,
            handleEvent: (event) => {
                // `set_messages` tool calls and results are left out of the trace entirely.
                const isToolEvent = event.type === 'tool_call' || event.type === 'tool_result';
                if (isToolEvent && event.toolName === 'set_messages') {
                    return;
                }
                if (event.type === 'error') {
                    console.error(`${logPrefix} Error event:`, event.message);
                    // Save errors in a file, but not tool calls with invalid json.
                    const isInvalidJson = typeof event.message === 'string' &&
                        event.message.startsWith('Invalid JSON');
                    if (DEBUG_ERROR && !isInvalidJson) {
                        writeErrorDump(this.commitId, this.agentId, {
                            error: event.message,
                            trace: steps,
                        });
                    }
                }
                else if (this.printEvents) {
                    console.log(logPrefix, JSON.stringify(event, null, 2));
                }
                steps.push(event);
            },
        });

        if (result.output.type === 'error') {
            console.error(`${logPrefix} Error:`, result.output.message);
            if (DEBUG_ERROR) {
                // A failed run is always dumped, together with the full trace.
                writeErrorDump(this.commitId, this.agentId, {
                    ...result.output,
                    trace: steps,
                });
            }
        }

        // NOTE(review): dividing by 100 presumably converts credits to USD
        // (1 credit = $0.01) — confirm against the client's pricing.
        const totalCostUsd = (result.sessionState?.mainAgentState.creditsUsed ?? 0) / 100;

        // The diff is best-effort: a failure is reported but still yields an
        // empty diff, so the collected steps and cost are not lost.
        let diff = '';
        try {
            diff = captureGitDiff(this.cwd, { baseRef: baseSha });
        }
        catch (error) {
            const reason = error instanceof Error ? error.message : String(error);
            console.warn(`${logPrefix} Failed to capture git diff: ${reason}`);
        }

        return {
            steps,
            totalCostUsd,
            diff,
        };
    }
}
88
+ //# sourceMappingURL=codebuff.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"codebuff.js","sourceRoot":"","sources":["../../src/runners/codebuff.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAA;AACxC,OAAO,EAAE,MAAM,IAAI,CAAA;AACnB,OAAO,IAAI,MAAM,MAAM,CAAA;AAEvB,OAAO,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAA;AAMhD,MAAM,WAAW,GAAG,IAAI,CAAA;AAExB,MAAM,OAAO,cAAc;IACjB,GAAG,CAAQ;IACX,GAAG,CAAyB;IAC5B,MAAM,CAAgB;IACtB,OAAO,CAAQ;IACf,qBAAqB,CAAO;IAC5B,WAAW,CAAS;IACpB,QAAQ,CAAQ;IAChB,SAAS,CAAQ;IAEzB,YAAY,OASX;QACC,IAAI,CAAC,GAAG,GAAG,OAAO,CAAC,GAAG,CAAA;QACtB,IAAI,CAAC,GAAG,GAAG,OAAO,CAAC,GAAG,CAAA;QACtB,IAAI,CAAC,MAAM,GAAG,OAAO,CAAC,MAAM,CAAA;QAC5B,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC,OAAO,CAAA;QAC9B,IAAI,CAAC,qBAAqB,GAAG,OAAO,CAAC,qBAAqB,CAAA;QAC1D,IAAI,CAAC,WAAW,GAAG,OAAO,CAAC,WAAW,CAAA;QACtC,IAAI,CAAC,QAAQ,GAAG,OAAO,CAAC,QAAQ,CAAA;QAChC,IAAI,CAAC,SAAS,GAAG,OAAO,CAAC,SAAS,CAAA;IACpC,CAAC;IAED,KAAK,CAAC,GAAG,CAAC,MAAc;QACtB,MAAM,KAAK,GAAgB,EAAE,CAAA;QAC7B,IAAI,YAAY,GAAG,CAAC,CAAA;QAEpB,MAAM,OAAO,GAAG,QAAQ,CAAC,oBAAoB,EAAE;YAC7C,GAAG,EAAE,IAAI,CAAC,GAAG;YACb,QAAQ,EAAE,OAAO;YACjB,KAAK,EAAE,CAAC,QAAQ,EAAE,MAAM,EAAE,QAAQ,CAAC;SACpC,CAAC,CAAC,IAAI,EAAE,CAAA;QAET,MAAM,aAAa,GAAG,EAAE,CAAA;QACxB,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC;YACnC,KAAK,EAAE,IAAI,CAAC,OAAO;YACnB,MAAM;YACN,gBAAgB,EAAE,IAAI,CAAC,qBAAqB;YAC5C,GAAG,EAAE,IAAI,CAAC,GAAG;YACb,GAAG,EAAE,IAAI,CAAC,GAAG;YACb,aAAa;YACb,WAAW,EAAE,CAAC,KAAK,EAAE,EAAE;gBACrB,IACE,CAAC,KAAK,CAAC,IAAI,KAAK,WAAW,IAAI,KAAK,CAAC,IAAI,KAAK,aAAa,CAAC;oBAC5D,KAAK,CAAC,QAAQ,KAAK,cAAc,EACjC,CAAC;oBACD,OAAM;gBACR,CAAC;gBACD,IAAI,KAAK,CAAC,IAAI,KAAK,OAAO,EAAE,CAAC;oBAC3B,OAAO,CAAC,KAAK,CACX,IAAI,IAAI,CAAC,QAAQ,IAAI,IAAI,CAAC,OAAO,gBAAgB,EACjD,KAAK,CAAC,OAAO,CACd,CAAA;oBACD,IAAI,WAAW,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,UAAU,CAAC,cAAc,CAAC,EAAE,CAAC;wBAC7D,+DAA+D;wBAC/D,EAAE,CAAC,aAAa,CACd,IAAI,CAAC,IAAI,CACP,SAAS,EACT,IAAI,EACJ,GAAG,IAAI,CAAC,QAAQ,IAAI,IAAI,CAAC,OAAO,UAAU,IAAI,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,SAAS,CAAC,CAAC,EAAE,CAAC,CAAC,OAAO,CAC5F,EACD,IAAI,CAAC,SAAS,CACZ;4BACE,KA
AK,EAAE,KAAK,CAAC,OAAO;4BACpB,KAAK,EAAE,KAAK;yBACb,EACD,IAAI,EACJ,CAAC,CACF,CACF,CAAA;oBACH,CAAC;gBACH,CAAC;qBAAM,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;oBAC5B,OAAO,CAAC,GAAG,CACT,IAAI,IAAI,CAAC,QAAQ,IAAI,IAAI,CAAC,OAAO,GAAG,EACpC,IAAI,CAAC,SAAS,CAAC,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC,CAC/B,CAAA;gBACH,CAAC;gBACD,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;YACnB,CAAC;SACF,CAAC,CAAA;QAEF,IAAI,MAAM,CAAC,MAAM,CAAC,IAAI,KAAK,OAAO,EAAE,CAAC;YACnC,OAAO,CAAC,KAAK,CACX,IAAI,IAAI,CAAC,QAAQ,IAAI,IAAI,CAAC,OAAO,UAAU,EAC3C,MAAM,CAAC,MAAM,CAAC,OAAO,CACtB,CAAA;YACD,IAAI,WAAW,EAAE,CAAC;gBAChB,+DAA+D;gBAC/D,EAAE,CAAC,aAAa,CACd,IAAI,CAAC,IAAI,CACP,SAAS,EACT,IAAI,EACJ,GAAG,IAAI,CAAC,QAAQ,IAAI,IAAI,CAAC,OAAO,UAAU,IAAI,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,SAAS,CAAC,CAAC,EAAE,CAAC,CAAC,OAAO,CAC5F,EACD,IAAI,CAAC,SAAS,CACZ;oBACE,GAAG,MAAM,CAAC,MAAM;oBAChB,KAAK,EAAE,KAAK;iBACb,EACD,IAAI,EACJ,CAAC,CACF,CACF,CAAA;YACH,CAAC;QACH,CAAC;QAED,YAAY,GAAG,CAAC,MAAM,CAAC,YAAY,EAAE,cAAc,CAAC,WAAW,IAAI,CAAC,CAAC,GAAG,GAAG,CAAA;QAE3E,+CAA+C;QAC/C,IAAI,IAAI,GAAG,EAAE,CAAA;QACb,IAAI,CAAC;YACH,IAAI,GAAG,cAAc,CAAC,IAAI,CAAC,GAAG,EAAE,EAAE,OAAO,EAAE,OAAO,EAAE,CAAC,CAAA;QACvD,CAAC;QAAC,MAAM,CAAC;YACP,oBAAoB;QACtB,CAAC;QAED,OAAO;YACL,KAAK;YACL,YAAY;YACZ,IAAI;SACL,CAAA;IACH,CAAC;CACF"}