agent-eval-opencode 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129) hide show
  1. package/dist/cli.d.ts +6 -0
  2. package/dist/cli.d.ts.map +1 -0
  3. package/dist/cli.js +590 -0
  4. package/dist/cli.js.map +1 -0
  5. package/dist/index.d.ts +30 -0
  6. package/dist/index.d.ts.map +1 -0
  7. package/dist/index.js +29 -0
  8. package/dist/index.js.map +1 -0
  9. package/dist/lib/agents/claude-code.d.ts +12 -0
  10. package/dist/lib/agents/claude-code.d.ts.map +1 -0
  11. package/dist/lib/agents/claude-code.js +231 -0
  12. package/dist/lib/agents/claude-code.js.map +1 -0
  13. package/dist/lib/agents/codex.d.ts +12 -0
  14. package/dist/lib/agents/codex.d.ts.map +1 -0
  15. package/dist/lib/agents/codex.js +267 -0
  16. package/dist/lib/agents/codex.js.map +1 -0
  17. package/dist/lib/agents/cursor.d.ts +10 -0
  18. package/dist/lib/agents/cursor.d.ts.map +1 -0
  19. package/dist/lib/agents/cursor.js +204 -0
  20. package/dist/lib/agents/cursor.js.map +1 -0
  21. package/dist/lib/agents/gemini.d.ts +10 -0
  22. package/dist/lib/agents/gemini.d.ts.map +1 -0
  23. package/dist/lib/agents/gemini.js +207 -0
  24. package/dist/lib/agents/gemini.js.map +1 -0
  25. package/dist/lib/agents/index.d.ts +7 -0
  26. package/dist/lib/agents/index.d.ts.map +1 -0
  27. package/dist/lib/agents/index.js +20 -0
  28. package/dist/lib/agents/index.js.map +1 -0
  29. package/dist/lib/agents/opencode.d.ts +11 -0
  30. package/dist/lib/agents/opencode.d.ts.map +1 -0
  31. package/dist/lib/agents/opencode.js +245 -0
  32. package/dist/lib/agents/opencode.js.map +1 -0
  33. package/dist/lib/agents/registry.d.ts +23 -0
  34. package/dist/lib/agents/registry.d.ts.map +1 -0
  35. package/dist/lib/agents/registry.js +35 -0
  36. package/dist/lib/agents/registry.js.map +1 -0
  37. package/dist/lib/agents/shared.d.ts +83 -0
  38. package/dist/lib/agents/shared.d.ts.map +1 -0
  39. package/dist/lib/agents/shared.js +192 -0
  40. package/dist/lib/agents/shared.js.map +1 -0
  41. package/dist/lib/agents/types.d.ts +73 -0
  42. package/dist/lib/agents/types.d.ts.map +1 -0
  43. package/dist/lib/agents/types.js +5 -0
  44. package/dist/lib/agents/types.js.map +1 -0
  45. package/dist/lib/classifier.d.ts +89 -0
  46. package/dist/lib/classifier.d.ts.map +1 -0
  47. package/dist/lib/classifier.js +285 -0
  48. package/dist/lib/classifier.js.map +1 -0
  49. package/dist/lib/config.d.ts +37 -0
  50. package/dist/lib/config.d.ts.map +1 -0
  51. package/dist/lib/config.js +187 -0
  52. package/dist/lib/config.js.map +1 -0
  53. package/dist/lib/dashboard.d.ts +65 -0
  54. package/dist/lib/dashboard.d.ts.map +1 -0
  55. package/dist/lib/dashboard.js +237 -0
  56. package/dist/lib/dashboard.js.map +1 -0
  57. package/dist/lib/docker-sandbox.d.ts +92 -0
  58. package/dist/lib/docker-sandbox.d.ts.map +1 -0
  59. package/dist/lib/docker-sandbox.js +375 -0
  60. package/dist/lib/docker-sandbox.js.map +1 -0
  61. package/dist/lib/fingerprint.d.ts +15 -0
  62. package/dist/lib/fingerprint.d.ts.map +1 -0
  63. package/dist/lib/fingerprint.js +59 -0
  64. package/dist/lib/fingerprint.js.map +1 -0
  65. package/dist/lib/fixture.d.ts +55 -0
  66. package/dist/lib/fixture.d.ts.map +1 -0
  67. package/dist/lib/fixture.js +215 -0
  68. package/dist/lib/fixture.js.map +1 -0
  69. package/dist/lib/housekeeping.d.ts +26 -0
  70. package/dist/lib/housekeeping.d.ts.map +1 -0
  71. package/dist/lib/housekeeping.js +170 -0
  72. package/dist/lib/housekeeping.js.map +1 -0
  73. package/dist/lib/init.d.ts +21 -0
  74. package/dist/lib/init.d.ts.map +1 -0
  75. package/dist/lib/init.js +275 -0
  76. package/dist/lib/init.js.map +1 -0
  77. package/dist/lib/o11y/index.d.ts +13 -0
  78. package/dist/lib/o11y/index.d.ts.map +1 -0
  79. package/dist/lib/o11y/index.js +13 -0
  80. package/dist/lib/o11y/index.js.map +1 -0
  81. package/dist/lib/o11y/parsers/claude-code.d.ts +18 -0
  82. package/dist/lib/o11y/parsers/claude-code.d.ts.map +1 -0
  83. package/dist/lib/o11y/parsers/claude-code.js +343 -0
  84. package/dist/lib/o11y/parsers/claude-code.js.map +1 -0
  85. package/dist/lib/o11y/parsers/codex.d.ts +17 -0
  86. package/dist/lib/o11y/parsers/codex.d.ts.map +1 -0
  87. package/dist/lib/o11y/parsers/codex.js +364 -0
  88. package/dist/lib/o11y/parsers/codex.js.map +1 -0
  89. package/dist/lib/o11y/parsers/cursor.d.ts +21 -0
  90. package/dist/lib/o11y/parsers/cursor.d.ts.map +1 -0
  91. package/dist/lib/o11y/parsers/cursor.js +226 -0
  92. package/dist/lib/o11y/parsers/cursor.js.map +1 -0
  93. package/dist/lib/o11y/parsers/gemini.d.ts +21 -0
  94. package/dist/lib/o11y/parsers/gemini.d.ts.map +1 -0
  95. package/dist/lib/o11y/parsers/gemini.js +241 -0
  96. package/dist/lib/o11y/parsers/gemini.js.map +1 -0
  97. package/dist/lib/o11y/parsers/index.d.ts +55 -0
  98. package/dist/lib/o11y/parsers/index.d.ts.map +1 -0
  99. package/dist/lib/o11y/parsers/index.js +284 -0
  100. package/dist/lib/o11y/parsers/index.js.map +1 -0
  101. package/dist/lib/o11y/parsers/opencode.d.ts +17 -0
  102. package/dist/lib/o11y/parsers/opencode.d.ts.map +1 -0
  103. package/dist/lib/o11y/parsers/opencode.js +320 -0
  104. package/dist/lib/o11y/parsers/opencode.js.map +1 -0
  105. package/dist/lib/o11y/types.d.ts +113 -0
  106. package/dist/lib/o11y/types.d.ts.map +1 -0
  107. package/dist/lib/o11y/types.js +6 -0
  108. package/dist/lib/o11y/types.js.map +1 -0
  109. package/dist/lib/results.d.ts +91 -0
  110. package/dist/lib/results.d.ts.map +1 -0
  111. package/dist/lib/results.js +361 -0
  112. package/dist/lib/results.js.map +1 -0
  113. package/dist/lib/runner.d.ts +71 -0
  114. package/dist/lib/runner.d.ts.map +1 -0
  115. package/dist/lib/runner.js +267 -0
  116. package/dist/lib/runner.js.map +1 -0
  117. package/dist/lib/sandbox.d.ts +173 -0
  118. package/dist/lib/sandbox.d.ts.map +1 -0
  119. package/dist/lib/sandbox.js +337 -0
  120. package/dist/lib/sandbox.js.map +1 -0
  121. package/dist/lib/types.d.ts +258 -0
  122. package/dist/lib/types.d.ts.map +1 -0
  123. package/dist/lib/types.js +15 -0
  124. package/dist/lib/types.js.map +1 -0
  125. package/dist/test-setup.d.ts +2 -0
  126. package/dist/test-setup.d.ts.map +1 -0
  127. package/dist/test-setup.js +6 -0
  128. package/dist/test-setup.js.map +1 -0
  129. package/package.json +72 -0
@@ -0,0 +1,275 @@
1
+ /**
2
+ * Project initialization - create new eval projects.
3
+ */
4
+ import { mkdirSync, writeFileSync, existsSync } from 'fs';
5
+ import { join, dirname } from 'path';
6
+ import pkg from '../../package.json' with { type: 'json' };
7
/**
 * Render the package.json for a newly scaffolded eval project.
 *
 * The '@vercel/agent-eval' dev dependency is written as a caret range of
 * `pkg.version`, the version read from this package's own package.json.
 *
 * @param {string} projectName - Value written to the manifest's `name` field.
 * @returns {string} Manifest serialized as JSON with 2-space indentation.
 */
function getPackageJson(projectName) {
    const devDependencies = {
        '@vercel/agent-eval': `^${pkg.version}`,
        '@types/node': '^22.0.0',
        typescript: '^5.6.0',
        vitest: '^2.1.0',
    };
    const manifest = {
        name: projectName,
        version: '0.0.1',
        private: true,
        type: 'module',
        devDependencies,
    };
    return JSON.stringify(manifest, null, 2);
}
24
/**
 * Render the .env.example file for a new project.
 *
 * The file contains only comments: a note about where OpenCode credentials
 * are read from, and commented-out Vercel sandbox variables.
 *
 * @returns {string} File contents, terminated by a newline.
 */
function getEnvExample() {
    const lines = [
        '# OpenCode agent uses credentials from your local OpenCode installation.',
        '# No API keys are needed here -- auth is read from ~/.local/share/opencode/auth.json',
        '# and ~/.config/github-copilot/apps.json automatically.',
        '',
        '# To use the Vercel sandbox instead of Docker, set these:',
        '# VERCEL_TOKEN=your-vercel-token',
        '# VERCEL_TEAM_ID=your-team-id',
        '# VERCEL_PROJECT_ID=your-project-id',
        '',
    ];
    return lines.join('\n');
}
38
/**
 * Render the .gitignore file for a new project.
 *
 * @returns {string} One ignore pattern per line, terminated by a newline.
 */
function getGitignore() {
    const patterns = [
        'node_modules/',
        'dist/',
        '.env',
        '.env.local',
        'results/',
        '*.log',
        '.DS_Store',
    ];
    return `${patterns.join('\n')}\n`;
}
51
/**
 * Render the README.md for a new project.
 *
 * Sections: prerequisites, setup, and how to preview, run and view evals.
 *
 * @returns {string} Markdown text, ending with a blank line.
 */
function getReadme() {
    const lines = [
        '# Agent Evaluation Suite',
        '',
        'Test AI coding agents to measure what actually works.',
        '',
        '## Prerequisites',
        '',
        '- **Docker** running (Colima, Docker Desktop, or OrbStack)',
        '- **OpenCode** authenticated with GitHub Copilot (`~/.local/share/opencode/auth.json` must exist)',
        '',
        '## Setup',
        '',
        '1. **Install dependencies:**',
        '```bash',
        'npm install',
        '```',
        '',
        '## Running Evals',
        '',
        '### Preview (no cost)',
        '',
        'See what will run without making API calls:',
        '',
        '```bash',
        'npx @vercel/agent-eval opencode --dry',
        '```',
        '',
        '### Run Experiments',
        '',
        '```bash',
        'npx @vercel/agent-eval opencode',
        '```',
        '',
        '### View Results',
        '',
        'Launch the web-based results viewer:',
        '',
        '```bash',
        'npx @vercel/agent-eval playground',
        '```',
        '',
        'Open [http://localhost:3000](http://localhost:3000) to browse results.',
        '',
        // Final empty entry yields the newline that closes the last (blank) line.
        '',
    ];
    return lines.join('\n');
}
99
/**
 * Render experiments/opencode.ts, the default experiment definition.
 *
 * The generated TypeScript module default-exports an ExperimentConfig that
 * selects the 'opencode' agent and the 'docker' sandbox.
 *
 * @returns {string} TypeScript source, terminated by a newline.
 */
function getOpencodeExperiment() {
    const source = [
        "import type { ExperimentConfig } from '@vercel/agent-eval';",
        '',
        'const config: ExperimentConfig = {',
        "agent: 'opencode',",
        "model: 'github-copilot/claude-opus-4.6',",
        "sandbox: 'docker',",
        'runs: 1,',
        'earlyExit: true,',
        "scripts: ['build'],",
        'timeout: 600,',
        '};',
        '',
        'export default config;',
        '',
    ];
    return source.join('\n');
}
115
/**
 * Render PROMPT.md for the example "add-greeting" eval fixture.
 *
 * @returns {string} The task description handed to the agent, newline-terminated.
 */
function getExamplePrompt() {
    const task = 'Add a greeting message below the heading that says "Welcome, user!"';
    const requirements = [
        '- Add a paragraph element below the h1',
        '- The text should be exactly "Welcome, user!"',
        '- Keep the existing heading unchanged',
    ];
    return [task, '', 'Requirements:', ...requirements, ''].join('\n');
}
127
/**
 * Render EVAL.ts for the example "add-greeting" eval fixture.
 *
 * The generated vitest file has two tests: one checks that src/App.tsx
 * contains the greeting text, the other runs `npm run build`.
 *
 * @returns {string} TypeScript source, terminated by a newline.
 */
function getExampleEval() {
    const source = [
        "import { readFileSync } from 'fs';",
        "import { execSync } from 'child_process';",
        "import { test, expect } from 'vitest';",
        '',
        "test('greeting message exists in source', () => {",
        "const content = readFileSync('src/App.tsx', 'utf-8');",
        "expect(content).toContain('Welcome, user!');",
        '});',
        '',
        "test('app still builds', () => {",
        '// This throws if the build fails',
        "execSync('npm run build', { stdio: 'pipe' });",
        '});',
        '',
    ];
    return source.join('\n');
}
146
/**
 * Render package.json for the example "add-greeting" eval fixture.
 *
 * @returns {string} Manifest serialized as JSON with 2-space indentation.
 */
function getExamplePackageJson() {
    const scripts = { build: 'tsc' };
    const dependencies = { react: '^18.0.0' };
    const devDependencies = {
        '@types/react': '^18.0.0',
        typescript: '^5.0.0',
        vitest: '^2.1.0',
    };
    const manifest = {
        name: 'add-greeting',
        type: 'module',
        scripts,
        dependencies,
        devDependencies,
    };
    return JSON.stringify(manifest, null, 2);
}
166
/**
 * Render the project-root tsconfig.json.
 *
 * The generated config sets `noEmit` and includes only the `experiments`
 * directory.
 *
 * @returns {string} Config serialized as JSON with 2-space indentation.
 */
function getRootTsconfig() {
    const compilerOptions = {
        target: 'ES2022',
        module: 'NodeNext',
        moduleResolution: 'NodeNext',
        strict: true,
        skipLibCheck: true,
        noEmit: true,
        lib: ['ES2022'],
    };
    const tsconfig = { compilerOptions, include: ['experiments'] };
    return JSON.stringify(tsconfig, null, 2);
}
183
/**
 * Render tsconfig.json for the example "add-greeting" eval fixture.
 *
 * The generated config compiles `src` with the react-jsx transform and
 * emits into `dist`.
 *
 * @returns {string} Config serialized as JSON with 2-space indentation.
 */
function getExampleTsconfig() {
    const compilerOptions = {
        target: 'ES2020',
        module: 'ESNext',
        moduleResolution: 'bundler',
        jsx: 'react-jsx',
        strict: true,
        outDir: 'dist',
        skipLibCheck: true,
    };
    const tsconfig = { compilerOptions, include: ['src'] };
    return JSON.stringify(tsconfig, null, 2);
}
200
/**
 * Render src/App.tsx for the example "add-greeting" eval fixture.
 *
 * The generated component renders a heading and a TODO placeholder where
 * the prompt asks the agent to add the greeting.
 *
 * @returns {string} TSX source, terminated by a newline.
 */
function getExampleApp() {
    const source = [
        'export function App() {',
        'return (',
        '<div>',
        '<h1>Hello World</h1>',
        '{/* TODO: Add greeting message here */}',
        '</div>',
        ');',
        '}',
        '',
        'export default App;',
        '',
    ];
    return source.join('\n');
}
216
/**
 * Collect every file that makes up a freshly scaffolded project.
 *
 * @param {string} projectName - Forwarded to getPackageJson for the root manifest.
 * @returns {{ path: string, content: string }[]} Entries in write order;
 *   each `path` is relative to the project root.
 */
function getTemplateFiles(projectName) {
    const files = [];
    const add = (path, content) => {
        files.push({ path, content });
    };
    // Project-level files.
    add('package.json', getPackageJson(projectName));
    add('tsconfig.json', getRootTsconfig());
    add('.env.example', getEnvExample());
    add('.gitignore', getGitignore());
    add('README.md', getReadme());
    add('experiments/opencode.ts', getOpencodeExperiment());
    // Example eval fixture.
    add('evals/add-greeting/PROMPT.md', getExamplePrompt());
    add('evals/add-greeting/EVAL.ts', getExampleEval());
    add('evals/add-greeting/package.json', getExamplePackageJson());
    add('evals/add-greeting/tsconfig.json', getExampleTsconfig());
    add('evals/add-greeting/src/App.tsx', getExampleApp());
    return files;
}
234
/**
 * Scaffold a new eval project on disk.
 *
 * @param {{ name: string, targetDir?: string }} options - `name` is both the
 *   directory name and the project name passed to the templates; `targetDir`
 *   is the parent directory and falls back to the current working directory
 *   when null or undefined.
 * @returns {string} Path of the created project directory.
 * @throws {Error} If the project directory already exists.
 */
export function initProject(options) {
    const parentDir = options.targetDir ?? process.cwd();
    const projectRoot = join(parentDir, options.name);
    // Refuse to write into an existing directory.
    if (existsSync(projectRoot)) {
        throw new Error(`Directory already exists: ${projectRoot}`);
    }
    mkdirSync(projectRoot, { recursive: true });
    getTemplateFiles(options.name).forEach(({ path, content }) => {
        const destination = join(projectRoot, path);
        // Template paths may be nested, so create the parent directory first.
        mkdirSync(dirname(destination), { recursive: true });
        writeFileSync(destination, content);
    });
    return projectRoot;
}
258
/**
 * Build the message shown after a project has been created.
 *
 * @param {string} projectDir - Path reported as the project location.
 * @param {string} projectName - Directory name used in the `cd` step.
 * @returns {string} Multi-line text that starts and ends with a newline.
 */
export function getPostInitInstructions(projectDir, projectName) {
    const lines = [
        '',
        `Project created at: ${projectDir}`,
        '',
        'Next steps:',
        `1. cd ${projectName}`,
        '2. npm install',
        '3. npx @vercel/agent-eval opencode',
        '',
        'Prerequisites:',
        '- Docker running (Colima, Docker Desktop, or OrbStack)',
        '- OpenCode authenticated with GitHub Copilot',
        '',
    ];
    return lines.join('\n');
}
275
+ //# sourceMappingURL=init.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"init.js","sourceRoot":"","sources":["../../src/lib/init.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,SAAS,EAAE,aAAa,EAAE,UAAU,EAAE,MAAM,IAAI,CAAC;AAC1D,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,MAAM,CAAC;AACrC,OAAO,GAAG,MAAM,oBAAoB,CAAC,OAAO,IAAI,EAAE,MAAM,EAAE,CAAC;AAoB3D;;GAEG;AACH,SAAS,cAAc,CAAC,WAAmB;IACzC,OAAO,IAAI,CAAC,SAAS,CACnB;QACE,IAAI,EAAE,WAAW;QACjB,OAAO,EAAE,OAAO;QAChB,OAAO,EAAE,IAAI;QACb,IAAI,EAAE,QAAQ;QACd,eAAe,EAAE;YACf,oBAAoB,EAAE,IAAI,GAAG,CAAC,OAAO,EAAE;YACvC,aAAa,EAAE,SAAS;YACxB,UAAU,EAAE,QAAQ;YACpB,MAAM,EAAE,QAAQ;SACjB;KACF,EACD,IAAI,EACJ,CAAC,CACF,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAS,aAAa;IACpB,OAAO;;;;;;;;CAQR,CAAC;AACF,CAAC;AAED;;GAEG;AACH,SAAS,YAAY;IACnB,OAAO;;;;;;;CAOR,CAAC;AACF,CAAC;AAED;;GAEG;AACH,SAAS,SAAS;IAChB,OAAO;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CA0CR,CAAC;AACF,CAAC;AAED,SAAS,qBAAqB;IAC5B,OAAO;;;;;;;;;;;;;CAaR,CAAC;AACF,CAAC;AAED;;GAEG;AACH,SAAS,gBAAgB;IACvB,OAAO;;;;;;CAMR,CAAC;AACF,CAAC;AAED;;GAEG;AACH,SAAS,cAAc;IACrB,OAAO;;;;;;;;;;;;;CAaR,CAAC;AACF,CAAC;AAED;;GAEG;AACH,SAAS,qBAAqB;IAC5B,OAAO,IAAI,CAAC,SAAS,CACnB;QACE,IAAI,EAAE,cAAc;QACpB,IAAI,EAAE,QAAQ;QACd,OAAO,EAAE;YACP,KAAK,EAAE,KAAK;SACb;QACD,YAAY,EAAE;YACZ,KAAK,EAAE,SAAS;SACjB;QACD,eAAe,EAAE;YACf,cAAc,EAAE,SAAS;YACzB,UAAU,EAAE,QAAQ;YACpB,MAAM,EAAE,QAAQ;SACjB;KACF,EACD,IAAI,EACJ,CAAC,CACF,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAS,eAAe;IACtB,OAAO,IAAI,CAAC,SAAS,CACnB;QACE,eAAe,EAAE;YACf,MAAM,EAAE,QAAQ;YAChB,MAAM,EAAE,UAAU;YAClB,gBAAgB,EAAE,UAAU;YAC5B,MAAM,EAAE,IAAI;YACZ,YAAY,EAAE,IAAI;YAClB,MAAM,EAAE,IAAI;YACZ,GAAG,EAAE,CAAC,QAAQ,CAAC;SAChB;QACD,OAAO,EAAE,CAAC,aAAa,CAAC;KACzB,EACD,IAAI,EACJ,CAAC,CACF,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAS,kBAAkB;IACzB,OAAO,IAAI,CAAC,SAAS,CACnB;QACE,eAAe,EAAE;YACf,MAAM,EAAE,QAAQ;YAChB,MAAM,EAAE,QAAQ;YAChB,gBAAgB,EAAE,SAAS;YAC3B,GAAG,EAAE,WAAW;YAChB,MAAM,EAAE,IAAI;YACZ,MAAM,EAAE,MAAM;YACd,YAAY,EAAE,IAAI;SACnB;QACD,OAAO,EAAE,CAAC,KAAK,CAAC;KACjB,EACD,IAAI,EACJ,CAAC,CACF,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAS,aAAa;IACpB,OAAO;;;;;
;;;;;CAUR,CAAC;AACF,CAAC;AAED;;GAEG;AACH,SAAS,gBAAgB,CAAC,WAAmB;IAC3C,OAAO;QACL,EAAE,IAAI,EAAE,cAAc,EAAE,OAAO,EAAE,cAAc,CAAC,WAAW,CAAC,EAAE;QAC9D,EAAE,IAAI,EAAE,eAAe,EAAE,OAAO,EAAE,eAAe,EAAE,EAAE;QACrD,EAAE,IAAI,EAAE,cAAc,EAAE,OAAO,EAAE,aAAa,EAAE,EAAE;QAClD,EAAE,IAAI,EAAE,YAAY,EAAE,OAAO,EAAE,YAAY,EAAE,EAAE;QAC/C,EAAE,IAAI,EAAE,WAAW,EAAE,OAAO,EAAE,SAAS,EAAE,EAAE;QAC3C,EAAE,IAAI,EAAE,yBAAyB,EAAE,OAAO,EAAE,qBAAqB,EAAE,EAAE;QACrE,EAAE,IAAI,EAAE,8BAA8B,EAAE,OAAO,EAAE,gBAAgB,EAAE,EAAE;QACrE,EAAE,IAAI,EAAE,4BAA4B,EAAE,OAAO,EAAE,cAAc,EAAE,EAAE;QACjE,EAAE,IAAI,EAAE,iCAAiC,EAAE,OAAO,EAAE,qBAAqB,EAAE,EAAE;QAC7E,EAAE,IAAI,EAAE,kCAAkC,EAAE,OAAO,EAAE,kBAAkB,EAAE,EAAE;QAC3E,EAAE,IAAI,EAAE,gCAAgC,EAAE,OAAO,EAAE,aAAa,EAAE,EAAE;KACrE,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,WAAW,CAAC,OAAoB;IAC9C,MAAM,SAAS,GAAG,OAAO,CAAC,SAAS,IAAI,OAAO,CAAC,GAAG,EAAE,CAAC;IACrD,MAAM,UAAU,GAAG,IAAI,CAAC,SAAS,EAAE,OAAO,CAAC,IAAI,CAAC,CAAC;IAEjD,oCAAoC;IACpC,IAAI,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;QAC3B,MAAM,IAAI,KAAK,CAAC,6BAA6B,UAAU,EAAE,CAAC,CAAC;IAC7D,CAAC;IAED,2BAA2B;IAC3B,SAAS,CAAC,UAAU,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAE3C,2BAA2B;IAC3B,MAAM,KAAK,GAAG,gBAAgB,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;IAC7C,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,QAAQ,GAAG,IAAI,CAAC,UAAU,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC;QAC7C,MAAM,OAAO,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC;QAElC,4BAA4B;QAC5B,SAAS,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QAExC,aAAa;QACb,aAAa,CAAC,QAAQ,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC;IACxC,CAAC;IAED,OAAO,UAAU,CAAC;AACpB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,uBAAuB,CAAC,UAAkB,EAAE,WAAmB;IAC7E,OAAO;sBACa,UAAU;;;UAGtB,WAAW;;;;;;;CAOpB,CAAC;AACF,CAAC"}
@@ -0,0 +1,13 @@
1
+ /**
2
+ * Observability module for agent-eval.
3
+ * Provides transcript parsing and analysis across all agents.
4
+ */
5
+ export type { ToolName, TranscriptEvent, WebFetchInfo, FileOperationInfo, ShellCommandInfo, TranscriptSummary, Transcript, } from './types.js';
6
+ export { parseTranscript, parseTranscriptSummary, loadTranscript, SUPPORTED_AGENTS } from './parsers/index.js';
7
+ export type { ParseableAgent } from './parsers/index.js';
8
+ export { parseClaudeCodeTranscript } from './parsers/claude-code.js';
9
+ export { parseCodexTranscript } from './parsers/codex.js';
10
+ export { parseOpenCodeTranscript } from './parsers/opencode.js';
11
+ export { parseGeminiTranscript } from './parsers/gemini.js';
12
+ export { parseCursorTranscript } from './parsers/cursor.js';
13
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/lib/o11y/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAGH,YAAY,EACV,QAAQ,EACR,eAAe,EACf,YAAY,EACZ,iBAAiB,EACjB,gBAAgB,EAChB,iBAAiB,EACjB,UAAU,GACX,MAAM,YAAY,CAAC;AAGpB,OAAO,EAAE,eAAe,EAAE,sBAAsB,EAAE,cAAc,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AAC/G,YAAY,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AAGzD,OAAO,EAAE,yBAAyB,EAAE,MAAM,0BAA0B,CAAC;AACrE,OAAO,EAAE,oBAAoB,EAAE,MAAM,oBAAoB,CAAC;AAC1D,OAAO,EAAE,uBAAuB,EAAE,MAAM,uBAAuB,CAAC;AAChE,OAAO,EAAE,qBAAqB,EAAE,MAAM,qBAAqB,CAAC;AAC5D,OAAO,EAAE,qBAAqB,EAAE,MAAM,qBAAqB,CAAC"}
@@ -0,0 +1,13 @@
1
+ /**
2
+ * Observability module for agent-eval.
3
+ * Provides transcript parsing and analysis across all agents.
4
+ */
5
+ // Main parsing functions
6
+ export { parseTranscript, parseTranscriptSummary, loadTranscript, SUPPORTED_AGENTS } from './parsers/index.js';
7
+ // Individual parsers (for advanced use)
8
+ export { parseClaudeCodeTranscript } from './parsers/claude-code.js';
9
+ export { parseCodexTranscript } from './parsers/codex.js';
10
+ export { parseOpenCodeTranscript } from './parsers/opencode.js';
11
+ export { parseGeminiTranscript } from './parsers/gemini.js';
12
+ export { parseCursorTranscript } from './parsers/cursor.js';
13
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/lib/o11y/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAaH,yBAAyB;AACzB,OAAO,EAAE,eAAe,EAAE,sBAAsB,EAAE,cAAc,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AAG/G,wCAAwC;AACxC,OAAO,EAAE,yBAAyB,EAAE,MAAM,0BAA0B,CAAC;AACrE,OAAO,EAAE,oBAAoB,EAAE,MAAM,oBAAoB,CAAC;AAC1D,OAAO,EAAE,uBAAuB,EAAE,MAAM,uBAAuB,CAAC;AAChE,OAAO,EAAE,qBAAqB,EAAE,MAAM,qBAAqB,CAAC;AAC5D,OAAO,EAAE,qBAAqB,EAAE,MAAM,qBAAqB,CAAC"}
@@ -0,0 +1,18 @@
1
+ /**
2
+ * Parser for Claude Code transcript format.
3
+ * Claude Code stores transcripts as JSONL at ~/.claude/projects/{path}/{session}.jsonl
4
+ *
5
+ * Format reference (based on Claude Code CLI output):
6
+ * - Messages have type: "user" | "assistant"
7
+ * - Tool use appears in assistant messages with tool_use blocks
8
+ * - Tool results appear as separate messages with type: "tool_result"
9
+ */
10
+ import type { TranscriptEvent } from '../types.js';
11
+ /**
12
+ * Parse Claude Code JSONL transcript into events.
13
+ */
14
+ export declare function parseClaudeCodeTranscript(raw: string): {
15
+ events: TranscriptEvent[];
16
+ errors: string[];
17
+ };
18
+ //# sourceMappingURL=claude-code.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"claude-code.d.ts","sourceRoot":"","sources":["../../../../src/lib/o11y/parsers/claude-code.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,KAAK,EAAE,eAAe,EAAY,MAAM,aAAa,CAAC;AA8T7D;;GAEG;AACH,wBAAgB,yBAAyB,CAAC,GAAG,EAAE,MAAM,GAAG;IACtD,MAAM,EAAE,eAAe,EAAE,CAAC;IAC1B,MAAM,EAAE,MAAM,EAAE,CAAC;CAClB,CAgDA"}
@@ -0,0 +1,343 @@
1
+ /**
2
+ * Parser for Claude Code transcript format.
3
+ * Claude Code stores transcripts as JSONL at ~/.claude/projects/{path}/{session}.jsonl
4
+ *
5
+ * Format reference (based on Claude Code CLI output):
6
+ * - Messages have type: "user" | "assistant"
7
+ * - Tool use appears in assistant messages with tool_use blocks
8
+ * - Tool results appear as separate messages with type: "tool_result"
9
+ */
10
/**
 * Tool names as they appear in Claude Code transcripts, keyed to the
 * canonical tool names used by the rest of the observability module.
 *
 * A Map is used instead of a plain object so that lookups never reach
 * Object.prototype: with an object literal, names such as "constructor",
 * "toString" or "__proto__" resolve to inherited members rather than to
 * "no mapping".
 */
const CLAUDE_CODE_TOOL_NAMES = new Map([
    // File operations
    ['Read', 'file_read'],
    ['read_file', 'file_read'],
    ['ReadFile', 'file_read'],
    ['Write', 'file_write'],
    ['write_file', 'file_write'],
    ['WriteFile', 'file_write'],
    ['write_to_file', 'file_write'],
    ['Edit', 'file_edit'],
    ['edit_file', 'file_edit'],
    ['EditFile', 'file_edit'],
    ['str_replace_editor', 'file_edit'],
    ['StrReplace', 'file_edit'],
    // Shell
    ['Bash', 'shell'],
    ['bash', 'shell'],
    ['Shell', 'shell'],
    ['shell', 'shell'],
    ['execute_command', 'shell'],
    ['run_command', 'shell'],
    // Web
    ['WebFetch', 'web_fetch'],
    ['web_fetch', 'web_fetch'],
    ['fetch_url', 'web_fetch'],
    ['mcp__fetch__fetch', 'web_fetch'],
    ['WebSearch', 'web_search'],
    ['web_search', 'web_search'],
    // Search/navigation
    ['Glob', 'glob'],
    ['glob', 'glob'],
    ['list_files', 'glob'],
    ['Grep', 'grep'],
    ['grep', 'grep'],
    ['search_files', 'grep'],
    ['LS', 'list_dir'],
    ['list_dir', 'list_dir'],
    ['ListDir', 'list_dir'],
    // Agent/subagent tools
    ['Task', 'agent_task'],
    ['task', 'agent_task'],
]);
/**
 * Map a Claude Code tool name to its canonical name.
 *
 * @param {string} name - Tool name as recorded in the transcript.
 * @returns {string} The canonical tool name, or 'unknown' when the name has
 *   no mapping (including names that merely collide with Object.prototype
 *   members, and non-string values such as undefined).
 */
function normalizeToolName(name) {
    return CLAUDE_CODE_TOOL_NAMES.get(name) ?? 'unknown';
}
58
/**
 * Pick the file path out of a tool call's arguments.
 *
 * Checks `path`, `file_path` and `filename` in that order and returns the
 * first truthy value; otherwise returns whatever `file` holds.
 *
 * @param {object} args - Tool call arguments.
 * @returns {*} The path value, or a falsy value when none is present.
 */
function extractFilePath(args) {
    for (const key of ['path', 'file_path', 'filename']) {
        const candidate = args[key];
        if (candidate) {
            return candidate;
        }
    }
    return args.file;
}
64
/**
 * Pick the URL out of a tool call's arguments.
 *
 * Checks `url` and `uri` in that order and returns the first truthy value;
 * otherwise returns whatever `href` holds.
 *
 * @param {object} args - Tool call arguments.
 * @returns {*} The URL value, or a falsy value when none is present.
 */
function extractUrl(args) {
    if (args.url) {
        return args.url;
    }
    if (args.uri) {
        return args.uri;
    }
    return args.href;
}
70
/**
 * Pick the shell command out of a tool call's arguments.
 *
 * A string `command` is returned as-is, an array `command` is joined with
 * single spaces, and a string `cmd` is used as the last resort.
 *
 * @param {object} args - Tool call arguments.
 * @returns {string | undefined} The command line, or undefined when absent.
 */
function extractCommand(args) {
    const { command, cmd } = args;
    if (Array.isArray(command)) {
        return command.join(' ');
    }
    if (typeof command === 'string') {
        return command;
    }
    return typeof cmd === 'string' ? cmd : undefined;
}
82
/**
 * Parse a single JSONL line from a Claude Code transcript into events.
 *
 * One line can yield several events: an assistant line may produce a
 * message, one tool_call per tool use, and a thinking event; a user line
 * that carries tool_result blocks produces one tool_result event per block.
 *
 * Malformed JSON is reported by throwing, so that parseClaudeCodeTranscript
 * (which wraps this call in try/catch) can record it in its `errors` list.
 * Previously the failure was swallowed here and that list could never be
 * populated. Event extraction itself stays best-effort: if it fails part-way
 * through, the events gathered so far are still returned.
 *
 * @param {string} line - One non-empty line of the JSONL transcript.
 * @returns {object[]} Events extracted from the line (possibly empty).
 * @throws {SyntaxError} If `line` is not valid JSON.
 */
function parseClaudeCodeLine(line) {
    // Deliberately outside the try block below: a syntax error must reach the caller.
    const data = JSON.parse(line);
    const events = [];
    // Valid JSON that is not an object (null, number, string, ...) carries no events.
    if (data === null || typeof data !== 'object') {
        return events;
    }
    try {
        // Handle different Claude Code message formats
        if (data.type === 'user' || data.role === 'user') {
            // A user message may actually be a container for tool results
            const contentArray = getContentArray(data);
            const toolResults = contentArray?.filter((block) => block.type === 'tool_result');
            if (toolResults && toolResults.length > 0) {
                for (const result of toolResults) {
                    events.push({
                        timestamp: data.timestamp,
                        type: 'tool_result',
                        tool: {
                            // The result block only carries the id of the originating call
                            name: 'unknown',
                            originalName: (result.tool_use_id || 'unknown'),
                            result: result.content,
                            success: !result.is_error && !result.error,
                        },
                        raw: result,
                    });
                }
            }
            else {
                // Regular user message
                events.push({
                    timestamp: data.timestamp,
                    type: 'message',
                    role: 'user',
                    content: extractContent(data),
                    raw: data,
                });
            }
        }
        else if (data.type === 'assistant' || data.role === 'assistant') {
            // Assistant message - may contain text and/or tool_use blocks
            const content = extractContent(data);
            if (content) {
                events.push({
                    timestamp: data.timestamp,
                    type: 'message',
                    role: 'assistant',
                    content,
                    raw: data,
                });
            }
            // Extract tool_use blocks
            const toolUses = extractToolUses(data);
            for (const toolUse of toolUses) {
                events.push({
                    timestamp: data.timestamp,
                    type: 'tool_call',
                    tool: {
                        name: normalizeToolName(toolUse.name),
                        originalName: toolUse.name,
                        args: toolUse.input || toolUse.args || {},
                    },
                    raw: toolUse,
                });
            }
            // Extract thinking blocks
            const thinking = extractThinking(data);
            if (thinking) {
                events.push({
                    timestamp: data.timestamp,
                    type: 'thinking',
                    content: thinking,
                    raw: data,
                });
            }
        }
        else if (data.type === 'tool_result' || data.type === 'tool_response') {
            events.push({
                timestamp: data.timestamp,
                type: 'tool_result',
                tool: {
                    name: 'unknown',
                    originalName: data.tool_use_id || 'unknown',
                    result: data.content || data.output || data.result,
                    success: !data.is_error && !data.error,
                },
                raw: data,
            });
        }
        else if (data.type === 'system' || data.role === 'system') {
            events.push({
                timestamp: data.timestamp,
                type: 'message',
                role: 'system',
                content: extractContent(data),
                raw: data,
            });
        }
        else if (data.type === 'error' || data.error) {
            events.push({
                timestamp: data.timestamp,
                type: 'error',
                content: data.error?.message || data.message || JSON.stringify(data.error),
                raw: data,
            });
        }
    }
    catch {
        // Best-effort: an unexpected shape inside an otherwise valid record
        // keeps whatever events were extracted before the failure.
    }
    return events;
}
195
/**
 * Locate the array of content blocks in a transcript record.
 *
 * The array may sit directly on `data.content`, or one level down on
 * `data.message.content` (the wrapped shape:
 * { type: "assistant", message: { content: [...] } }). The direct location
 * wins when both are arrays.
 *
 * @param {object} data - Parsed transcript record.
 * @returns {Array | undefined} The content blocks, or undefined if neither
 *   location holds an array.
 */
function getContentArray(data) {
    const candidates = [data.content, data.message?.content];
    return candidates.find((candidate) => Array.isArray(candidate));
}
211
/**
 * Locate plain-string content in a transcript record.
 *
 * Looks at `data.content` first and then at `data.message.content`.
 *
 * @param {object} data - Parsed transcript record.
 * @returns {string | undefined} The string content, or undefined if neither
 *   location holds a string.
 */
function getStringContent(data) {
    if (typeof data.content === 'string') {
        return data.content;
    }
    const nested = data.message?.content;
    return typeof nested === 'string' ? nested : undefined;
}
224
/**
 * Extract the human-readable text of a transcript record.
 *
 * Sources are tried in order: non-empty string content, the `text` fields of
 * all "text" content blocks joined by newlines, then a string `data.text`.
 * `data.message` is never treated as text because it is an object in the
 * Claude Code format.
 *
 * @param {object} data - Parsed transcript record.
 * @returns {string | undefined} The text, or undefined when none is found.
 */
function extractContent(data) {
    const plain = getStringContent(data);
    if (plain) {
        return plain;
    }
    const texts = (getContentArray(data) ?? [])
        .filter((block) => block.type === 'text')
        .map((block) => block.text);
    if (texts.length > 0) {
        return texts.join('\n');
    }
    return typeof data.text === 'string' ? data.text : undefined;
}
247
/**
 * Decode the `arguments` of an OpenAI-style tool call.
 *
 * Arguments arrive as a JSON-encoded string. A value that is not a string is
 * returned unchanged (it is assumed to be decoded already), and a string
 * that is not valid JSON yields undefined instead of throwing, so one bad
 * payload cannot discard the other tool calls found on the same record.
 *
 * @param {*} rawArguments - The `function.arguments` value of a tool call.
 * @returns {*} The decoded arguments, or undefined when decoding fails.
 */
function decodeToolCallArguments(rawArguments) {
    if (typeof rawArguments !== 'string') {
        return rawArguments;
    }
    try {
        return JSON.parse(rawArguments);
    }
    catch {
        return undefined;
    }
}
/**
 * Extract tool invocations from an assistant record.
 *
 * Two shapes are recognised:
 * - "tool_use" content blocks, returned as `{ name, input }`;
 * - an OpenAI-style `tool_calls` array on the record or on `record.message`,
 *   returned as `{ name, args }`.
 *
 * @param {object} data - Parsed transcript record.
 * @returns {object[]} Tool invocations in encounter order (possibly empty).
 */
function extractToolUses(data) {
    const toolUses = [];
    // Check content array (handles both direct and nested message format)
    const contentArray = getContentArray(data);
    if (contentArray) {
        for (const block of contentArray) {
            if (block.type === 'tool_use') {
                toolUses.push({
                    name: block.name,
                    input: block.input,
                });
            }
        }
    }
    // Also check for tool_calls array format (OpenAI-style)
    const toolCalls = data.tool_calls || data.message?.tool_calls;
    if (Array.isArray(toolCalls)) {
        for (const call of toolCalls) {
            const func = call.function;
            toolUses.push({
                name: (func?.name || call.name),
                args: func?.arguments
                    ? decodeToolCallArguments(func.arguments)
                    : (call.arguments || call.input),
            });
        }
    }
    return toolUses;
}
281
/**
 * Extract thinking/reasoning text from a transcript record.
 *
 * Collects every "thinking" content block, taking its `thinking` field or,
 * failing that, its `text` field, and joins the pieces with newlines.
 *
 * @param {object} data - Parsed transcript record.
 * @returns {string | undefined} The joined text, or undefined when the
 *   record has no thinking blocks.
 */
function extractThinking(data) {
    const blocks = getContentArray(data);
    if (!blocks) {
        return undefined;
    }
    const pieces = [];
    for (const block of blocks) {
        if (block.type === 'thinking') {
            pieces.push(block.thinking || block.text);
        }
    }
    return pieces.length > 0 ? pieces.join('\n') : undefined;
}
297
/**
 * Parse a Claude Code JSONL transcript into events.
 *
 * Blank lines are skipped. Every remaining line is handed to
 * parseClaudeCodeLine; an exception from that call is recorded in `errors`
 * and parsing continues with the next line. Afterwards each tool_call event
 * is annotated with the value extracted from its arguments:
 * `_extractedPath` for file tools, `_extractedUrl` for web fetches and
 * `_extractedCommand` for shell calls.
 *
 * @param {string} raw - Full transcript text, one JSON record per line.
 * @returns {{ events: object[], errors: string[] }} Parsed events in
 *   transcript order, plus a message for every line that threw.
 */
export function parseClaudeCodeTranscript(raw) {
    const events = [];
    const errors = [];
    for (const line of raw.split('\n')) {
        if (!line.trim()) {
            continue;
        }
        try {
            events.push(...parseClaudeCodeLine(line));
        }
        catch (e) {
            const reason = e instanceof Error ? e.message : String(e);
            errors.push(`Failed to parse line: ${reason}`);
        }
    }
    // Annotate tool calls with the metadata extracted from their arguments.
    events.forEach((event) => {
        if (event.type !== 'tool_call' || !event.tool) {
            return;
        }
        const { tool } = event;
        const args = tool.args || {};
        switch (tool.name) {
            case 'file_read':
            case 'file_write':
            case 'file_edit': {
                const path = extractFilePath(args);
                if (path) {
                    tool.args = { ...args, _extractedPath: path };
                }
                break;
            }
            case 'web_fetch': {
                const url = extractUrl(args);
                if (url) {
                    tool.args = { ...args, _extractedUrl: url };
                }
                break;
            }
            case 'shell': {
                const command = extractCommand(args);
                if (command) {
                    tool.args = { ...args, _extractedCommand: command };
                }
                break;
            }
            default:
                break;
        }
    });
    return { events, errors };
}
343
+ //# sourceMappingURL=claude-code.js.map