@vercel/agent-eval 0.0.8 → 0.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -24
- package/dist/cli.js +40 -13
- package/dist/cli.js.map +1 -1
- package/dist/index.d.ts +2 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1 -0
- package/dist/index.js.map +1 -1
- package/dist/lib/agents/codex.d.ts.map +1 -1
- package/dist/lib/agents/codex.js +13 -25
- package/dist/lib/agents/codex.js.map +1 -1
- package/dist/lib/agents/index.d.ts.map +1 -1
- package/dist/lib/agents/index.js +0 -2
- package/dist/lib/agents/index.js.map +1 -1
- package/dist/lib/config.js +1 -1
- package/dist/lib/config.js.map +1 -1
- package/dist/lib/o11y/index.d.ts +11 -0
- package/dist/lib/o11y/index.d.ts.map +1 -0
- package/dist/lib/o11y/index.js +11 -0
- package/dist/lib/o11y/index.js.map +1 -0
- package/dist/lib/o11y/parsers/claude-code.d.ts +18 -0
- package/dist/lib/o11y/parsers/claude-code.d.ts.map +1 -0
- package/dist/lib/o11y/parsers/claude-code.js +343 -0
- package/dist/lib/o11y/parsers/claude-code.js.map +1 -0
- package/dist/lib/o11y/parsers/codex.d.ts +17 -0
- package/dist/lib/o11y/parsers/codex.d.ts.map +1 -0
- package/dist/lib/o11y/parsers/codex.js +296 -0
- package/dist/lib/o11y/parsers/codex.js.map +1 -0
- package/dist/lib/o11y/parsers/index.d.ts +51 -0
- package/dist/lib/o11y/parsers/index.d.ts.map +1 -0
- package/dist/lib/o11y/parsers/index.js +280 -0
- package/dist/lib/o11y/parsers/index.js.map +1 -0
- package/dist/lib/o11y/parsers/opencode.d.ts +17 -0
- package/dist/lib/o11y/parsers/opencode.d.ts.map +1 -0
- package/dist/lib/o11y/parsers/opencode.js +313 -0
- package/dist/lib/o11y/parsers/opencode.js.map +1 -0
- package/dist/lib/o11y/types.d.ts +113 -0
- package/dist/lib/o11y/types.d.ts.map +1 -0
- package/dist/lib/o11y/types.js +6 -0
- package/dist/lib/o11y/types.js.map +1 -0
- package/dist/lib/results.d.ts +2 -1
- package/dist/lib/results.d.ts.map +1 -1
- package/dist/lib/results.js +23 -7
- package/dist/lib/results.js.map +1 -1
- package/dist/lib/runner.d.ts +5 -5
- package/dist/lib/runner.d.ts.map +1 -1
- package/dist/lib/runner.js +19 -10
- package/dist/lib/runner.js.map +1 -1
- package/dist/lib/types.d.ts +21 -3
- package/dist/lib/types.d.ts.map +1 -1
- package/dist/lib/types.js.map +1 -1
- package/package.json +9 -3
- package/dist/lib/agents/ai-sdk-agent.d.ts +0 -10
- package/dist/lib/agents/ai-sdk-agent.d.ts.map +0 -1
- package/dist/lib/agents/ai-sdk-agent.js +0 -427
- package/dist/lib/agents/ai-sdk-agent.js.map +0 -1
|
@@ -0,0 +1,313 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Parser for OpenCode CLI transcript format.
|
|
3
|
+
* OpenCode outputs JSON events to stdout when run with --format json.
|
|
4
|
+
*
|
|
5
|
+
* Format reference (based on OpenCode CLI output):
|
|
6
|
+
* - Events have a "type" field indicating the event type ("kind" and "event" are accepted as legacy fallbacks)
|
|
7
|
+
* - Messages, tool calls, and results are separate events
|
|
8
|
+
*/
|
|
9
|
+
/**
 * Lookup table from lower-cased OpenCode tool names to canonical tool names.
 *
 * A Map is used instead of an object literal so that names which happen to
 * match inherited Object.prototype members (e.g. "constructor", "__proto__")
 * are not mistaken for known tools.
 */
const TOOL_NAME_MAP = new Map([
    // File operations
    ['read', 'file_read'],
    ['read_file', 'file_read'],
    ['file_read', 'file_read'],
    ['write', 'file_write'],
    ['write_file', 'file_write'],
    ['file_write', 'file_write'],
    ['create', 'file_write'],
    ['edit', 'file_edit'],
    ['edit_file', 'file_edit'],
    ['file_edit', 'file_edit'],
    ['patch', 'file_edit'],
    // Shell
    ['bash', 'shell'],
    ['shell', 'shell'],
    ['exec', 'shell'],
    ['execute', 'shell'],
    ['run', 'shell'],
    ['command', 'shell'],
    // Web
    ['fetch', 'web_fetch'],
    ['http', 'web_fetch'],
    ['request', 'web_fetch'],
    ['web_fetch', 'web_fetch'],
    ['search', 'web_search'],
    ['web_search', 'web_search'],
    // Search/navigation
    ['glob', 'glob'],
    ['find', 'glob'],
    ['list', 'glob'],
    ['grep', 'grep'],
    ['rg', 'grep'],
    ['ripgrep', 'grep'],
    ['ls', 'list_dir'],
    ['dir', 'list_dir'],
    ['list_dir', 'list_dir'],
]);
/**
 * Map an OpenCode tool name to its canonical name.
 *
 * The lookup is case-insensitive. Names that are not in the table, and values
 * that are not strings at all, map to 'unknown' instead of throwing.
 *
 * @param {string} name - Tool name as reported by OpenCode.
 * @returns {string} Canonical tool name, or 'unknown' when unrecognised.
 */
function normalizeToolName(name) {
    if (typeof name !== 'string') {
        return 'unknown';
    }
    return TOOL_NAME_MAP.get(name.toLowerCase()) ?? 'unknown';
}
|
|
53
|
+
/**
 * Extract the file path from a tool call's arguments.
 *
 * Checks the keys `path`, `filePath`, `file`, `filename` and `target`, in that
 * order, and returns the first one that holds a non-empty string. Non-string
 * values are skipped so the caller only ever receives a string path, matching
 * the type checks performed by extractCommand.
 *
 * @param {Record<string, unknown>} args - Tool arguments.
 * @returns {string | undefined} First non-empty string candidate, if any.
 */
function extractFilePath(args) {
    const candidateKeys = ['path', 'filePath', 'file', 'filename', 'target'];
    for (const key of candidateKeys) {
        const value = args[key];
        if (typeof value === 'string' && value) {
            return value;
        }
    }
    return undefined;
}
|
|
59
|
+
/**
 * Extract the URL from a tool call's arguments.
 *
 * Checks the keys `url`, `uri`, `href` and `endpoint`, in that order, and
 * returns the first one that holds a non-empty string. Non-string values are
 * skipped so the caller only ever receives a string URL, matching the type
 * checks performed by extractCommand.
 *
 * @param {Record<string, unknown>} args - Tool arguments.
 * @returns {string | undefined} First non-empty string candidate, if any.
 */
function extractUrl(args) {
    const candidateKeys = ['url', 'uri', 'href', 'endpoint'];
    for (const key of candidateKeys) {
        const value = args[key];
        if (typeof value === 'string' && value) {
            return value;
        }
    }
    return undefined;
}
|
|
65
|
+
/**
 * Extract the shell command from a tool call's arguments.
 *
 * The first of `command`, `cmd` or `script` that is a string is returned
 * as-is. Otherwise, when `args.args` is an array, the command is rebuilt from
 * `program` / `bin` / `executable` (first truthy one, else empty) followed by
 * the space-joined argument list, with surrounding whitespace trimmed.
 *
 * @param {Record<string, unknown>} args - Tool arguments.
 * @returns {string | undefined} The command, or undefined when none is found.
 */
function extractCommand(args) {
    for (const key of ['command', 'cmd', 'script']) {
        const candidate = args[key];
        if (typeof candidate === 'string') {
            return candidate;
        }
    }
    if (!Array.isArray(args.args)) {
        return undefined;
    }
    const executable = args.program || args.bin || args.executable || '';
    const joinedArgs = args.args.join(' ');
    return `${executable} ${joinedArgs}`.trim();
}
|
|
81
|
+
/**
 * Convert a raw timestamp value into an ISO-8601 string.
 *
 * @param {unknown} value - Value accepted by the Date constructor.
 * @returns {string | undefined} ISO string, or undefined when the value is
 *   missing (falsy) or does not describe a valid date.
 */
function toIsoTimestamp(value) {
    if (!value) {
        return undefined;
    }
    const date = new Date(value);
    // toISOString() throws RangeError on an invalid date; report "no timestamp"
    // instead so that a malformed timestamp does not discard the whole event.
    return Number.isNaN(date.getTime()) ? undefined : date.toISOString();
}
/**
 * Parse a single JSONL line from an OpenCode transcript.
 *
 * Handles the real OpenCode format:
 * - type: "tool_use" | "text" | "step_start" | "step_finish"
 * - Tool info in part.tool, part.state.input, part.state.output
 * - Text in part.text
 *
 * A number of legacy/fallback shapes (message, tool_call, tool_result,
 * thinking, error, or any object carrying a `message` object) are accepted as
 * well. Parsing is best-effort: a line that is not valid JSON, or that fails
 * while being interpreted, contributes only the events collected before the
 * failure (usually none) and never throws.
 *
 * @param {string} line - One line of the transcript.
 * @returns {Array<object>} Normalized events extracted from the line.
 */
function parseOpenCodeLine(line) {
    const events = [];
    try {
        const data = JSON.parse(line);
        // OpenCode uses "type" for the event type; "kind"/"event" are fallbacks.
        const eventType = data.type || data.kind || data.event;
        const part = data.part;
        const state = part?.state;
        // Legacy branches pass the timestamp through unchanged.
        const legacyTimestamp = data.timestamp || data.time;
        switch (eventType) {
            // Real OpenCode format: tool_use with part.tool
            case 'tool_use': {
                if (part && part.tool) {
                    const name = part.tool;
                    const canonicalName = normalizeToolName(name);
                    const timestamp = toIsoTimestamp(data.timestamp);
                    const args = state?.input || {};
                    const output = state?.output;
                    const status = state?.status;
                    // Emit tool_call event
                    events.push({
                        timestamp,
                        type: 'tool_call',
                        tool: {
                            name: canonicalName,
                            originalName: name,
                            args,
                        },
                        raw: data,
                    });
                    // If completed, also emit tool_result
                    if (status === 'completed' && output !== undefined) {
                        events.push({
                            timestamp,
                            type: 'tool_result',
                            tool: {
                                name: canonicalName,
                                originalName: name,
                                result: output,
                                success: !state?.error,
                            },
                            raw: state,
                        });
                    }
                }
                break;
            }
            // Real OpenCode format: text with part.text
            case 'text': {
                const text = part?.text;
                if (text && text.trim()) {
                    events.push({
                        timestamp: toIsoTimestamp(data.timestamp),
                        type: 'message',
                        role: 'assistant',
                        content: text,
                        raw: data,
                    });
                }
                break;
            }
            // Step events - extract cost/token info if needed
            case 'step_start':
            case 'step_finish': {
                // These are metadata events, skip for now
                // Could extract token usage from step_finish if needed
                break;
            }
            // Legacy/fallback formats
            case 'message':
            case 'response':
            case 'assistant':
            case 'user': {
                // An explicit role always wins; otherwise infer it from the event
                // type. The parentheses matter: without them `||` binds tighter
                // than `?:` and every event with a role is labelled 'assistant'.
                const role = data.role || (eventType === 'user' ? 'user' : 'assistant');
                const content = data.message?.content || data.content || data.text;
                if (content) {
                    events.push({
                        timestamp: legacyTimestamp,
                        type: 'message',
                        role,
                        content,
                        raw: data,
                    });
                }
                // Check for tool calls within the message
                const toolCalls = data.message?.tool_calls || data.tool_calls || [];
                for (const call of toolCalls) {
                    const name = call.function?.name || call.name;
                    let args;
                    if (call.function?.arguments) {
                        // Function-style calls may carry arguments as a JSON string.
                        args = typeof call.function.arguments === 'string'
                            ? JSON.parse(call.function.arguments)
                            : call.function.arguments;
                    }
                    else {
                        args = call.arguments || call.input || {};
                    }
                    events.push({
                        timestamp: legacyTimestamp,
                        type: 'tool_call',
                        tool: {
                            name: normalizeToolName(name),
                            originalName: name,
                            args,
                        },
                        raw: call,
                    });
                }
                break;
            }
            case 'tool_call':
            case 'function_call':
            case 'action': {
                const name = data.tool || data.function || data.name || data.action;
                const args = data.input || data.arguments || data.params || {};
                events.push({
                    timestamp: legacyTimestamp,
                    type: 'tool_call',
                    tool: {
                        name: normalizeToolName(name),
                        originalName: name,
                        args,
                    },
                    raw: data,
                });
                break;
            }
            case 'tool_result':
            case 'function_result':
            case 'action_result':
            case 'result': {
                events.push({
                    timestamp: legacyTimestamp,
                    type: 'tool_result',
                    tool: {
                        name: 'unknown',
                        originalName: data.tool || data.function || 'unknown',
                        result: data.output || data.result || data.content,
                        success: data.success !== false && !data.error,
                    },
                    raw: data,
                });
                break;
            }
            case 'thinking':
            case 'reasoning': {
                events.push({
                    timestamp: legacyTimestamp,
                    type: 'thinking',
                    content: data.content || data.text || data.thinking,
                    raw: data,
                });
                break;
            }
            case 'error': {
                events.push({
                    timestamp: legacyTimestamp,
                    type: 'error',
                    content: data.error?.message || data.message || data.content,
                    raw: data,
                });
                break;
            }
            default: {
                // Try to infer from structure
                if (data.message && typeof data.message === 'object') {
                    events.push({
                        timestamp: legacyTimestamp,
                        type: 'message',
                        role: data.message.role || 'assistant',
                        content: data.message.content,
                        raw: data,
                    });
                }
            }
        }
    }
    catch {
        // Deliberately best-effort: skip unparseable lines (stdout may contain
        // non-JSON output mixed in with the event stream).
    }
    return events;
}
|
|
268
|
+
/**
 * Parse an OpenCode JSONL transcript into normalized events.
 *
 * Blank lines are ignored. Each remaining line is handed to parseOpenCodeLine;
 * if that call throws, a "Failed to parse line" message is recorded instead.
 * Afterwards every tool_call event is enriched, based on its canonical tool
 * name, with `_extractedPath`, `_extractedUrl` or `_extractedCommand` in its
 * args when the corresponding value can be found.
 *
 * @param {string} raw - Raw transcript text, one JSON event per line.
 * @returns {{ events: Array<object>, errors: string[] }} Parsed events and
 *   any per-line failure messages.
 */
export function parseOpenCodeTranscript(raw) {
    const events = [];
    const errors = [];
    raw.split('\n').forEach((rawLine) => {
        if (!rawLine.trim()) {
            return;
        }
        try {
            events.push(...parseOpenCodeLine(rawLine));
        }
        catch (err) {
            const reason = err instanceof Error ? err.message : String(err);
            errors.push(`Failed to parse line: ${reason}`);
        }
    });
    // Post-process to extract additional metadata
    const fileToolNames = ['file_read', 'file_write', 'file_edit'];
    events
        .filter((event) => event.type === 'tool_call' && event.tool)
        .forEach(({ tool }) => {
            const originalArgs = tool.args || {};
            // File operations: surface the path that was touched
            if (fileToolNames.includes(tool.name)) {
                const path = extractFilePath(originalArgs);
                if (path) {
                    tool.args = { ...originalArgs, _extractedPath: path };
                }
            }
            // Web fetches: surface the URL
            if (tool.name === 'web_fetch') {
                const url = extractUrl(originalArgs);
                if (url) {
                    tool.args = { ...originalArgs, _extractedUrl: url };
                }
            }
            // Shell operations: surface the command line
            if (tool.name === 'shell') {
                const command = extractCommand(originalArgs);
                if (command) {
                    tool.args = { ...originalArgs, _extractedCommand: command };
                }
            }
        });
    return { events, errors };
}
|
|
313
|
+
//# sourceMappingURL=opencode.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"opencode.js","sourceRoot":"","sources":["../../../../src/lib/o11y/parsers/opencode.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAIH;;GAEG;AACH,SAAS,iBAAiB,CAAC,IAAY;IACrC,MAAM,OAAO,GAA6B;QACxC,kBAAkB;QAClB,IAAI,EAAE,WAAW;QACjB,SAAS,EAAE,WAAW;QACtB,SAAS,EAAE,WAAW;QACtB,KAAK,EAAE,YAAY;QACnB,UAAU,EAAE,YAAY;QACxB,UAAU,EAAE,YAAY;QACxB,MAAM,EAAE,YAAY;QACpB,IAAI,EAAE,WAAW;QACjB,SAAS,EAAE,WAAW;QACtB,SAAS,EAAE,WAAW;QACtB,KAAK,EAAE,WAAW;QAElB,QAAQ;QACR,IAAI,EAAE,OAAO;QACb,KAAK,EAAE,OAAO;QACd,IAAI,EAAE,OAAO;QACb,OAAO,EAAE,OAAO;QAChB,GAAG,EAAE,OAAO;QACZ,OAAO,EAAE,OAAO;QAEhB,MAAM;QACN,KAAK,EAAE,WAAW;QAClB,IAAI,EAAE,WAAW;QACjB,OAAO,EAAE,WAAW;QACpB,SAAS,EAAE,WAAW;QACtB,MAAM,EAAE,YAAY;QACpB,UAAU,EAAE,YAAY;QAExB,oBAAoB;QACpB,IAAI,EAAE,MAAM;QACZ,IAAI,EAAE,MAAM;QACZ,IAAI,EAAE,MAAM;QACZ,IAAI,EAAE,MAAM;QACZ,EAAE,EAAE,MAAM;QACV,OAAO,EAAE,MAAM;QACf,EAAE,EAAE,UAAU;QACd,GAAG,EAAE,UAAU;QACf,QAAQ,EAAE,UAAU;KACrB,CAAC;IAEF,OAAO,OAAO,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,IAAI,SAAS,CAAC;AAClD,CAAC;AAED;;GAEG;AACH,SAAS,eAAe,CAAC,IAA6B;IACpD,OAAO,CAAC,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,QAAQ,IAAI,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,QAAQ,IAAI,IAAI,CAAC,MAAM,CAAuB,CAAC;AACzG,CAAC;AAED;;GAEG;AACH,SAAS,UAAU,CAAC,IAA6B;IAC/C,OAAO,CAAC,IAAI,CAAC,GAAG,IAAI,IAAI,CAAC,GAAG,IAAI,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,QAAQ,CAAuB,CAAC;AACpF,CAAC;AAED;;GAEG;AACH,SAAS,cAAc,CAAC,IAA6B;IACnD,IAAI,OAAO,IAAI,CAAC,OAAO,KAAK,QAAQ;QAAE,OAAO,IAAI,CAAC,OAAO,CAAC;IAC1D,IAAI,OAAO,IAAI,CAAC,GAAG,KAAK,QAAQ;QAAE,OAAO,IAAI,CAAC,GAAG,CAAC;IAClD,IAAI,OAAO,IAAI,CAAC,MAAM,KAAK,QAAQ;QAAE,OAAO,IAAI,CAAC,MAAM,CAAC;IACxD,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;QAC7B,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,IAAI,IAAI,CAAC,GAAG,IAAI,IAAI,CAAC,UAAU,IAAI,EAAE,CAAC;QAClE,OAAO,GAAG,OAAO,IAAK,IAAI,CAAC,IAAiB,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,EAAE,CAAC;IAClE,CAAC;IACD,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;;;;;GAMG;AACH,SAAS,iBAAiB,CAAC,IAAY;IACrC,MAAM,MAAM,GAAsB,EAAE,CAAC;IAErC,IAAI,CAAC;QACH,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAE
9B,sCAAsC;QACtC,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,KAAK,CAAC;QACvD,MAAM,IAAI,GAAG,IAAI,CAAC,IAA2C,CAAC;QAC9D,MAAM,KAAK,GAAG,IAAI,EAAE,KAA4C,CAAC;QAEjE,QAAQ,SAAS,EAAE,CAAC;YAClB,gDAAgD;YAChD,KAAK,UAAU,CAAC,CAAC,CAAC;gBAChB,IAAI,IAAI,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;oBACtB,MAAM,IAAI,GAAG,IAAI,CAAC,IAAc,CAAC;oBACjC,MAAM,IAAI,GAAI,KAAK,EAAE,KAAiC,IAAI,EAAE,CAAC;oBAC7D,MAAM,MAAM,GAAG,KAAK,EAAE,MAAM,CAAC;oBAC7B,MAAM,MAAM,GAAG,KAAK,EAAE,MAA4B,CAAC;oBAEnD,uBAAuB;oBACvB,MAAM,CAAC,IAAI,CAAC;wBACV,SAAS,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,SAAS;wBAC9E,IAAI,EAAE,WAAW;wBACjB,IAAI,EAAE;4BACJ,IAAI,EAAE,iBAAiB,CAAC,IAAI,CAAC;4BAC7B,YAAY,EAAE,IAAI;4BAClB,IAAI;yBACL;wBACD,GAAG,EAAE,IAAI;qBACV,CAAC,CAAC;oBAEH,sCAAsC;oBACtC,IAAI,MAAM,KAAK,WAAW,IAAI,MAAM,KAAK,SAAS,EAAE,CAAC;wBACnD,MAAM,CAAC,IAAI,CAAC;4BACV,SAAS,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,SAAS;4BAC9E,IAAI,EAAE,aAAa;4BACnB,IAAI,EAAE;gCACJ,IAAI,EAAE,iBAAiB,CAAC,IAAI,CAAC;gCAC7B,YAAY,EAAE,IAAI;gCAClB,MAAM,EAAE,MAAM;gCACd,OAAO,EAAE,MAAM,KAAK,WAAW,IAAI,CAAC,KAAK,EAAE,KAAK;6BACjD;4BACD,GAAG,EAAE,KAAK;yBACX,CAAC,CAAC;oBACL,CAAC;gBACH,CAAC;gBACD,MAAM;YACR,CAAC;YAED,4CAA4C;YAC5C,KAAK,MAAM,CAAC,CAAC,CAAC;gBACZ,MAAM,IAAI,GAAG,IAAI,EAAE,IAA0B,CAAC;gBAC9C,IAAI,IAAI,IAAI,IAAI,CAAC,IAAI,EAAE,EAAE,CAAC;oBACxB,MAAM,CAAC,IAAI,CAAC;wBACV,SAAS,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,SAAS;wBAC9E,IAAI,EAAE,SAAS;wBACf,IAAI,EAAE,WAAW;wBACjB,OAAO,EAAE,IAAI;wBACb,GAAG,EAAE,IAAI;qBACV,CAAC,CAAC;gBACL,CAAC;gBACD,MAAM;YACR,CAAC;YAED,kDAAkD;YAClD,KAAK,YAAY,CAAC;YAClB,KAAK,aAAa,CAAC,CAAC,CAAC;gBACnB,0CAA0C;gBAC1C,uDAAuD;gBACvD,MAAM;YACR,CAAC;YAED,0BAA0B;YAC1B,KAAK,SAAS,CAAC;YACf,KAAK,UAAU,CAAC;YAChB,KAAK,WAAW,CAAC;YACjB,KAAK,MAAM,CAAC,CAAC,CAAC;gBACZ,MAAM,IAAI,GACR,IAAI,CAAC,IAAI,IAAI,SAAS,KAAK,WAAW;oBACpC,CAAC,CAAC,WAAW;oBACb,CAAC,CAAC,SAAS,KAAK,MAA
M;wBACpB,CAAC,CAAC,MAAM;wBACR,CAAC,CAAC,WAAW,CAAC;gBACpB,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,EAAE,OAAO,IAAI,IAAI,CAAC,OAAO,IAAI,IAAI,CAAC,IAAI,CAAC;gBAEnE,IAAI,OAAO,EAAE,CAAC;oBACZ,MAAM,CAAC,IAAI,CAAC;wBACV,SAAS,EAAE,IAAI,CAAC,SAAS,IAAI,IAAI,CAAC,IAAI;wBACtC,IAAI,EAAE,SAAS;wBACf,IAAI,EAAE,IAAuC;wBAC7C,OAAO;wBACP,GAAG,EAAE,IAAI;qBACV,CAAC,CAAC;gBACL,CAAC;gBAED,0CAA0C;gBAC1C,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,EAAE,UAAU,IAAI,IAAI,CAAC,UAAU,IAAI,EAAE,CAAC;gBACpE,KAAK,MAAM,IAAI,IAAI,SAAS,EAAE,CAAC;oBAC7B,MAAM,IAAI,GAAG,IAAI,CAAC,QAAQ,EAAE,IAAI,IAAI,IAAI,CAAC,IAAI,CAAC;oBAC9C,MAAM,IAAI,GAAG,IAAI,CAAC,QAAQ,EAAE,SAAS;wBACnC,CAAC,CAAC,OAAO,IAAI,CAAC,QAAQ,CAAC,SAAS,KAAK,QAAQ;4BAC3C,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC;4BACrC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,SAAS;wBAC3B,CAAC,CAAC,IAAI,CAAC,SAAS,IAAI,IAAI,CAAC,KAAK,IAAI,EAAE,CAAC;oBAEvC,MAAM,CAAC,IAAI,CAAC;wBACV,SAAS,EAAE,IAAI,CAAC,SAAS,IAAI,IAAI,CAAC,IAAI;wBACtC,IAAI,EAAE,WAAW;wBACjB,IAAI,EAAE;4BACJ,IAAI,EAAE,iBAAiB,CAAC,IAAI,CAAC;4BAC7B,YAAY,EAAE,IAAI;4BAClB,IAAI;yBACL;wBACD,GAAG,EAAE,IAAI;qBACV,CAAC,CAAC;gBACL,CAAC;gBACD,MAAM;YACR,CAAC;YAED,KAAK,WAAW,CAAC;YACjB,KAAK,eAAe,CAAC;YACrB,KAAK,QAAQ,CAAC,CAAC,CAAC;gBACd,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,QAAQ,IAAI,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,MAAM,CAAC;gBACpE,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,IAAI,IAAI,CAAC,SAAS,IAAI,IAAI,CAAC,MAAM,IAAI,EAAE,CAAC;gBAE/D,MAAM,CAAC,IAAI,CAAC;oBACV,SAAS,EAAE,IAAI,CAAC,SAAS,IAAI,IAAI,CAAC,IAAI;oBACtC,IAAI,EAAE,WAAW;oBACjB,IAAI,EAAE;wBACJ,IAAI,EAAE,iBAAiB,CAAC,IAAI,CAAC;wBAC7B,YAAY,EAAE,IAAI;wBAClB,IAAI;qBACL;oBACD,GAAG,EAAE,IAAI;iBACV,CAAC,CAAC;gBACH,MAAM;YACR,CAAC;YAED,KAAK,aAAa,CAAC;YACnB,KAAK,iBAAiB,CAAC;YACvB,KAAK,eAAe,CAAC;YACrB,KAAK,QAAQ,CAAC,CAAC,CAAC;gBACd,MAAM,CAAC,IAAI,CAAC;oBACV,SAAS,EAAE,IAAI,CAAC,SAAS,IAAI,IAAI,CAAC,IAAI;oBACtC,IAAI,EAAE,aAAa;oBACnB,IAAI,EAAE;wBACJ,IAAI,EAAE,SAAS;wBACf,YAAY,EAAE,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,QAAQ,IAAI,SAAS;wBACrD,MAAM,EAAE,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,OAAO;wBAClD,OAAO,EA
AE,IAAI,CAAC,OAAO,KAAK,KAAK,IAAI,CAAC,IAAI,CAAC,KAAK;qBAC/C;oBACD,GAAG,EAAE,IAAI;iBACV,CAAC,CAAC;gBACH,MAAM;YACR,CAAC;YAED,KAAK,UAAU,CAAC;YAChB,KAAK,WAAW,CAAC,CAAC,CAAC;gBACjB,MAAM,CAAC,IAAI,CAAC;oBACV,SAAS,EAAE,IAAI,CAAC,SAAS,IAAI,IAAI,CAAC,IAAI;oBACtC,IAAI,EAAE,UAAU;oBAChB,OAAO,EAAE,IAAI,CAAC,OAAO,IAAI,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,QAAQ;oBACnD,GAAG,EAAE,IAAI;iBACV,CAAC,CAAC;gBACH,MAAM;YACR,CAAC;YAED,KAAK,OAAO,CAAC,CAAC,CAAC;gBACb,MAAM,CAAC,IAAI,CAAC;oBACV,SAAS,EAAE,IAAI,CAAC,SAAS,IAAI,IAAI,CAAC,IAAI;oBACtC,IAAI,EAAE,OAAO;oBACb,OAAO,EAAE,IAAI,CAAC,KAAK,EAAE,OAAO,IAAI,IAAI,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO;oBAC5D,GAAG,EAAE,IAAI;iBACV,CAAC,CAAC;gBACH,MAAM;YACR,CAAC;YAED,OAAO,CAAC,CAAC,CAAC;gBACR,8BAA8B;gBAC9B,IAAI,IAAI,CAAC,OAAO,IAAI,OAAO,IAAI,CAAC,OAAO,KAAK,QAAQ,EAAE,CAAC;oBACrD,MAAM,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,IAAI,WAAW,CAAC;oBAC9C,MAAM,CAAC,IAAI,CAAC;wBACV,SAAS,EAAE,IAAI,CAAC,SAAS,IAAI,IAAI,CAAC,IAAI;wBACtC,IAAI,EAAE,SAAS;wBACf,IAAI,EAAE,IAAuC;wBAC7C,OAAO,EAAE,IAAI,CAAC,OAAO,CAAC,OAAO;wBAC7B,GAAG,EAAE,IAAI;qBACV,CAAC,CAAC;gBACL,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAAC,MAAM,CAAC;QACP,yBAAyB;IAC3B,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,uBAAuB,CAAC,GAAW;IAIjD,MAAM,MAAM,GAAsB,EAAE,CAAC;IACrC,MAAM,MAAM,GAAa,EAAE,CAAC;IAE5B,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC;IAE5D,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,CAAC;YACH,MAAM,UAAU,GAAG,iBAAiB,CAAC,IAAI,CAAC,CAAC;YAC3C,MAAM,CAAC,IAAI,CAAC,GAAG,UAAU,CAAC,CAAC;QAC7B,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,MAAM,CAAC,IAAI,CAAC,yBAAyB,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;QACrF,CAAC;IACH,CAAC;IAED,8CAA8C;IAC9C,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,IAAI,KAAK,CAAC,IAAI,KAAK,WAAW,IAAI,KAAK,CAAC,IAAI,EAAE,CAAC;YAC7C,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,IAAI,EAAE,CAAC;YAEnC,yCAAyC;YACzC,IAAI,CAAC,WAAW,EAAE,YAAY,EAAE,WAAW,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,EA
AE,CAAC;gBACvE,MAAM,IAAI,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;gBACnC,IAAI,IAAI,EAAE,CAAC;oBACT,KAAK,CAAC,IAAI,CAAC,IAAI,GAAG,EAAE,GAAG,IAAI,EAAE,cAAc,EAAE,IAAI,EAAE,CAAC;gBACtD,CAAC;YACH,CAAC;YAED,+BAA+B;YAC/B,IAAI,KAAK,CAAC,IAAI,CAAC,IAAI,KAAK,WAAW,EAAE,CAAC;gBACpC,MAAM,GAAG,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC;gBAC7B,IAAI,GAAG,EAAE,CAAC;oBACR,KAAK,CAAC,IAAI,CAAC,IAAI,GAAG,EAAE,GAAG,IAAI,EAAE,aAAa,EAAE,GAAG,EAAE,CAAC;gBACpD,CAAC;YACH,CAAC;YAED,wCAAwC;YACxC,IAAI,KAAK,CAAC,IAAI,CAAC,IAAI,KAAK,OAAO,EAAE,CAAC;gBAChC,MAAM,OAAO,GAAG,cAAc,CAAC,IAAI,CAAC,CAAC;gBACrC,IAAI,OAAO,EAAE,CAAC;oBACZ,KAAK,CAAC,IAAI,CAAC,IAAI,GAAG,EAAE,GAAG,IAAI,EAAE,iBAAiB,EAAE,OAAO,EAAE,CAAC;gBAC5D,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,CAAC;AAC5B,CAAC"}
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Observability types for cross-agent transcript analysis.
|
|
3
|
+
* Provides a unified schema regardless of which agent produced the transcript.
|
|
4
|
+
*/
|
|
5
|
+
/**
|
|
6
|
+
* Canonical tool names across agents.
|
|
7
|
+
* Maps agent-specific tool names to standardized names.
|
|
8
|
+
*/
|
|
9
|
+
export type ToolName = 'file_read' | 'file_write' | 'file_edit' | 'shell' | 'web_fetch' | 'web_search' | 'glob' | 'grep' | 'list_dir' | 'agent_task' | 'unknown';
|
|
10
|
+
/**
|
|
11
|
+
* An event in the transcript.
|
|
12
|
+
*/
|
|
13
|
+
export interface TranscriptEvent {
|
|
14
|
+
/** ISO timestamp of the event */
|
|
15
|
+
timestamp?: string;
|
|
16
|
+
/** Event type */
|
|
17
|
+
type: 'message' | 'tool_call' | 'tool_result' | 'thinking' | 'error';
|
|
18
|
+
/** For message events: the role */
|
|
19
|
+
role?: 'user' | 'assistant' | 'system';
|
|
20
|
+
/** Text content (for messages, thinking, errors) */
|
|
21
|
+
content?: string;
|
|
22
|
+
/** For tool_call and tool_result events */
|
|
23
|
+
tool?: {
|
|
24
|
+
/** Canonical tool name */
|
|
25
|
+
name: ToolName;
|
|
26
|
+
/** Original tool name from the agent */
|
|
27
|
+
originalName: string;
|
|
28
|
+
/** Tool arguments */
|
|
29
|
+
args?: Record<string, unknown>;
|
|
30
|
+
/** Tool result (for tool_result events) */
|
|
31
|
+
result?: unknown;
|
|
32
|
+
/** Duration in milliseconds (if available) */
|
|
33
|
+
durationMs?: number;
|
|
34
|
+
/** Whether the tool call succeeded */
|
|
35
|
+
success?: boolean;
|
|
36
|
+
};
|
|
37
|
+
/** Raw event data from the agent (for debugging) */
|
|
38
|
+
raw?: unknown;
|
|
39
|
+
}
|
|
40
|
+
/**
|
|
41
|
+
* Web fetch information extracted from tool calls.
|
|
42
|
+
*/
|
|
43
|
+
export interface WebFetchInfo {
|
|
44
|
+
/** The URL that was fetched */
|
|
45
|
+
url: string;
|
|
46
|
+
/** HTTP method (if known) */
|
|
47
|
+
method?: string;
|
|
48
|
+
/** HTTP status code (if available) */
|
|
49
|
+
status?: number;
|
|
50
|
+
/** Whether the fetch succeeded */
|
|
51
|
+
success?: boolean;
|
|
52
|
+
}
|
|
53
|
+
/**
|
|
54
|
+
* File operation information.
|
|
55
|
+
*/
|
|
56
|
+
export interface FileOperationInfo {
|
|
57
|
+
/** File path */
|
|
58
|
+
path: string;
|
|
59
|
+
/** Operation type */
|
|
60
|
+
operation: 'read' | 'write' | 'edit';
|
|
61
|
+
}
|
|
62
|
+
/**
|
|
63
|
+
* Shell command information.
|
|
64
|
+
*/
|
|
65
|
+
export interface ShellCommandInfo {
|
|
66
|
+
/** The command that was run */
|
|
67
|
+
command: string;
|
|
68
|
+
/** Exit code (if available) */
|
|
69
|
+
exitCode?: number;
|
|
70
|
+
/** Whether the command succeeded */
|
|
71
|
+
success?: boolean;
|
|
72
|
+
}
|
|
73
|
+
/**
|
|
74
|
+
* Summary statistics derived from the transcript.
|
|
75
|
+
*/
|
|
76
|
+
export interface TranscriptSummary {
|
|
77
|
+
/** Total number of conversation turns */
|
|
78
|
+
totalTurns: number;
|
|
79
|
+
/** Count of each tool type used */
|
|
80
|
+
toolCalls: Record<ToolName, number>;
|
|
81
|
+
/** Total tool calls */
|
|
82
|
+
totalToolCalls: number;
|
|
83
|
+
/** Web fetches made during the run */
|
|
84
|
+
webFetches: WebFetchInfo[];
|
|
85
|
+
/** Files that were read */
|
|
86
|
+
filesRead: string[];
|
|
87
|
+
/** Files that were written or edited */
|
|
88
|
+
filesModified: string[];
|
|
89
|
+
/** Shell commands executed */
|
|
90
|
+
shellCommands: ShellCommandInfo[];
|
|
91
|
+
/** Errors encountered */
|
|
92
|
+
errors: string[];
|
|
93
|
+
/** Thinking/reasoning blocks (if available) */
|
|
94
|
+
thinkingBlocks: number;
|
|
95
|
+
}
|
|
96
|
+
/**
|
|
97
|
+
* A parsed transcript with events and summary.
|
|
98
|
+
*/
|
|
99
|
+
export interface Transcript {
|
|
100
|
+
/** Agent that produced this transcript */
|
|
101
|
+
agent: string;
|
|
102
|
+
/** Model used (if known) */
|
|
103
|
+
model?: string;
|
|
104
|
+
/** All events in order */
|
|
105
|
+
events: TranscriptEvent[];
|
|
106
|
+
/** Derived summary statistics */
|
|
107
|
+
summary: TranscriptSummary;
|
|
108
|
+
/** Whether parsing succeeded fully */
|
|
109
|
+
parseSuccess: boolean;
|
|
110
|
+
/** Any parsing warnings/errors */
|
|
111
|
+
parseErrors?: string[];
|
|
112
|
+
}
|
|
113
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/lib/o11y/types.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH;;;GAGG;AACH,MAAM,MAAM,QAAQ,GAChB,WAAW,GACX,YAAY,GACZ,WAAW,GACX,OAAO,GACP,WAAW,GACX,YAAY,GACZ,MAAM,GACN,MAAM,GACN,UAAU,GACV,YAAY,GACZ,SAAS,CAAC;AAEd;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,iCAAiC;IACjC,SAAS,CAAC,EAAE,MAAM,CAAC;IAEnB,iBAAiB;IACjB,IAAI,EAAE,SAAS,GAAG,WAAW,GAAG,aAAa,GAAG,UAAU,GAAG,OAAO,CAAC;IAErE,mCAAmC;IACnC,IAAI,CAAC,EAAE,MAAM,GAAG,WAAW,GAAG,QAAQ,CAAC;IAEvC,oDAAoD;IACpD,OAAO,CAAC,EAAE,MAAM,CAAC;IAEjB,2CAA2C;IAC3C,IAAI,CAAC,EAAE;QACL,0BAA0B;QAC1B,IAAI,EAAE,QAAQ,CAAC;QACf,wCAAwC;QACxC,YAAY,EAAE,MAAM,CAAC;QACrB,qBAAqB;QACrB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC/B,2CAA2C;QAC3C,MAAM,CAAC,EAAE,OAAO,CAAC;QACjB,8CAA8C;QAC9C,UAAU,CAAC,EAAE,MAAM,CAAC;QACpB,sCAAsC;QACtC,OAAO,CAAC,EAAE,OAAO,CAAC;KACnB,CAAC;IAEF,oDAAoD;IACpD,GAAG,CAAC,EAAE,OAAO,CAAC;CACf;AAED;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,+BAA+B;IAC/B,GAAG,EAAE,MAAM,CAAC;IACZ,6BAA6B;IAC7B,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,sCAAsC;IACtC,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,kCAAkC;IAClC,OAAO,CAAC,EAAE,OAAO,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,gBAAgB;IAChB,IAAI,EAAE,MAAM,CAAC;IACb,qBAAqB;IACrB,SAAS,EAAE,MAAM,GAAG,OAAO,GAAG,MAAM,CAAC;CACtC;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,+BAA+B;IAC/B,OAAO,EAAE,MAAM,CAAC;IAChB,+BAA+B;IAC/B,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,oCAAoC;IACpC,OAAO,CAAC,EAAE,OAAO,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,yCAAyC;IACzC,UAAU,EAAE,MAAM,CAAC;IAEnB,mCAAmC;IACnC,SAAS,EAAE,MAAM,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;IAEpC,uBAAuB;IACvB,cAAc,EAAE,MAAM,CAAC;IAEvB,sCAAsC;IACtC,UAAU,EAAE,YAAY,EAAE,CAAC;IAE3B,2BAA2B;IAC3B,SAAS,EAAE,MAAM,EAAE,CAAC;IAEpB,wCAAwC;IACxC,aAAa,EAAE,MAAM,EAAE,CAAC;IAExB,8BAA8B;IAC9B,aAAa,EAAE,gBAAgB,EAAE,CAAC;IAElC,yBAAyB;IACzB,MAAM,EAAE,MAAM,EAAE,CAAC;IAEjB,+CAA+C;IAC/C,cAAc,EAAE,MAAM,CAAC;CACxB;AAED;;GAEG;AACH,MAAM,WAAW,UAAU;IACzB,0CAA0C;IAC1C,KAAK,EAAE,MAAM,CAAC;IAEd,4BAA4B;IAC5B,KAAK,CAAC,EAAE,MAAM,CAAC;IAEf,0BAA0B;IAC1B,MAAM,EAAE
,eAAe,EAAE,CAAC;IAE1B,iCAAiC;IACjC,OAAO,EAAE,iBAAiB,CAAC;IAE3B,sCAAsC;IACtC,YAAY,EAAE,OAAO,CAAC;IAEtB,kCAAkC;IAClC,WAAW,CAAC,EAAE,MAAM,EAAE,CAAC;CACxB"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../../../src/lib/o11y/types.ts"],"names":[],"mappings":"AAAA;;;GAGG"}
|
package/dist/lib/results.d.ts
CHANGED
|
@@ -34,7 +34,8 @@ export interface SaveResultsOptions {
|
|
|
34
34
|
* eval-1/
|
|
35
35
|
* run-1/
|
|
36
36
|
* result.json
|
|
37
|
-
* transcript.
|
|
37
|
+
* transcript.json (parsed/structured - primary format)
|
|
38
|
+
* transcript-raw.jsonl (raw agent output - for debugging)
|
|
38
39
|
* outputs/
|
|
39
40
|
* summary.json
|
|
40
41
|
*/
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"results.d.ts","sourceRoot":"","sources":["../../src/lib/results.ts"],"names":[],"mappings":"AAAA;;GAEG;AAKH,OAAO,KAAK,EACV,aAAa,EACb,WAAW,EACX,WAAW,EACX,iBAAiB,EACjB,wBAAwB,EACzB,MAAM,YAAY,CAAC;AACpB,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,mBAAmB,CAAC;
|
|
1
|
+
{"version":3,"file":"results.d.ts","sourceRoot":"","sources":["../../src/lib/results.ts"],"names":[],"mappings":"AAAA;;GAEG;AAKH,OAAO,KAAK,EACV,aAAa,EACb,WAAW,EACX,WAAW,EACX,iBAAiB,EACjB,wBAAwB,EACzB,MAAM,YAAY,CAAC;AACpB,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,mBAAmB,CAAC;AAGxD;;GAEG;AACH,wBAAgB,wBAAwB,CAAC,WAAW,EAAE,cAAc,GAAG,WAAW,CA4BjF;AAED;;GAEG;AACH,wBAAgB,iBAAiB,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,WAAW,EAAE,GAAG,WAAW,CAanF;AAED;;GAEG;AACH,wBAAgB,uBAAuB,CACrC,MAAM,EAAE,wBAAwB,EAChC,KAAK,EAAE,WAAW,EAAE,EACpB,SAAS,EAAE,IAAI,EACf,WAAW,EAAE,IAAI,GAChB,iBAAiB,CAOnB;AAED;;GAEG;AACH,MAAM,WAAW,kBAAkB;IACjC,iCAAiC;IACjC,UAAU,EAAE,MAAM,CAAC;IACnB,8CAA8C;IAC9C,cAAc,EAAE,MAAM,CAAC;CACxB;AAED;;;;;;;;;;;;;;GAcG;AACH,wBAAgB,WAAW,CACzB,OAAO,EAAE,iBAAiB,EAC1B,OAAO,EAAE,kBAAkB,GAC1B,MAAM,CAsGR;AAED;;GAEG;AACH,wBAAgB,kBAAkB,CAAC,OAAO,EAAE,iBAAiB,GAAG,MAAM,CAsCrE;AAED;;GAEG;AACH,wBAAgB,eAAe,CAC7B,QAAQ,EAAE,MAAM,EAChB,SAAS,EAAE,MAAM,EACjB,SAAS,EAAE,MAAM,EACjB,MAAM,EAAE,aAAa,GACpB,MAAM,CAYR;AAED;;GAEG;AACH,wBAAgB,qBAAqB,CACnC,QAAQ,EAAE,MAAM,EAChB,SAAS,EAAE,MAAM,EACjB,SAAS,EAAE,MAAM,GAChB,MAAM,CAER"}
|
package/dist/lib/results.js
CHANGED
|
@@ -4,6 +4,7 @@
|
|
|
4
4
|
import { mkdirSync, writeFileSync } from 'fs';
|
|
5
5
|
import { join } from 'path';
|
|
6
6
|
import chalk from 'chalk';
|
|
7
|
+
import { parseTranscript } from './o11y/index.js';
|
|
7
8
|
/**
|
|
8
9
|
* Convert AgentRunResult to EvalRunData (result + transcript).
|
|
9
10
|
*/
|
|
@@ -70,7 +71,8 @@ export function createExperimentResults(config, evals, startedAt, completedAt) {
|
|
|
70
71
|
* eval-1/
|
|
71
72
|
* run-1/
|
|
72
73
|
* result.json
|
|
73
|
-
* transcript.
|
|
74
|
+
* transcript.json (parsed/structured - primary format)
|
|
75
|
+
* transcript-raw.jsonl (raw agent output - for debugging)
|
|
74
76
|
* outputs/
|
|
75
77
|
* summary.json
|
|
76
78
|
*/
|
|
@@ -96,12 +98,26 @@ export function saveResults(results, options) {
|
|
|
96
98
|
const runData = evalSummary.runs[i];
|
|
97
99
|
const runDir = join(evalDir, `run-${i + 1}`);
|
|
98
100
|
mkdirSync(runDir, { recursive: true });
|
|
99
|
-
// Build the result with paths
|
|
100
|
-
const resultWithPaths = {
|
|
101
|
-
|
|
101
|
+
// Build the result with paths and o11y summary
|
|
102
|
+
const resultWithPaths = {
|
|
103
|
+
...runData.result,
|
|
104
|
+
};
|
|
105
|
+
// Save transcripts if available
|
|
102
106
|
if (runData.transcript) {
|
|
103
|
-
|
|
104
|
-
|
|
107
|
+
// Parse the raw transcript
|
|
108
|
+
// Model can be string or array - use first if array
|
|
109
|
+
const model = Array.isArray(results.config.model)
|
|
110
|
+
? results.config.model[0]
|
|
111
|
+
: results.config.model;
|
|
112
|
+
const transcript = parseTranscript(runData.transcript, results.config.agent, model);
|
|
113
|
+
// Save parsed transcript as primary format (transcript.json)
|
|
114
|
+
writeFileSync(join(runDir, 'transcript.json'), JSON.stringify(transcript, null, 2));
|
|
115
|
+
resultWithPaths.transcriptPath = './transcript.json';
|
|
116
|
+
// Save raw transcript for debugging (transcript-raw.jsonl)
|
|
117
|
+
writeFileSync(join(runDir, 'transcript-raw.jsonl'), runData.transcript);
|
|
118
|
+
resultWithPaths.transcriptRawPath = './transcript-raw.jsonl';
|
|
119
|
+
// Include summary in result.json for quick access
|
|
120
|
+
resultWithPaths.o11y = transcript.summary;
|
|
105
121
|
}
|
|
106
122
|
// Save script/test outputs to outputs/
|
|
107
123
|
const outputsDir = join(runDir, 'outputs');
|
|
@@ -128,7 +144,7 @@ export function saveResults(results, options) {
|
|
|
128
144
|
resultWithPaths.outputPaths = outputPaths;
|
|
129
145
|
}
|
|
130
146
|
}
|
|
131
|
-
// Save result.json with paths
|
|
147
|
+
// Save result.json with paths and o11y summary
|
|
132
148
|
writeFileSync(join(runDir, 'result.json'), JSON.stringify(resultWithPaths, null, 2));
|
|
133
149
|
}
|
|
134
150
|
}
|
package/dist/lib/results.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"results.js","sourceRoot":"","sources":["../../src/lib/results.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,SAAS,EAAE,aAAa,EAAE,MAAM,IAAI,CAAC;AAC9C,OAAO,EAAE,IAAI,EAAE,MAAM,MAAM,CAAC;AAC5B,OAAO,KAAK,MAAM,OAAO,CAAC;
|
|
1
|
+
{"version":3,"file":"results.js","sourceRoot":"","sources":["../../src/lib/results.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,SAAS,EAAE,aAAa,EAAE,MAAM,IAAI,CAAC;AAC9C,OAAO,EAAE,IAAI,EAAE,MAAM,MAAM,CAAC;AAC5B,OAAO,KAAK,MAAM,OAAO,CAAC;AAS1B,OAAO,EAAE,eAAe,EAAmB,MAAM,iBAAiB,CAAC;AAEnE;;GAEG;AACH,MAAM,UAAU,wBAAwB,CAAC,WAA2B;IAClE,gDAAgD;IAChD,MAAM,aAAa,GAAiC,EAAE,CAAC;IAEvD,0BAA0B;IAC1B,IAAI,WAAW,CAAC,UAAU,EAAE,MAAM,EAAE,CAAC;QACnC,aAAa,CAAC,IAAI,GAAG,WAAW,CAAC,UAAU,CAAC,MAAM,CAAC;IACrD,CAAC;IAED,qEAAqE;IACrE,IAAI,WAAW,CAAC,cAAc,IAAI,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,cAAc,CAAC,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACrF,aAAa,CAAC,OAAO,GAAG,EAAE,CAAC;QAC3B,KAAK,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,WAAW,CAAC,cAAc,CAAC,EAAE,CAAC;YACxE,IAAI,MAAM,CAAC,MAAM,EAAE,CAAC;gBAClB,aAAa,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC;YAC9C,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO;QACL,MAAM,EAAE;YACN,MAAM,EAAE,WAAW,CAAC,OAAO,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,QAAQ;YACjD,KAAK,EAAE,WAAW,CAAC,KAAK;YACxB,QAAQ,EAAE,WAAW,CAAC,QAAQ,GAAG,IAAI,EAAE,qBAAqB;SAC7D;QACD,UAAU,EAAE,WAAW,CAAC,UAAU;QAClC,aAAa,EAAE,MAAM,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,SAAS;KACjF,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,iBAAiB,CAAC,IAAY,EAAE,OAAsB;IACpE,MAAM,IAAI,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;IAC1C,MAAM,UAAU,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,QAAQ,CAAC,CAAC,MAAM,CAAC;IACpE,MAAM,aAAa,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC;IAEnE,OAAO;QACL,IAAI;QACJ,SAAS,EAAE,IAAI,CAAC,MAAM;QACtB,UAAU;QACV,QAAQ,EAAE,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,UAAU,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC;QAChE,YAAY,EAAE,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,aAAa,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;QAC/D,IAAI,EAAE,OAAO;KACd,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,uBAAuB,CACrC,MAAgC,EAChC,KAAoB,EACpB,SAAe,EACf,WAAiB;
IAEjB,OAAO;QACL,SAAS,EAAE,SAAS,CAAC,WAAW,EAAE;QAClC,WAAW,EAAE,WAAW,CAAC,WAAW,EAAE;QACtC,MAAM;QACN,KAAK;KACN,CAAC;AACJ,CAAC;AAYD;;;;;;;;;;;;;;GAcG;AACH,MAAM,UAAU,WAAW,CACzB,OAA0B,EAC1B,OAA2B;IAE3B,MAAM,SAAS,GAAG,OAAO,CAAC,SAAS,CAAC,OAAO,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;IACvD,MAAM,aAAa,GAAG,IAAI,CAAC,OAAO,CAAC,UAAU,EAAE,OAAO,CAAC,cAAc,EAAE,SAAS,CAAC,CAAC;IAElF,8BAA8B;IAC9B,SAAS,CAAC,aAAa,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAE9C,wBAAwB;IACxB,KAAK,MAAM,WAAW,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;QACxC,MAAM,OAAO,GAAG,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,IAAI,CAAC,CAAC;QACtD,SAAS,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QAExC,8CAA8C;QAC9C,MAAM,cAAc,GAAG;YACrB,SAAS,EAAE,WAAW,CAAC,SAAS;YAChC,UAAU,EAAE,WAAW,CAAC,UAAU;YAClC,QAAQ,EAAE,GAAG,WAAW,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG;YAC/C,YAAY,EAAE,WAAW,CAAC,YAAY;SACvC,CAAC;QACF,aAAa,CACX,IAAI,CAAC,OAAO,EAAE,cAAc,CAAC,EAC7B,IAAI,CAAC,SAAS,CAAC,cAAc,EAAE,IAAI,EAAE,CAAC,CAAC,CACxC,CAAC;QAEF,8BAA8B;QAC9B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,WAAW,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACjD,MAAM,OAAO,GAAG,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YACpC,MAAM,MAAM,GAAG,IAAI,CAAC,OAAO,EAAE,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;YAC7C,SAAS,CAAC,MAAM,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;YAEvC,+CAA+C;YAC/C,MAAM,eAAe,GAAqD;gBACxE,GAAG,OAAO,CAAC,MAAM;aAClB,CAAC;YAEF,gCAAgC;YAChC,IAAI,OAAO,CAAC,UAAU,EAAE,CAAC;gBACvB,2BAA2B;gBAC3B,oDAAoD;gBACpD,MAAM,KAAK,GAAG,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC;oBAC/C,CAAC,CAAC,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC;oBACzB,CAAC,CAAC,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC;gBACzB,MAAM,UAAU,GAAG,eAAe,CAChC,OAAO,CAAC,UAAU,EAClB,OAAO,CAAC,MAAM,CAAC,KAAK,EACpB,KAAK,CACN,CAAC;gBAEF,6DAA6D;gBAC7D,aAAa,CACX,IAAI,CAAC,MAAM,EAAE,iBAAiB,CAAC,EAC/B,IAAI,CAAC,SAAS,CAAC,UAAU,EAAE,IAAI,EAAE,CAAC,CAAC,CACpC,CAAC;gBACF,eAAe,CAAC,cAAc,GAAG,mBAAmB,CAAC;gBAErD,2DAA2D;gBAC3D,aAAa,CAAC,IAAI,CAAC,MAAM,EAAE,sBAAsB,CAAC,EAAE,OAAO,CAAC,UAAU,CAAC,CAAC;gBACxE,eAAe,CAAC,iBAAiB,GAAG,wBAAwB,CAAC;gBAE7D,kDAAkD;gBAClD,eAAe,CAAC,IAAI,GAAG
,UAAU,CAAC,OAAO,CAAC;YAC5C,CAAC;YAED,uCAAuC;YACvC,MAAM,UAAU,GAAG,IAAI,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;YAC3C,SAAS,CAAC,UAAU,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;YAE3C,IAAI,OAAO,CAAC,aAAa,EAAE,CAAC;gBAC1B,MAAM,WAAW,GAAiC,EAAE,CAAC;gBAErD,2BAA2B;gBAC3B,IAAI,OAAO,CAAC,aAAa,CAAC,IAAI,EAAE,CAAC;oBAC/B,aAAa,CAAC,IAAI,CAAC,UAAU,EAAE,UAAU,CAAC,EAAE,OAAO,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;oBACxE,WAAW,CAAC,IAAI,GAAG,oBAAoB,CAAC;gBAC1C,CAAC;gBAED,sDAAsD;gBACtD,IAAI,OAAO,CAAC,aAAa,CAAC,OAAO,EAAE,CAAC;oBAClC,WAAW,CAAC,OAAO,GAAG,EAAE,CAAC;oBACzB,KAAK,MAAM,CAAC,IAAI,EAAE,OAAO,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC,aAAa,CAAC,OAAO,CAAC,EAAE,CAAC;wBAC5E,IAAI,OAAO,EAAE,CAAC;4BACZ,MAAM,QAAQ,GAAG,GAAG,IAAI,MAAM,CAAC;4BAC/B,aAAa,CAAC,IAAI,CAAC,UAAU,EAAE,QAAQ,CAAC,EAAE,OAAO,CAAC,CAAC;4BACnD,WAAW,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG,aAAa,QAAQ,EAAE,CAAC;wBACtD,CAAC;oBACH,CAAC;gBACH,CAAC;gBAED,IAAI,WAAW,CAAC,IAAI,IAAI,CAAC,WAAW,CAAC,OAAO,IAAI,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,EAAE,CAAC;oBAC7F,eAAe,CAAC,WAAW,GAAG,WAAW,CAAC;gBAC5C,CAAC;YACH,CAAC;YAED,+CAA+C;YAC/C,aAAa,CACX,IAAI,CAAC,MAAM,EAAE,aAAa,CAAC,EAC3B,IAAI,CAAC,SAAS,CAAC,eAAe,EAAE,IAAI,EAAE,CAAC,CAAC,CACzC,CAAC;QACJ,CAAC;IACH,CAAC;IAED,OAAO,aAAa,CAAC;AACvB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,kBAAkB,CAAC,OAA0B;IAC3D,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,MAAM,SAAS,GAAG,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;IAEjC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACf,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,oBAAoB,CAAC,CAAC,CAAC;IAC7C,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC;IAClC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAEf,0BAA0B;IAC1B,MAAM,SAAS,GAAG,OAAO,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC;IACzE,MAAM,WAAW,GAAG,OAAO,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC;IAC5E,MAAM,eAAe,GAAG,SAAS,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,WAAW,GAAG,SAAS,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;IAE5E,KAAK,MAAM,WAAW,IAAI,OAAO,CAAC,KAA
K,EAAE,CAAC;QACxC,MAAM,QAAQ,GAAG,WAAW,CAAC,UAAU,KAAK,WAAW,CAAC,SAAS,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;QAC9E,MAAM,SAAS,GAAG,WAAW,CAAC,UAAU,KAAK,WAAW,CAAC,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC;QAE7F,KAAK,CAAC,IAAI,CACR,SAAS,CACP,GAAG,QAAQ,IAAI,WAAW,CAAC,IAAI,KAAK,WAAW,CAAC,UAAU,IAAI,WAAW,CAAC,SAAS,YAAY,WAAW,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CACnI,CACF,CAAC;QACF,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,oBAAoB,WAAW,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QACnF,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACjB,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC;IAClC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAEf,MAAM,YAAY,GAAG,eAAe,KAAK,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,eAAe,IAAI,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC;IAC9G,KAAK,CAAC,IAAI,CAAC,YAAY,CAAC,YAAY,WAAW,IAAI,SAAS,YAAY,eAAe,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;IAEzG,MAAM,QAAQ,GAAG,CAAC,IAAI,IAAI,CAAC,OAAO,CAAC,WAAW,CAAC,CAAC,OAAO,EAAE,GAAG,IAAI,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,OAAO,EAAE,CAAC,GAAG,IAAI,CAAC;IAC1G,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,eAAe,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAC9D,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAEf,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,eAAe,CAC7B,QAAgB,EAChB,SAAiB,EACjB,SAAiB,EACjB,MAAqB;IAErB,MAAM,IAAI,GAAG,MAAM,CAAC,MAAM,KAAK,QAAQ,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;IACpD,MAAM,KAAK,GAAG,MAAM,CAAC,MAAM,KAAK,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC;IAEnE,IAAI,IAAI,GAAG,KAAK,CAAC,GAAG,IAAI,IAAI,QAAQ,KAAK,SAAS,IAAI,SAAS,GAAG,CAAC,CAAC;IACpE,IAAI,IAAI,KAAK,CAAC,IAAI,CAAC,KAAK,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;IAExD,IAAI,MAAM,CAAC,KAAK,EAAE,CAAC;QACjB,IAAI,IAAI,KAAK,CAAC,GAAG,CAAC,MAAM,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,MAAM,CAAC,KAAK,CAAC,MAAM,GAAG,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;IAC/F,CAAC;IAED,OAA
O,IAAI,CAAC;AACd,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,qBAAqB,CACnC,QAAgB,EAChB,SAAiB,EACjB,SAAiB;IAEjB,OAAO,KAAK,CAAC,IAAI,CAAC,WAAW,QAAQ,KAAK,SAAS,IAAI,SAAS,MAAM,CAAC,CAAC;AAC1E,CAAC"}
|
package/dist/lib/runner.d.ts
CHANGED
|
@@ -3,13 +3,13 @@
|
|
|
3
3
|
* All evals and attempts run concurrently for maximum throughput.
|
|
4
4
|
* With earlyExit, in-flight attempts are aborted when one passes.
|
|
5
5
|
*/
|
|
6
|
-
import type { ResolvedExperimentConfig, EvalFixture, EvalRunData, ExperimentResults } from './types.js';
|
|
6
|
+
import type { ResolvedExperimentConfig, EvalFixture, EvalRunData, ExperimentResults, RunnableExperimentConfig } from './types.js';
|
|
7
7
|
/**
|
|
8
8
|
* Options for running an experiment.
|
|
9
9
|
*/
|
|
10
10
|
export interface RunExperimentOptions {
|
|
11
11
|
/** Resolved experiment configuration */
|
|
12
|
-
config:
|
|
12
|
+
config: RunnableExperimentConfig;
|
|
13
13
|
/** Fixtures to run */
|
|
14
14
|
fixtures: EvalFixture[];
|
|
15
15
|
/** API key for the agent */
|
|
@@ -31,14 +31,14 @@ export declare function runExperiment(options: RunExperimentOptions): Promise<Ex
|
|
|
31
31
|
/**
|
|
32
32
|
* Run a single eval (for testing/debugging).
|
|
33
33
|
*/
|
|
34
|
-
export declare function runSingleEval(fixture: EvalFixture, options: {
|
|
34
|
+
export declare function runSingleEval<T extends ResolvedExperimentConfig['model']>(fixture: EvalFixture, options: {
|
|
35
35
|
agent?: ResolvedExperimentConfig['agent'];
|
|
36
|
-
model:
|
|
36
|
+
model: T;
|
|
37
37
|
timeout: number;
|
|
38
38
|
apiKey: string;
|
|
39
39
|
setup?: ResolvedExperimentConfig['setup'];
|
|
40
40
|
scripts?: string[];
|
|
41
41
|
sandbox?: ResolvedExperimentConfig['sandbox'];
|
|
42
42
|
verbose?: boolean;
|
|
43
|
-
}): Promise<EvalRunData>;
|
|
43
|
+
}): Promise<T extends Array<unknown> ? EvalRunData[] : EvalRunData>;
|
|
44
44
|
//# sourceMappingURL=runner.d.ts.map
|
package/dist/lib/runner.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"runner.d.ts","sourceRoot":"","sources":["../../src/lib/runner.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,KAAK,EACV,wBAAwB,EACxB,WAAW,EACX,WAAW,EAEX,iBAAiB,
|
|
1
|
+
{"version":3,"file":"runner.d.ts","sourceRoot":"","sources":["../../src/lib/runner.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,KAAK,EACV,wBAAwB,EACxB,WAAW,EACX,WAAW,EAEX,iBAAiB,EACjB,wBAAwB,EACzB,MAAM,YAAY,CAAC;AAYpB;;GAEG;AACH,MAAM,WAAW,oBAAoB;IACnC,wCAAwC;IACxC,MAAM,EAAE,wBAAwB,CAAC;IACjC,sBAAsB;IACtB,QAAQ,EAAE,WAAW,EAAE,CAAC;IACxB,4BAA4B;IAC5B,MAAM,EAAE,MAAM,CAAC;IACf,gCAAgC;IAChC,UAAU,EAAE,MAAM,CAAC;IACnB,sBAAsB;IACtB,cAAc,EAAE,MAAM,CAAC;IACvB,oCAAoC;IACpC,UAAU,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,IAAI,CAAC;IACvC,qCAAqC;IACrC,OAAO,CAAC,EAAE,OAAO,CAAC;CACnB;AAoBD;;;GAGG;AACH,wBAAsB,aAAa,CACjC,OAAO,EAAE,oBAAoB,GAC5B,OAAO,CAAC,iBAAiB,CAAC,CA+K5B;AAED;;GAEG;AACH,wBAAsB,aAAa,CAAC,CAAC,SAAS,wBAAwB,CAAC,OAAO,CAAC,EAC7E,OAAO,EAAE,WAAW,EACpB,OAAO,EAAE;IACP,KAAK,CAAC,EAAE,wBAAwB,CAAC,OAAO,CAAC,CAAC;IAC1C,KAAK,EAAE,CAAC,CAAC;IACT,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,wBAAwB,CAAC,OAAO,CAAC,CAAC;IAC1C,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,OAAO,CAAC,EAAE,wBAAwB,CAAC,SAAS,CAAC,CAAC;IAC9C,OAAO,CAAC,EAAE,OAAO,CAAC;CACnB,GACA,OAAO,CAAC,CAAC,SAAS,KAAK,CAAC,OAAO,CAAC,GAAG,WAAW,EAAE,GAAG,WAAW,CAAC,CA4BjE"}
|
package/dist/lib/runner.js
CHANGED
|
@@ -161,15 +161,24 @@ export async function runExperiment(options) {
|
|
|
161
161
|
*/
|
|
162
162
|
export async function runSingleEval(fixture, options) {
|
|
163
163
|
const agent = getAgent(options.agent ?? 'vercel-ai-gateway/claude-code');
|
|
164
|
-
const
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
164
|
+
const models = Array.isArray(options.model) ? options.model : [options.model];
|
|
165
|
+
const results = [];
|
|
166
|
+
for (const model of models) {
|
|
167
|
+
const agentResult = await agent.run(fixture.path, {
|
|
168
|
+
prompt: fixture.prompt,
|
|
169
|
+
model,
|
|
170
|
+
timeout: options.timeout * 1000,
|
|
171
|
+
apiKey: options.apiKey,
|
|
172
|
+
setup: options.setup,
|
|
173
|
+
scripts: options.scripts,
|
|
174
|
+
sandbox: options.sandbox,
|
|
175
|
+
});
|
|
176
|
+
results.push(agentResultToEvalRunData(agentResult));
|
|
177
|
+
}
|
|
178
|
+
// TODO: remove this on the next major and return an array directly...it's just here to prevent breaking changes
|
|
179
|
+
if (!Array.isArray(options.model)) {
|
|
180
|
+
return results[0];
|
|
181
|
+
}
|
|
182
|
+
return results;
|
|
174
183
|
}
|
|
175
184
|
//# sourceMappingURL=runner.js.map
|