evalforge 0.7.1 → 0.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,2 +1,3 @@
1
1
  export { fromMastra } from './mastra';
2
+ export { fromVercel } from './vercel';
2
3
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/adapters/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,UAAU,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/adapters/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,UAAU,CAAC;AACtC,OAAO,EAAE,UAAU,EAAE,MAAM,UAAU,CAAC"}
@@ -1,6 +1,8 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.fromMastra = void 0;
3
+ exports.fromVercel = exports.fromMastra = void 0;
4
4
  var mastra_1 = require("./mastra");
5
5
  Object.defineProperty(exports, "fromMastra", { enumerable: true, get: function () { return mastra_1.fromMastra; } });
6
+ var vercel_1 = require("./vercel");
7
+ Object.defineProperty(exports, "fromVercel", { enumerable: true, get: function () { return vercel_1.fromVercel; } });
6
8
  //# sourceMappingURL=index.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/adapters/index.ts"],"names":[],"mappings":";;;AAAA,mCAAsC;AAA7B,oGAAA,UAAU,OAAA"}
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/adapters/index.ts"],"names":[],"mappings":";;;AAAA,mCAAsC;AAA7B,oGAAA,UAAU,OAAA;AACnB,mCAAsC;AAA7B,oGAAA,UAAU,OAAA"}
@@ -0,0 +1,28 @@
1
+ /**
2
+ * Vercel AI SDK adapter for EvalForge.
3
+ *
4
+ * Usage:
5
+ * import { generateText, streamText } from 'ai';
6
+ * import { fromVercel } from 'evalforge';
7
+ * import { run } from 'evalforge';
8
+ *
9
+ * const result = await generateText({
10
+ * model: openai('gpt-4o'),
11
+ * prompt: 'What is the capital of France?',
12
+ * tools: { webSearch: ... }
13
+ * });
14
+ *
15
+ * const tracePath = fromVercel(result, {
16
+ * question: 'What is the capital of France?',
17
+ * model: 'gpt-4o'
18
+ * });
19
+ * const evalResult = run(tracePath, { metrics: ['faithfulness'] });
20
+ */
21
+ export interface VercelAdapterOptions {
22
+ agentName?: string;
23
+ model?: string;
24
+ question?: string;
25
+ expectedTools?: string[];
26
+ }
27
+ export declare function fromVercel(result: any, options?: VercelAdapterOptions): string;
28
+ //# sourceMappingURL=vercel.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"vercel.d.ts","sourceRoot":"","sources":["../../src/adapters/vercel.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AAOH,MAAM,WAAW,oBAAoB;IACnC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;CAC1B;AAED,wBAAgB,UAAU,CACxB,MAAM,EAAE,GAAG,EACX,OAAO,GAAE,oBAAyB,GACjC,MAAM,CAiHR"}
@@ -0,0 +1,158 @@
1
+ "use strict";
2
+ /**
3
+ * Vercel AI SDK adapter for EvalForge.
4
+ *
5
+ * Usage:
6
+ * import { generateText, streamText } from 'ai';
7
+ * import { fromVercel } from 'evalforge';
8
+ * import { run } from 'evalforge';
9
+ *
10
+ * const result = await generateText({
11
+ * model: openai('gpt-4o'),
12
+ * prompt: 'What is the capital of France?',
13
+ * tools: { webSearch: ... }
14
+ * });
15
+ *
16
+ * const tracePath = fromVercel(result, {
17
+ * question: 'What is the capital of France?',
18
+ * model: 'gpt-4o'
19
+ * });
20
+ * const evalResult = run(tracePath, { metrics: ['faithfulness'] });
21
+ */
22
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
23
+ if (k2 === undefined) k2 = k;
24
+ var desc = Object.getOwnPropertyDescriptor(m, k);
25
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
26
+ desc = { enumerable: true, get: function() { return m[k]; } };
27
+ }
28
+ Object.defineProperty(o, k2, desc);
29
+ }) : (function(o, m, k, k2) {
30
+ if (k2 === undefined) k2 = k;
31
+ o[k2] = m[k];
32
+ }));
33
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
34
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
35
+ }) : function(o, v) {
36
+ o["default"] = v;
37
+ });
38
+ var __importStar = (this && this.__importStar) || (function () {
39
+ var ownKeys = function(o) {
40
+ ownKeys = Object.getOwnPropertyNames || function (o) {
41
+ var ar = [];
42
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
43
+ return ar;
44
+ };
45
+ return ownKeys(o);
46
+ };
47
+ return function (mod) {
48
+ if (mod && mod.__esModule) return mod;
49
+ var result = {};
50
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
51
+ __setModuleDefault(result, mod);
52
+ return result;
53
+ };
54
+ })();
55
+ Object.defineProperty(exports, "__esModule", { value: true });
56
+ exports.fromVercel = fromVercel;
57
+ const fs = __importStar(require("fs"));
58
+ const os = __importStar(require("os"));
59
+ const path = __importStar(require("path"));
60
/** Returns `value` when it is an array, otherwise an empty array. */
function asArray(value) {
    return Array.isArray(value) ? value : [];
}
/**
 * Renders a tool result payload as text.
 * Strings pass through, null/undefined become '', everything else is
 * JSON-encoded so objects do not collapse to "[object Object]".
 */
function stringifyToolOutput(value) {
    if (value == null) {
        return '';
    }
    if (typeof value === 'string') {
        return value;
    }
    try {
        // JSON.stringify yields undefined for functions/symbols and throws on
        // BigInt or circular structures; String() is the fallback for those.
        const encoded = JSON.stringify(value);
        return encoded ?? String(value);
    }
    catch {
        return String(value);
    }
}
/**
 * Picks the result belonging to `call`: by `toolCallId` when the call has one,
 * otherwise by position. A positional result that carries an id is never
 * handed to a call with a different id.
 */
function findToolResult(call, results, index) {
    const callId = call?.toolCallId;
    const positional = results[index];
    if (callId == null) {
        return positional;
    }
    const byId = results.find((candidate) => candidate?.toolCallId === callId);
    if (byId) {
        return byId;
    }
    return positional?.toolCallId == null ? positional : undefined;
}
/** Builds one `tool_call` trace step from a tool call and its (optional) result. */
function toToolStep(stepId, call, toolResult) {
    return {
        step_id: stepId,
        type: 'tool_call',
        tool: call?.toolName ?? call?.name ?? 'unknown_tool',
        input: call?.args ?? call?.input ?? {},
        output: toolResult
            ? { result: stringifyToolOutput(toolResult.result ?? toolResult.output) }
            : {},
        duration_ms: 0,
    };
}
/**
 * Writes `payload` to a new file in the OS temp directory and returns its path.
 * The file is created exclusively ('wx'), so an existing file is never
 * overwritten; on a name clash a numeric suffix is appended.
 */
function writeTraceFile(payload, stamp) {
    const maxAttempts = 1000;
    for (let attempt = 0; attempt < maxAttempts; attempt++) {
        const suffix = attempt === 0 ? '' : `_${attempt}`;
        const candidate = path.join(os.tmpdir(), `evalforge_vercel_${stamp}${suffix}.json`);
        try {
            fs.writeFileSync(candidate, payload, { flag: 'wx' });
            return candidate;
        }
        catch (err) {
            if (err?.code !== 'EEXIST') {
                throw err;
            }
        }
    }
    throw new Error(`evalforge: could not allocate a unique trace file for timestamp ${stamp}`);
}
/**
 * Convert a Vercel AI SDK generateText() result to an EvalForge trace JSON file.
 *
 * Reads:
 * - result.text — final answer (a plain string `result` is also accepted)
 * - result.toolCalls / result.toolResults — top-level tool invocations
 * - result.steps — per-step text and tool invocations
 * - result.usage — promptTokens/completionTokens (or inputTokens/outputTokens)
 * - result.finishReason — copied to output.finish_reason
 *
 * A top-level tool call whose toolCallId also appears inside result.steps is
 * emitted once (from the step), not twice.
 *
 * @param result  Result object (or answer string) to convert.
 * @param options Optional agentName, model, question and expectedTools.
 * @returns Path of the JSON trace file written to the OS temp directory.
 * @throws Propagates file-system errors from writing the trace file.
 */
function fromVercel(result, options = {}) {
    const { agentName = 'vercel-agent', model = 'unknown', question = '', expectedTools = [], } = options;
    const steps = [];
    let stepId = 1;
    // Final answer: only real strings are accepted so a pending Promise (or
    // other non-string) is never serialised as the answer.
    let finalAnswer = '';
    if (typeof result === 'string') {
        finalAnswer = result;
    }
    else if (typeof result?.text === 'string') {
        finalAnswer = result.text;
    }
    // Token usage: accept both the prompt/completion and input/output spellings.
    const usage = result?.usage;
    const totalTokens = usage
        ? (usage.promptTokens ?? usage.inputTokens ?? 0) +
            (usage.completionTokens ?? usage.outputTokens ?? 0)
        : 0;
    const sdkSteps = asArray(result?.steps);
    // Ids of tool calls that the per-step pass below will emit.
    const idsInSteps = new Set();
    for (const step of sdkSteps) {
        for (const call of asArray(step?.toolCalls)) {
            if (call?.toolCallId != null) {
                idsInSteps.add(call.toolCallId);
            }
        }
    }
    // Top-level tool calls, skipping the ones repeated inside result.steps.
    const toolResults = asArray(result?.toolResults);
    asArray(result?.toolCalls).forEach((call, index) => {
        if (call?.toolCallId != null && idsInSteps.has(call.toolCallId)) {
            return;
        }
        steps.push(toToolStep(stepId++, call, findToolResult(call, toolResults, index)));
    });
    // Per-step text and tool calls (multi-step generations).
    for (const step of sdkSteps) {
        if (step?.text && step.text !== finalAnswer) {
            steps.push({
                step_id: stepId++,
                type: 'thought',
                content: step.text,
            });
        }
        const stepResults = asArray(step?.toolResults);
        asArray(step?.toolCalls).forEach((call, index) => {
            steps.push(toToolStep(stepId++, call, findToolResult(call, stepResults, index)));
        });
    }
    // One clock reading keeps trace_id, timestamp and file name in agreement.
    const createdAt = new Date();
    const trace = {
        evalforge_version: '0.1',
        trace_id: `vercel-${createdAt.getTime()}`,
        timestamp: createdAt.toISOString(),
        metadata: {
            framework: 'vercel-ai',
            model,
            agent_name: agentName,
            duration_ms: 0,
            total_tokens: totalTokens,
        },
        input: {
            user: question,
            system: '',
        },
        steps,
        output: {
            answer: finalAnswer,
            finish_reason: result?.finishReason,
        },
        eval_hints: {
            expected_tools: expectedTools,
            expected_answer: null,
            context_documents: [],
        },
    };
    return writeTraceFile(JSON.stringify(trace, null, 2), createdAt.getTime());
}
158
+ //# sourceMappingURL=vercel.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"vercel.js","sourceRoot":"","sources":["../../src/adapters/vercel.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;;;;;;;;GAmBG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAcH,gCAoHC;AAhID,uCAAyB;AACzB,uCAAyB;AACzB,2CAA6B;AAU7B,SAAgB,UAAU,CACxB,MAAW,EACX,UAAgC,EAAE;IAElC;;;;;;;;;;OAUG;IAEH,MAAM,EACJ,SAAS,GAAG,cAAc,EAC1B,KAAK,GAAG,SAAS,EACjB,QAAQ,GAAG,EAAE,EACb,aAAa,GAAG,EAAE,GACnB,GAAG,OAAO,CAAC;IAEZ,MAAM,KAAK,GAAmB,EAAE,CAAC;IACjC,IAAI,MAAM,GAAG,CAAC,CAAC;IACf,IAAI,WAAW,GAAG,EAAE,CAAC;IACrB,IAAI,WAAW,GAAG,CAAC,CAAC;IAEpB,uBAAuB;IACvB,IAAI,MAAM,EAAE,IAAI,EAAE,CAAC;QACjB,WAAW,GAAG,MAAM,CAAC,IAAI,CAAC;IAC5B,CAAC;SAAM,IAAI,OAAO,MAAM,KAAK,QAAQ,EAAE,CAAC;QACtC,WAAW,GAAG,MAAM,CAAC;IACvB,CAAC;IAED,sBAAsB;IACtB,IAAI,MAAM,EAAE,KAAK,EAAE,CAAC;QAClB,WAAW,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,YAAY,IAAI,CAAC,CAAC;YAChC,CAAC,MAAM,CAAC,KAAK,CAAC,gBAAgB,IAAI,CAAC,CAAC,CAAC;IACrD,CAAC;IAED,iCAAiC;IACjC,MAAM,SAAS,GAAG,MAAM,EAAE,SAAS,IAAI,EAAE,CAAC;IAC1C,MAAM,WAAW,GAAG,MAAM,EAAE,WAAW,IAAI,EAAE,CAAC;IAE9C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC1C,MAAM,EAAE,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC;QACxB,MAAM,EAAE,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC;QAE1B,KAAK,CAAC,IAAI,CAAC;YACT,OAAO,EAAE,MAAM,EAAE;YACjB,IAAI,EAAE,WAAW;YACjB,IAAI,EAAE,EAAE,CAAC,QAAQ,IAAI,EAAE,CAAC,IAAI,IAAI,cAAc;YAC9C,KAAK,EAAE,EAAE,CAAC,IAAI,IAAI,EAAE,CAAC,KAAK,IAAI,EAAE;YAChC,MAAM,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC,MAAM,IAAI,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE;YACrD,WAAW,EAAE,CAAC;SACf,CAAC,CAAC;IACL,CAAC;IAED,oDAAoD;IACpD,IAAI,KAAK,CAAC,OAAO,CAAC,MAAM,EAAE,KAAK,CAAC,EAAE,CAAC;QACjC,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,KAAK,EAAE,CAAC;YAChC,IAAI,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,IAAI,KAAK,WAAW,EAAE,CAAC;gBAC3C,KAAK,CAAC,IAAI,CAAC;oBACT,OAAO,EAAE,MAAM,EAAE;oBACjB,IAAI,EAAE,SAAS;oBACf,OAAO,EAAE,IAAI,CAAC,IAAI;iBACnB,CAAC,CAAC;YACL,CAAC;YACD,0BAA0B;YAC1B,KAAK,MAAM,EAAE,IAAI,IAAI,CAAC,SAAS,IAAI,EAAE,EAAE,CAAC;gBACtC,MAAM,EAAE,GAAG,CAAC,IAAI,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,IAAI,CACtC,C
AAC,CAAM,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,KAAK,EAAE,CAAC,UAAU,CAC3C,CAAC;gBACF,KAAK,CAAC,IAAI,CAAC;oBACT,OAAO,EAAE,MAAM,EAAE;oBACjB,IAAI,EAAE,WAAW;oBACjB,IAAI,EAAE,EAAE,CAAC,QAAQ,IAAI,cAAc;oBACnC,KAAK,EAAE,EAAE,CAAC,IAAI,IAAI,EAAE;oBACpB,MAAM,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC,MAAM,IAAI,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE;oBACrD,WAAW,EAAE,CAAC;iBACf,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;IAED,MAAM,KAAK,GAAU;QACnB,iBAAiB,EAAE,KAAK;QACxB,QAAQ,EAAE,UAAU,IAAI,CAAC,GAAG,EAAE,EAAE;QAChC,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACnC,QAAQ,EAAE;YACR,SAAS,EAAE,WAAW;YACtB,KAAK;YACL,UAAU,EAAE,SAAS;YACrB,WAAW,EAAE,CAAC;YACd,YAAY,EAAE,WAAW;SAC1B;QACD,KAAK,EAAE;YACL,IAAI,EAAE,QAAQ;YACd,MAAM,EAAE,EAAE;SACX;QACD,KAAK;QACL,MAAM,EAAE;YACN,MAAM,EAAE,WAAW;YACnB,aAAa,EAAE,MAAM,EAAE,YAAY;SACpC;QACD,UAAU,EAAE;YACV,cAAc,EAAE,aAAa;YAC7B,eAAe,EAAE,IAAI;YACrB,iBAAiB,EAAE,EAAE;SACtB;KACF,CAAC;IAEF,MAAM,GAAG,GAAG,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,MAAM,EAAE,EAAE,oBAAoB,IAAI,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;IAC1E,EAAE,CAAC,aAAa,CAAC,GAAG,EAAE,IAAI,CAAC,SAAS,CAAC,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;IACtD,OAAO,GAAG,CAAC;AACb,CAAC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "evalforge",
3
- "version": "0.7.1",
3
+ "version": "0.7.2",
4
4
  "description": "Framework-agnostic LLM agent evaluation harness",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
@@ -1 +1,2 @@
1
1
  export { fromMastra } from './mastra';
2
+ export { fromVercel } from './vercel';
@@ -0,0 +1,150 @@
1
+ /**
2
+ * Vercel AI SDK adapter for EvalForge.
3
+ *
4
+ * Usage:
5
+ * import { generateText, streamText } from 'ai';
6
+ * import { fromVercel } from 'evalforge';
7
+ * import { run } from 'evalforge';
8
+ *
9
+ * const result = await generateText({
10
+ * model: openai('gpt-4o'),
11
+ * prompt: 'What is the capital of France?',
12
+ * tools: { webSearch: ... }
13
+ * });
14
+ *
15
+ * const tracePath = fromVercel(result, {
16
+ * question: 'What is the capital of France?',
17
+ * model: 'gpt-4o'
18
+ * });
19
+ * const evalResult = run(tracePath, { metrics: ['faithfulness'] });
20
+ */
21
+
22
+ import * as fs from 'fs';
23
+ import * as os from 'os';
24
+ import * as path from 'path';
25
+ import { Trace } from '../types';
26
+
27
/**
 * Options accepted by `fromVercel`. Every field is optional.
 */
export interface VercelAdapterOptions {
  /** Recorded as `metadata.agent_name`; defaults to `'vercel-agent'`. */
  agentName?: string;
  /** Recorded as `metadata.model`; defaults to `'unknown'`. */
  model?: string;
  /** Recorded as `input.user`; defaults to `''`. */
  question?: string;
  /** Recorded as `eval_hints.expected_tools`; defaults to `[]`. */
  expectedTools?: string[];
}

// The SDK payload is untyped at this boundary (`result: any`), so the helpers
// below accept `any` and guard every access instead of trusting a shape.

/** Returns `value` when it is an array, otherwise an empty array. */
function asArray(value: unknown): any[] {
  return Array.isArray(value) ? value : [];
}

/**
 * Renders a tool result payload as text.
 * Strings pass through, null/undefined become `''`, everything else is
 * JSON-encoded so objects do not collapse to `"[object Object]"`.
 */
function stringifyToolOutput(value: unknown): string {
  if (value == null) {
    return '';
  }
  if (typeof value === 'string') {
    return value;
  }
  try {
    // JSON.stringify yields undefined for functions/symbols and throws on
    // BigInt or circular structures; String() is the fallback for those.
    const encoded: string | undefined = JSON.stringify(value);
    return encoded ?? String(value);
  } catch {
    return String(value);
  }
}

/**
 * Picks the result belonging to `call`: by `toolCallId` when the call has one,
 * otherwise by position. A positional result that carries an id is never
 * handed to a call with a different id.
 */
function findToolResult(call: any, results: any[], index: number): any {
  const callId = call?.toolCallId;
  const positional = results[index];
  if (callId == null) {
    return positional;
  }
  const byId = results.find((candidate) => candidate?.toolCallId === callId);
  if (byId) {
    return byId;
  }
  return positional?.toolCallId == null ? positional : undefined;
}

/** Builds one `tool_call` trace step from a tool call and its (optional) result. */
function toToolStep(stepId: number, call: any, toolResult: any): Trace['steps'][number] {
  return {
    step_id: stepId,
    type: 'tool_call',
    tool: call?.toolName ?? call?.name ?? 'unknown_tool',
    input: call?.args ?? call?.input ?? {},
    output: toolResult
      ? { result: stringifyToolOutput(toolResult.result ?? toolResult.output) }
      : {},
    duration_ms: 0,
  };
}

/**
 * Writes `payload` to a new file in the OS temp directory and returns its path.
 * The file is created exclusively (`'wx'`), so an existing file is never
 * overwritten; on a name clash a numeric suffix is appended.
 */
function writeTraceFile(payload: string, stamp: number): string {
  const maxAttempts = 1000;
  for (let attempt = 0; attempt < maxAttempts; attempt++) {
    const suffix = attempt === 0 ? '' : `_${attempt}`;
    const candidate = path.join(os.tmpdir(), `evalforge_vercel_${stamp}${suffix}.json`);
    try {
      fs.writeFileSync(candidate, payload, { flag: 'wx' });
      return candidate;
    } catch (err: unknown) {
      if ((err as { code?: unknown } | null)?.code !== 'EEXIST') {
        throw err;
      }
    }
  }
  throw new Error(`evalforge: could not allocate a unique trace file for timestamp ${stamp}`);
}

/**
 * Convert a Vercel AI SDK `generateText()` result to an EvalForge trace JSON file.
 *
 * Reads:
 * - `result.text` — final answer (a plain string `result` is also accepted)
 * - `result.toolCalls` / `result.toolResults` — top-level tool invocations
 * - `result.steps` — per-step text and tool invocations
 * - `result.usage` — `promptTokens`/`completionTokens` (or `inputTokens`/`outputTokens`)
 * - `result.finishReason` — copied to `output.finish_reason`
 *
 * A top-level tool call whose `toolCallId` also appears inside `result.steps`
 * is emitted once (from the step), not twice.
 *
 * @param result  Result object (or answer string) to convert.
 * @param options Optional labels and evaluation hints; see {@link VercelAdapterOptions}.
 * @returns Path of the JSON trace file written to the OS temp directory.
 * @throws Propagates file-system errors from writing the trace file.
 */
export function fromVercel(
  result: any,
  options: VercelAdapterOptions = {}
): string {
  const {
    agentName = 'vercel-agent',
    model = 'unknown',
    question = '',
    expectedTools = [],
  } = options;

  const steps: Trace['steps'] = [];
  let stepId = 1;

  // Final answer: only real strings are accepted so a pending Promise (or
  // other non-string) is never serialised as the answer.
  let finalAnswer = '';
  if (typeof result === 'string') {
    finalAnswer = result;
  } else if (typeof result?.text === 'string') {
    finalAnswer = result.text;
  }

  // Token usage: accept both the prompt/completion and input/output spellings.
  const usage = result?.usage;
  const totalTokens: number = usage
    ? (usage.promptTokens ?? usage.inputTokens ?? 0) +
      (usage.completionTokens ?? usage.outputTokens ?? 0)
    : 0;

  const sdkSteps = asArray(result?.steps);

  // Ids of tool calls that the per-step pass below will emit.
  const idsInSteps = new Set<unknown>();
  for (const step of sdkSteps) {
    for (const call of asArray(step?.toolCalls)) {
      if (call?.toolCallId != null) {
        idsInSteps.add(call.toolCallId);
      }
    }
  }

  // Top-level tool calls, skipping the ones repeated inside result.steps.
  const toolResults = asArray(result?.toolResults);
  asArray(result?.toolCalls).forEach((call, index) => {
    if (call?.toolCallId != null && idsInSteps.has(call.toolCallId)) {
      return;
    }
    steps.push(toToolStep(stepId++, call, findToolResult(call, toolResults, index)));
  });

  // Per-step text and tool calls (multi-step generations).
  for (const step of sdkSteps) {
    if (step?.text && step.text !== finalAnswer) {
      steps.push({
        step_id: stepId++,
        type: 'thought',
        content: step.text,
      });
    }
    const stepResults = asArray(step?.toolResults);
    asArray(step?.toolCalls).forEach((call, index) => {
      steps.push(toToolStep(stepId++, call, findToolResult(call, stepResults, index)));
    });
  }

  // One clock reading keeps trace_id, timestamp and file name in agreement.
  const createdAt = new Date();

  const trace: Trace = {
    evalforge_version: '0.1',
    trace_id: `vercel-${createdAt.getTime()}`,
    timestamp: createdAt.toISOString(),
    metadata: {
      framework: 'vercel-ai',
      model,
      agent_name: agentName,
      duration_ms: 0,
      total_tokens: totalTokens,
    },
    input: {
      user: question,
      system: '',
    },
    steps,
    output: {
      answer: finalAnswer,
      finish_reason: result?.finishReason,
    },
    eval_hints: {
      expected_tools: expectedTools,
      expected_answer: null,
      context_documents: [],
    },
  };

  return writeTraceFile(JSON.stringify(trace, null, 2), createdAt.getTime());
}
@@ -3,7 +3,7 @@ const assert = require('node:assert');
3
3
  const path = require('path');
4
4
  const fs = require('fs');
5
5
 
6
- const { fromMastra } = require('../dist/adapters/index.js');
6
+ const { fromMastra, fromVercel } = require('../dist/adapters/index.js');
7
7
 
8
8
  test('fromMastra with text result', () => {
9
9
  const result = { text: 'The capital of France is Paris.' };
@@ -53,3 +53,58 @@ test('fromMastra with token usage', () => {
53
53
  const trace = JSON.parse(fs.readFileSync(tracePath, 'utf8'));
54
54
  assert.strictEqual(trace.metadata.total_tokens, 150);
55
55
  });
56
+
57
// Text-only result: the answer, the summed token usage and the finish reason
// must all be copied into the written trace.
test('fromVercel with text result', () => {
  const tracePath = fromVercel(
    {
      text: 'The capital of France is Paris.',
      usage: { promptTokens: 50, completionTokens: 20 },
      finishReason: 'stop',
    },
    { question: 'What is the capital of France?', model: 'gpt-4o' },
  );
  const { metadata, output } = JSON.parse(fs.readFileSync(tracePath, 'utf8'));
  assert.strictEqual(metadata.framework, 'vercel-ai');
  assert.strictEqual(output.answer, 'The capital of France is Paris.');
  assert.strictEqual(metadata.total_tokens, 70);
  assert.strictEqual(output.finish_reason, 'stop');
});
73
+
74
// Top-level tool call: one tool step is produced and the expected-tools hint
// is passed through unchanged.
test('fromVercel with tool calls', () => {
  const searchCall = {
    toolName: 'web_search',
    args: { query: 'capital of Australia' },
  };
  const searchResult = { result: 'Canberra is the capital of Australia.' };
  const tracePath = fromVercel(
    {
      text: 'Canberra is the capital.',
      toolCalls: [searchCall],
      toolResults: [searchResult],
    },
    { question: 'Capital of Australia?', expectedTools: ['web_search'] },
  );
  const { steps, eval_hints: evalHints } = JSON.parse(fs.readFileSync(tracePath, 'utf8'));
  assert.strictEqual(steps.length, 1);
  assert.strictEqual(steps[0].tool, 'web_search');
  assert.deepStrictEqual(evalHints.expected_tools, ['web_search']);
});
94
+
95
// Multi-step result: intermediate text becomes a 'thought' step, empty step
// text is skipped, and the tool call is paired with its result by toolCallId.
test('fromVercel with multi-step result', () => {
  const result = {
    text: 'Final answer.',
    steps: [
      { text: 'Thinking...', toolCalls: [], toolResults: [] },
      {
        text: '',
        toolCalls: [{ toolName: 'search', toolCallId: '1', args: { q: 'test' } }],
        toolResults: [{ toolCallId: '1', result: 'search result' }],
      },
    ],
  };
  const tracePath = fromVercel(result, { question: 'Test?' });
  const trace = JSON.parse(fs.readFileSync(tracePath, 'utf8'));

  // Exactly two steps, numbered in emission order.
  assert.strictEqual(trace.steps.length, 2);
  assert.deepStrictEqual(trace.steps.map((s) => s.step_id), [1, 2]);
  assert.deepStrictEqual(trace.steps.map((s) => s.type), ['thought', 'tool_call']);

  assert.strictEqual(trace.steps[0].content, 'Thinking...');

  assert.strictEqual(trace.steps[1].tool, 'search');
  assert.deepStrictEqual(trace.steps[1].input, { q: 'test' });
  assert.deepStrictEqual(trace.steps[1].output, { result: 'search result' });

  assert.strictEqual(trace.output.answer, 'Final answer.');
});