npm - evalforge - Versions diffs - 0.7.1 → 0.8.0 - Mend

evalforge 0.7.1 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

package/dist/adapters/index.d.ts +1 -0
package/dist/adapters/index.d.ts.map +1 -1
package/dist/adapters/index.js +3 -1
package/dist/adapters/index.js.map +1 -1
package/dist/adapters/vercel.d.ts +28 -0
package/dist/adapters/vercel.d.ts.map +1 -0
package/dist/adapters/vercel.js +158 -0
package/dist/adapters/vercel.js.map +1 -0
package/package.json +1 -1
package/src/adapters/index.ts +1 -0
package/src/adapters/vercel.ts +150 -0
package/tests/adapters.test.js +56 -1

package/dist/adapters/index.d.ts CHANGED Viewed

@@ -1,2 +1,3 @@
 export { fromMastra } from './mastra';
+export { fromVercel } from './vercel';
 //# sourceMappingURL=index.d.ts.map

package/dist/adapters/index.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/adapters/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,UAAU,CAAC"}
1	+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/adapters/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,UAAU,CAAC;AACtC,OAAO,EAAE,UAAU,EAAE,MAAM,UAAU,CAAC"}

package/dist/adapters/index.js CHANGED Viewed

@@ -1,6 +1,8 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.fromMastra = void 0;
+exports.fromVercel = exports.fromMastra = void 0;
 var mastra_1 = require("./mastra");
 Object.defineProperty(exports, "fromMastra", { enumerable: true, get: function () { return mastra_1.fromMastra; } });
+var vercel_1 = require("./vercel");
+Object.defineProperty(exports, "fromVercel", { enumerable: true, get: function () { return vercel_1.fromVercel; } });
 //# sourceMappingURL=index.js.map

package/dist/adapters/index.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/adapters/index.ts"],"names":[],"mappings":";;;AAAA,mCAAsC;AAA7B,oGAAA,UAAU,OAAA"}
1	+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/adapters/index.ts"],"names":[],"mappings":";;;AAAA,mCAAsC;AAA7B,oGAAA,UAAU,OAAA;AACnB,mCAAsC;AAA7B,oGAAA,UAAU,OAAA"}

package/dist/adapters/vercel.d.ts ADDED Viewed

@@ -0,0 +1,28 @@
+/**
+ * Vercel AI SDK adapter for EvalForge.
+ *
+ * Usage:
+ *   import { generateText, streamText } from 'ai';
+ *   import { fromVercel } from 'evalforge';
+ *   import { run } from 'evalforge';
+ *
+ *   const result = await generateText({
+ *     model: openai('gpt-4o'),
+ *     prompt: 'What is the capital of France?',
+ *     tools: { webSearch: ... }
+ *   });
+ *
+ *   const tracePath = fromVercel(result, {
+ *     question: 'What is the capital of France?',
+ *     model: 'gpt-4o'
+ *   });
+ *   const evalResult = run(tracePath, { metrics: ['faithfulness'] });
+ */
+export interface VercelAdapterOptions { // optional settings accepted by fromVercel(); every field may be omitted
+    agentName?: string; // written to trace.metadata.agent_name; fromVercel defaults it to 'vercel-agent'
+    model?: string; // written to trace.metadata.model; fromVercel defaults it to 'unknown'
+    question?: string; // written to trace.input.user; fromVercel defaults it to ''
+    expectedTools?: string[]; // written to trace.eval_hints.expected_tools; fromVercel defaults it to []
+}
+export declare function fromVercel(result: any, options?: VercelAdapterOptions): string; // returns the path of the JSON trace file written under os.tmpdir(), not the trace object
+//# sourceMappingURL=vercel.d.ts.map

package/dist/adapters/vercel.d.ts.map ADDED Viewed

	@@ -0,0 +1 @@
1	+ {"version":3,"file":"vercel.d.ts","sourceRoot":"","sources":["../../src/adapters/vercel.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AAOH,MAAM,WAAW,oBAAoB;IACnC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;CAC1B;AAED,wBAAgB,UAAU,CACxB,MAAM,EAAE,GAAG,EACX,OAAO,GAAE,oBAAyB,GACjC,MAAM,CAiHR"}

package/dist/adapters/vercel.js ADDED Viewed

@@ -0,0 +1,158 @@
+"use strict";
+/**
+ * Vercel AI SDK adapter for EvalForge.
+ *
+ * Usage:
+ *   import { generateText, streamText } from 'ai';
+ *   import { fromVercel } from 'evalforge';
+ *   import { run } from 'evalforge';
+ *
+ *   const result = await generateText({
+ *     model: openai('gpt-4o'),
+ *     prompt: 'What is the capital of France?',
+ *     tools: { webSearch: ... }
+ *   });
+ *
+ *   const tracePath = fromVercel(result, {
+ *     question: 'What is the capital of France?',
+ *     model: 'gpt-4o'
+ *   });
+ *   const evalResult = run(tracePath, { metrics: ['faithfulness'] });
+ */
+var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { // compiler-emitted interop helper: expose m[k] on o under the name k2
+    if (k2 === undefined) k2 = k; // target name defaults to the source name
+    var desc = Object.getOwnPropertyDescriptor(m, k);
+    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
+      desc = { enumerable: true, get: function() { return m[k]; } }; // fall back to a getter so o[k2] always reads the current m[k]
+    }
+    Object.defineProperty(o, k2, desc);
+}) : (function(o, m, k, k2) { // branch used when Object.create is unavailable: plain value copy
+    if (k2 === undefined) k2 = k;
+    o[k2] = m[k];
+}));
+var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { // sets o.default = v
+    Object.defineProperty(o, "default", { enumerable: true, value: v });
+}) : function(o, v) {
+    o["default"] = v;
+});
+var __importStar = (this && this.__importStar) || (function () { // builds the namespace object used for `import * as x` of a required module
+    var ownKeys = function(o) {
+        ownKeys = Object.getOwnPropertyNames || function (o) { // replaces itself on first call with the native function or the fallback below
+            var ar = [];
+            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
+            return ar;
+        };
+        return ownKeys(o);
+    };
+    return function (mod) {
+        if (mod && mod.__esModule) return mod; // modules already flagged __esModule are returned unchanged
+        var result = {};
+        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]); // copy every own key except "default"
+        __setModuleDefault(result, mod); // the whole module becomes result.default
+        return result;
+    };
+})();
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.fromVercel = fromVercel;
+const fs = __importStar(require("fs"));
+const os = __importStar(require("os"));
+const path = __importStar(require("path"));
+function fromVercel(result, options = {}) {
+    /**
+     * Convert a Vercel AI SDK generateText()-style result into an EvalForge trace, write it
+     * as pretty-printed JSON under os.tmpdir(), and return that file's path (not the trace).
+     *
+     * Fields read from result (all optional; a bare string result is used as the answer):
+     * - result.text — final answer
+     * - result.toolCalls / result.toolResults — paired by array index into tool_call steps
+     * - result.steps — step text becomes 'thought'; step tool calls are matched by toolCallId
+     * - result.usage — promptTokens + completionTokens are summed into total_tokens
+     * - result.finishReason — copied to output.finish_reason
+     */
+    const { agentName = 'vercel-agent', model = 'unknown', question = '', expectedTools = [], } = options;
+    const steps = [];
+    let stepId = 1; // step_id values are 1-based and shared by both extraction passes below
+    let finalAnswer = '';
+    let totalTokens = 0;
+    // Final answer: result.text when truthy, else the result itself when it is a string
+    if (result?.text) {
+        finalAnswer = result.text;
+    }
+    else if (typeof result === 'string') {
+        finalAnswer = result;
+    }
+    // Token usage: only promptTokens and completionTokens are read; a missing one counts as 0
+    if (result?.usage) {
+        totalTokens = (result.usage.promptTokens ?? 0) +
+            (result.usage.completionTokens ?? 0);
+    }
+    // Pass 1: top-level tool calls, each paired with the tool result at the same array index
+    const toolCalls = result?.toolCalls ?? [];
+    const toolResults = result?.toolResults ?? [];
+    for (let i = 0; i < toolCalls.length; i++) {
+        const tc = toolCalls[i];
+        const tr = toolResults[i]; // undefined when there are fewer results than calls
+        steps.push({
+            step_id: stepId++,
+            type: 'tool_call',
+            tool: tc.toolName ?? tc.name ?? 'unknown_tool',
+            input: tc.args ?? tc.input ?? {},
+            output: tr ? { result: String(tr.result ?? '') } : {}, // NOTE(review): String() turns a plain-object result into "[object Object]"
+            duration_ms: 0, // per-call timing is not measured
+        });
+    }
+    // Pass 2: multi-step generations — per-step text and tool calls, appended after pass 1
+    if (Array.isArray(result?.steps)) {
+        for (const step of result.steps) {
+            if (step.text && step.text !== finalAnswer) { // skip empty text and text equal to the final answer
+                steps.push({
+                    step_id: stepId++,
+                    type: 'thought',
+                    content: step.text,
+                });
+            }
+            // Tool calls inside steps; NOTE(review): a call also listed in result.toolCalls is pushed again here
+            for (const tc of step.toolCalls ?? []) {
+                const tr = (step.toolResults ?? []).find((r) => r.toolCallId === tc.toolCallId); // NOTE(review): if both ids are undefined this matches the first result
+                steps.push({
+                    step_id: stepId++,
+                    type: 'tool_call',
+                    tool: tc.toolName ?? 'unknown_tool', // unlike pass 1, no tc.name fallback
+                    input: tc.args ?? {}, // unlike pass 1, no tc.input fallback
+                    output: tr ? { result: String(tr.result ?? '') } : {},
+                    duration_ms: 0,
+                });
+            }
+        }
+    }
+    const trace = {
+        evalforge_version: '0.1',
+        trace_id: `vercel-${Date.now()}`, // millisecond timestamp; NOTE(review): not unique for calls within the same millisecond
+        timestamp: new Date().toISOString(),
+        metadata: {
+            framework: 'vercel-ai',
+            model,
+            agent_name: agentName,
+            duration_ms: 0, // not measured
+            total_tokens: totalTokens,
+        },
+        input: {
+            user: question,
+            system: '', // no option supplies a system prompt
+        },
+        steps,
+        output: {
+            answer: finalAnswer,
+            finish_reason: result?.finishReason, // undefined is dropped by JSON.stringify, so the key may be absent in the file
+        },
+        eval_hints: {
+            expected_tools: expectedTools,
+            expected_answer: null, // always null here
+            context_documents: [], // always empty here
+        },
+    };
+    const tmp = path.join(os.tmpdir(), `evalforge_vercel_${Date.now()}.json`); // NOTE(review): same-millisecond calls share this path, so the later write overwrites the earlier file
+    fs.writeFileSync(tmp, JSON.stringify(trace, null, 2)); // synchronous write; errors propagate to the caller
+    return tmp;
+}
+//# sourceMappingURL=vercel.js.map

package/dist/adapters/vercel.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"vercel.js","sourceRoot":"","sources":["../../src/adapters/vercel.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;;;;;;;;GAmBG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAcH,gCAoHC;AAhID,uCAAyB;AACzB,uCAAyB;AACzB,2CAA6B;AAU7B,SAAgB,UAAU,CACxB,MAAW,EACX,UAAgC,EAAE;IAElC;;;;;;;;;;OAUG;IAEH,MAAM,EACJ,SAAS,GAAG,cAAc,EAC1B,KAAK,GAAG,SAAS,EACjB,QAAQ,GAAG,EAAE,EACb,aAAa,GAAG,EAAE,GACnB,GAAG,OAAO,CAAC;IAEZ,MAAM,KAAK,GAAmB,EAAE,CAAC;IACjC,IAAI,MAAM,GAAG,CAAC,CAAC;IACf,IAAI,WAAW,GAAG,EAAE,CAAC;IACrB,IAAI,WAAW,GAAG,CAAC,CAAC;IAEpB,uBAAuB;IACvB,IAAI,MAAM,EAAE,IAAI,EAAE,CAAC;QACjB,WAAW,GAAG,MAAM,CAAC,IAAI,CAAC;IAC5B,CAAC;SAAM,IAAI,OAAO,MAAM,KAAK,QAAQ,EAAE,CAAC;QACtC,WAAW,GAAG,MAAM,CAAC;IACvB,CAAC;IAED,sBAAsB;IACtB,IAAI,MAAM,EAAE,KAAK,EAAE,CAAC;QAClB,WAAW,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,YAAY,IAAI,CAAC,CAAC;YAChC,CAAC,MAAM,CAAC,KAAK,CAAC,gBAAgB,IAAI,CAAC,CAAC,CAAC;IACrD,CAAC;IAED,iCAAiC;IACjC,MAAM,SAAS,GAAG,MAAM,EAAE,SAAS,IAAI,EAAE,CAAC;IAC1C,MAAM,WAAW,GAAG,MAAM,EAAE,WAAW,IAAI,EAAE,CAAC;IAE9C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC1C,MAAM,EAAE,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC;QACxB,MAAM,EAAE,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC;QAE1B,KAAK,CAAC,IAAI,CAAC;YACT,OAAO,EAAE,MAAM,EAAE;YACjB,IAAI,EAAE,WAAW;YACjB,IAAI,EAAE,EAAE,CAAC,QAAQ,IAAI,EAAE,CAAC,IAAI,IAAI,cAAc;YAC9C,KAAK,EAAE,EAAE,CAAC,IAAI,IAAI,EAAE,CAAC,KAAK,IAAI,EAAE;YAChC,MAAM,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC,MAAM,IAAI,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE;YACrD,WAAW,EAAE,CAAC;SACf,CAAC,CAAC;IACL,CAAC;IAED,oDAAoD;IACpD,IAAI,KAAK,CAAC,OAAO,CAAC,MAAM,EAAE,KAAK,CAAC,EAAE,CAAC;QACjC,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,KAAK,EAAE,CAAC;YAChC,IAAI,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,IAAI,KAAK,WAAW,EAAE,CAAC;gBAC3C,KAAK,CAAC,IAAI,CAAC;oBACT,OAAO,EAAE,MAAM,EAAE;oBACjB,IAAI,EAAE,SAAS;oBACf,OAAO,EAAE,IAAI,CAAC,IAAI;iBACnB,CAAC,CAAC;YACL,CAAC;YACD,0BAA0B;YAC1B,KAAK,MAAM,EAAE,IAAI,IAAI,CAAC,SAAS,IAAI,EAAE,EAAE,CAAC;gBACtC,MAAM,EAAE,GAAG,CAAC,IAAI,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,IAAI,CACtC,C
AAC,CAAM,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,KAAK,EAAE,CAAC,UAAU,CAC3C,CAAC;gBACF,KAAK,CAAC,IAAI,CAAC;oBACT,OAAO,EAAE,MAAM,EAAE;oBACjB,IAAI,EAAE,WAAW;oBACjB,IAAI,EAAE,EAAE,CAAC,QAAQ,IAAI,cAAc;oBACnC,KAAK,EAAE,EAAE,CAAC,IAAI,IAAI,EAAE;oBACpB,MAAM,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC,MAAM,IAAI,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE;oBACrD,WAAW,EAAE,CAAC;iBACf,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;IAED,MAAM,KAAK,GAAU;QACnB,iBAAiB,EAAE,KAAK;QACxB,QAAQ,EAAE,UAAU,IAAI,CAAC,GAAG,EAAE,EAAE;QAChC,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACnC,QAAQ,EAAE;YACR,SAAS,EAAE,WAAW;YACtB,KAAK;YACL,UAAU,EAAE,SAAS;YACrB,WAAW,EAAE,CAAC;YACd,YAAY,EAAE,WAAW;SAC1B;QACD,KAAK,EAAE;YACL,IAAI,EAAE,QAAQ;YACd,MAAM,EAAE,EAAE;SACX;QACD,KAAK;QACL,MAAM,EAAE;YACN,MAAM,EAAE,WAAW;YACnB,aAAa,EAAE,MAAM,EAAE,YAAY;SACpC;QACD,UAAU,EAAE;YACV,cAAc,EAAE,aAAa;YAC7B,eAAe,EAAE,IAAI;YACrB,iBAAiB,EAAE,EAAE;SACtB;KACF,CAAC;IAEF,MAAM,GAAG,GAAG,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,MAAM,EAAE,EAAE,oBAAoB,IAAI,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;IAC1E,EAAE,CAAC,aAAa,CAAC,GAAG,EAAE,IAAI,CAAC,SAAS,CAAC,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;IACtD,OAAO,GAAG,CAAC;AACb,CAAC"}

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "evalforge",
-  "version": "0.7.1",
+  "version": "0.8.0",
   "description": "Framework-agnostic LLM agent evaluation harness",
   "main": "dist/index.js",
   "types": "dist/index.d.ts",

package/src/adapters/index.ts CHANGED Viewed

	@@ -1 +1,2 @@
1 1	export { fromMastra } from './mastra';
2	+ export { fromVercel } from './vercel';

package/src/adapters/vercel.ts ADDED Viewed

@@ -0,0 +1,150 @@
+/**
+ * Vercel AI SDK adapter for EvalForge.
+ *
+ * Usage:
+ *   import { generateText, streamText } from 'ai';
+ *   import { fromVercel } from 'evalforge';
+ *   import { run } from 'evalforge';
+ *
+ *   const result = await generateText({
+ *     model: openai('gpt-4o'),
+ *     prompt: 'What is the capital of France?',
+ *     tools: { webSearch: ... }
+ *   });
+ *
+ *   const tracePath = fromVercel(result, {
+ *     question: 'What is the capital of France?',
+ *     model: 'gpt-4o'
+ *   });
+ *   const evalResult = run(tracePath, { metrics: ['faithfulness'] });
+ */
+import * as fs from 'fs';
+import * as os from 'os';
+import * as path from 'path';
+import { Trace } from '../types';
+export interface VercelAdapterOptions { // optional settings accepted by fromVercel(); every field may be omitted
+  agentName?: string; // written to trace.metadata.agent_name; fromVercel defaults it to 'vercel-agent'
+  model?: string; // written to trace.metadata.model; fromVercel defaults it to 'unknown'
+  question?: string; // written to trace.input.user; fromVercel defaults it to ''
+  expectedTools?: string[]; // written to trace.eval_hints.expected_tools; fromVercel defaults it to []
+}
+export function fromVercel(
+  result: any, // generateText()-style result object, or a bare string used as the answer
+  options: VercelAdapterOptions = {} // every option has a default, see the destructuring below
+): string {
+  /**
+   * Convert a Vercel AI SDK generateText()-style result into an EvalForge trace, write it
+   * as pretty-printed JSON under os.tmpdir(), and return that file's path (not the trace).
+   *
+   * Fields read from result (all optional; a bare string result is used as the answer):
+   * - result.text — final answer
+   * - result.toolCalls / result.toolResults — paired by array index into tool_call steps
+   * - result.steps — step text becomes 'thought'; step tool calls are matched by toolCallId
+   * - result.usage — promptTokens + completionTokens are summed into total_tokens
+   * - result.finishReason — copied to output.finish_reason
+   */
+  const {
+    agentName = 'vercel-agent',
+    model = 'unknown',
+    question = '',
+    expectedTools = [],
+  } = options;
+  const steps: Trace['steps'] = [];
+  let stepId = 1; // step_id values are 1-based and shared by both extraction passes below
+  let finalAnswer = '';
+  let totalTokens = 0;
+  // Final answer: result.text when truthy, else the result itself when it is a string
+  if (result?.text) {
+    finalAnswer = result.text;
+  } else if (typeof result === 'string') {
+    finalAnswer = result;
+  }
+  // Token usage: only promptTokens and completionTokens are read; a missing one counts as 0
+  if (result?.usage) {
+    totalTokens = (result.usage.promptTokens ?? 0) +
+                  (result.usage.completionTokens ?? 0);
+  }
+  // Pass 1: top-level tool calls, each paired with the tool result at the same array index
+  const toolCalls = result?.toolCalls ?? [];
+  const toolResults = result?.toolResults ?? [];
+  for (let i = 0; i < toolCalls.length; i++) {
+    const tc = toolCalls[i];
+    const tr = toolResults[i]; // undefined when there are fewer results than calls
+    steps.push({
+      step_id: stepId++,
+      type: 'tool_call',
+      tool: tc.toolName ?? tc.name ?? 'unknown_tool',
+      input: tc.args ?? tc.input ?? {},
+      output: tr ? { result: String(tr.result ?? '') } : {}, // NOTE(review): String() turns a plain-object result into "[object Object]"
+      duration_ms: 0, // per-call timing is not measured
+    });
+  }
+  // Pass 2: multi-step generations — per-step text and tool calls, appended after pass 1
+  if (Array.isArray(result?.steps)) {
+    for (const step of result.steps) {
+      if (step.text && step.text !== finalAnswer) { // skip empty text and text equal to the final answer
+        steps.push({
+          step_id: stepId++,
+          type: 'thought',
+          content: step.text,
+        });
+      }
+      // Tool calls inside steps; NOTE(review): a call also listed in result.toolCalls is pushed again here
+      for (const tc of step.toolCalls ?? []) {
+        const tr = (step.toolResults ?? []).find(
+          (r: any) => r.toolCallId === tc.toolCallId // NOTE(review): if both ids are undefined this matches the first result
+        );
+        steps.push({
+          step_id: stepId++,
+          type: 'tool_call',
+          tool: tc.toolName ?? 'unknown_tool', // unlike pass 1, no tc.name fallback
+          input: tc.args ?? {}, // unlike pass 1, no tc.input fallback
+          output: tr ? { result: String(tr.result ?? '') } : {},
+          duration_ms: 0,
+        });
+      }
+    }
+  }
+  const trace: Trace = {
+    evalforge_version: '0.1',
+    trace_id: `vercel-${Date.now()}`, // millisecond timestamp; NOTE(review): not unique for calls within the same millisecond
+    timestamp: new Date().toISOString(),
+    metadata: {
+      framework: 'vercel-ai',
+      model,
+      agent_name: agentName,
+      duration_ms: 0, // not measured
+      total_tokens: totalTokens,
+    },
+    input: {
+      user: question,
+      system: '', // no option supplies a system prompt
+    },
+    steps,
+    output: {
+      answer: finalAnswer,
+      finish_reason: result?.finishReason, // undefined is dropped by JSON.stringify, so the key may be absent in the file
+    },
+    eval_hints: {
+      expected_tools: expectedTools,
+      expected_answer: null, // always null here
+      context_documents: [], // always empty here
+    },
+  };
+  const tmp = path.join(os.tmpdir(), `evalforge_vercel_${Date.now()}.json`); // NOTE(review): same-millisecond calls share this path, so the later write overwrites the earlier file
+  fs.writeFileSync(tmp, JSON.stringify(trace, null, 2)); // synchronous write; errors propagate to the caller
+  return tmp;
+}

package/tests/adapters.test.js CHANGED Viewed

@@ -3,7 +3,7 @@ const assert = require('node:assert');
 const path = require('path');
 const fs = require('fs');
-const { fromMastra } = require('../dist/adapters/index.js');
+const { fromMastra, fromVercel } = require('../dist/adapters/index.js');
 test('fromMastra with text result', () => {
   const result = { text: 'The capital of France is Paris.' };
@@ -53,3 +53,58 @@ test('fromMastra with token usage', () => {
   const trace = JSON.parse(fs.readFileSync(tracePath, 'utf8'));
   assert.strictEqual(trace.metadata.total_tokens, 150);
 });
+test('fromVercel with text result', () => { // text-only result: checks answer, summed token count and finish reason
+  const result = {
+    text: 'The capital of France is Paris.',
+    usage: { promptTokens: 50, completionTokens: 20 },
+    finishReason: 'stop'
+  };
+  const tracePath = fromVercel(result, { // returns the path of the written trace file
+    question: 'What is the capital of France?',
+    model: 'gpt-4o'
+  });
+  const trace = JSON.parse(fs.readFileSync(tracePath, 'utf8')); // the file at tracePath is not removed by this test
+  assert.strictEqual(trace.metadata.framework, 'vercel-ai');
+  assert.strictEqual(trace.output.answer, 'The capital of France is Paris.');
+  assert.strictEqual(trace.metadata.total_tokens, 70); // 50 prompt + 20 completion
+  assert.strictEqual(trace.output.finish_reason, 'stop');
+});
+test('fromVercel with tool calls', () => { // top-level toolCalls/toolResults only (no steps array)
+  const result = {
+    text: 'Canberra is the capital.',
+    toolCalls: [{
+      toolName: 'web_search',
+      args: { query: 'capital of Australia' }
+    }],
+    toolResults: [{ // no toolCallId here: top-level results are paired with calls by array index
+      result: 'Canberra is the capital of Australia.'
+    }]
+  };
+  const tracePath = fromVercel(result, {
+    question: 'Capital of Australia?',
+    expectedTools: ['web_search']
+  });
+  const trace = JSON.parse(fs.readFileSync(tracePath, 'utf8')); // the file at tracePath is not removed by this test
+  assert.strictEqual(trace.steps.length, 1);
+  assert.strictEqual(trace.steps[0].tool, 'web_search'); // NOTE(review): steps[0].input and steps[0].output are not asserted
+  assert.deepStrictEqual(trace.eval_hints.expected_tools, ['web_search']);
+});
+test('fromVercel with multi-step result', () => { // steps array only (no top-level toolCalls)
+  const result = {
+    text: 'Final answer.',
+    steps: [
+      { text: 'Thinking...', toolCalls: [], toolResults: [] }, // becomes a 'thought' step (text differs from the final answer)
+      {
+        text: '', // empty text produces no 'thought' step
+        toolCalls: [{ toolName: 'search', toolCallId: '1', args: { q: 'test' } }],
+        toolResults: [{ toolCallId: '1', result: 'search result' }] // matched to the call by toolCallId
+      }
+    ]
+  };
+  const tracePath = fromVercel(result, { question: 'Test?' });
+  const trace = JSON.parse(fs.readFileSync(tracePath, 'utf8')); // the file at tracePath is not removed by this test
+  assert.ok(trace.steps.length >= 1); // NOTE(review): weak check — this input yields exactly 2 steps (one thought, one tool_call)
+});