evalforge 0.7.1 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adapters/index.d.ts +1 -0
- package/dist/adapters/index.d.ts.map +1 -1
- package/dist/adapters/index.js +3 -1
- package/dist/adapters/index.js.map +1 -1
- package/dist/adapters/vercel.d.ts +28 -0
- package/dist/adapters/vercel.d.ts.map +1 -0
- package/dist/adapters/vercel.js +158 -0
- package/dist/adapters/vercel.js.map +1 -0
- package/package.json +1 -1
- package/src/adapters/index.ts +1 -0
- package/src/adapters/vercel.ts +150 -0
- package/tests/adapters.test.js +56 -1
package/dist/adapters/index.d.ts
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/adapters/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,UAAU,CAAC"}
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/adapters/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,UAAU,CAAC;AACtC,OAAO,EAAE,UAAU,EAAE,MAAM,UAAU,CAAC"}
|
package/dist/adapters/index.js
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.fromMastra = void 0;
|
|
3
|
+
exports.fromVercel = exports.fromMastra = void 0;
|
|
4
4
|
var mastra_1 = require("./mastra");
|
|
5
5
|
Object.defineProperty(exports, "fromMastra", { enumerable: true, get: function () { return mastra_1.fromMastra; } });
|
|
6
|
+
var vercel_1 = require("./vercel");
|
|
7
|
+
Object.defineProperty(exports, "fromVercel", { enumerable: true, get: function () { return vercel_1.fromVercel; } });
|
|
6
8
|
//# sourceMappingURL=index.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/adapters/index.ts"],"names":[],"mappings":";;;AAAA,mCAAsC;AAA7B,oGAAA,UAAU,OAAA"}
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/adapters/index.ts"],"names":[],"mappings":";;;AAAA,mCAAsC;AAA7B,oGAAA,UAAU,OAAA;AACnB,mCAAsC;AAA7B,oGAAA,UAAU,OAAA"}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Vercel AI SDK adapter for EvalForge.
|
|
3
|
+
*
|
|
4
|
+
* Usage:
|
|
5
|
+
* import { generateText, streamText } from 'ai';
|
|
6
|
+
* import { fromVercel } from 'evalforge';
|
|
7
|
+
* import { run } from 'evalforge';
|
|
8
|
+
*
|
|
9
|
+
* const result = await generateText({
|
|
10
|
+
* model: openai('gpt-4o'),
|
|
11
|
+
* prompt: 'What is the capital of France?',
|
|
12
|
+
* tools: { webSearch: ... }
|
|
13
|
+
* });
|
|
14
|
+
*
|
|
15
|
+
* const tracePath = fromVercel(result, {
|
|
16
|
+
* question: 'What is the capital of France?',
|
|
17
|
+
* model: 'gpt-4o'
|
|
18
|
+
* });
|
|
19
|
+
* const evalResult = run(tracePath, { metrics: ['faithfulness'] });
|
|
20
|
+
*/
|
|
21
|
+
export interface VercelAdapterOptions {
|
|
22
|
+
agentName?: string;
|
|
23
|
+
model?: string;
|
|
24
|
+
question?: string;
|
|
25
|
+
expectedTools?: string[];
|
|
26
|
+
}
|
|
27
|
+
export declare function fromVercel(result: any, options?: VercelAdapterOptions): string;
|
|
28
|
+
//# sourceMappingURL=vercel.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"vercel.d.ts","sourceRoot":"","sources":["../../src/adapters/vercel.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AAOH,MAAM,WAAW,oBAAoB;IACnC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;CAC1B;AAED,wBAAgB,UAAU,CACxB,MAAM,EAAE,GAAG,EACX,OAAO,GAAE,oBAAyB,GACjC,MAAM,CAiHR"}
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Vercel AI SDK adapter for EvalForge.
|
|
4
|
+
*
|
|
5
|
+
* Usage:
|
|
6
|
+
* import { generateText, streamText } from 'ai';
|
|
7
|
+
* import { fromVercel } from 'evalforge';
|
|
8
|
+
* import { run } from 'evalforge';
|
|
9
|
+
*
|
|
10
|
+
* const result = await generateText({
|
|
11
|
+
* model: openai('gpt-4o'),
|
|
12
|
+
* prompt: 'What is the capital of France?',
|
|
13
|
+
* tools: { webSearch: ... }
|
|
14
|
+
* });
|
|
15
|
+
*
|
|
16
|
+
* const tracePath = fromVercel(result, {
|
|
17
|
+
* question: 'What is the capital of France?',
|
|
18
|
+
* model: 'gpt-4o'
|
|
19
|
+
* });
|
|
20
|
+
* const evalResult = run(tracePath, { metrics: ['faithfulness'] });
|
|
21
|
+
*/
|
|
22
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
23
|
+
if (k2 === undefined) k2 = k;
|
|
24
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
25
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
26
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
27
|
+
}
|
|
28
|
+
Object.defineProperty(o, k2, desc);
|
|
29
|
+
}) : (function(o, m, k, k2) {
|
|
30
|
+
if (k2 === undefined) k2 = k;
|
|
31
|
+
o[k2] = m[k];
|
|
32
|
+
}));
|
|
33
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
34
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
35
|
+
}) : function(o, v) {
|
|
36
|
+
o["default"] = v;
|
|
37
|
+
});
|
|
38
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
39
|
+
var ownKeys = function(o) {
|
|
40
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
41
|
+
var ar = [];
|
|
42
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
43
|
+
return ar;
|
|
44
|
+
};
|
|
45
|
+
return ownKeys(o);
|
|
46
|
+
};
|
|
47
|
+
return function (mod) {
|
|
48
|
+
if (mod && mod.__esModule) return mod;
|
|
49
|
+
var result = {};
|
|
50
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
51
|
+
__setModuleDefault(result, mod);
|
|
52
|
+
return result;
|
|
53
|
+
};
|
|
54
|
+
})();
|
|
55
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
56
|
+
exports.fromVercel = fromVercel;
|
|
57
|
+
const fs = __importStar(require("fs"));
|
|
58
|
+
const os = __importStar(require("os"));
|
|
59
|
+
const path = __importStar(require("path"));
|
|
60
|
+
/**
 * Render a tool result as text for the trace.
 *
 * Strings are kept verbatim, null/undefined become '', and objects/arrays are
 * JSON-encoded so structured results are not collapsed to "[object Object]".
 */
function stringifyToolResult(value) {
    if (value === undefined || value === null) {
        return '';
    }
    if (typeof value === 'string') {
        return value;
    }
    if (typeof value === 'object') {
        try {
            return JSON.stringify(value) ?? '';
        }
        catch {
            // Circular references or BigInt members: fall back to the default rendering.
            return String(value);
        }
    }
    return String(value);
}
/**
 * Convert a Vercel AI SDK generateText() result to an EvalForge trace JSON file.
 *
 * Reads from `result`:
 *   - text          — final answer (a bare string is also accepted as the answer)
 *   - steps         — per-step text and tool calls, emitted in order
 *   - toolCalls     — top-level tool invocations
 *   - toolResults   — top-level tool results
 *   - usage         — token usage stats
 *   - finishReason  — why generation stopped
 *
 * @param result  The generateText() result object, or a plain answer string.
 * @param options Optional agentName, model, question and expectedTools.
 * @returns Path of the JSON trace file written under os.tmpdir().
 * @throws If the trace file cannot be created.
 */
function fromVercel(result, options = {}) {
    const { agentName = 'vercel-agent', model = 'unknown', question = '', expectedTools = [], } = options;
    const steps = [];
    let stepId = 1;
    let finalAnswer = '';
    let totalTokens = 0;
    // Extract final answer
    if (result?.text) {
        finalAnswer = result.text;
    }
    else if (typeof result === 'string') {
        finalAnswer = result;
    }
    // Extract token usage. Non-finite counts are ignored so NaN never reaches the
    // trace. inputTokens/outputTokens/totalTokens are the names newer AI SDK
    // releases appear to use — confirm against the SDK version in use.
    const usage = result?.usage;
    if (usage) {
        const asCount = (v) => (typeof v === 'number' && Number.isFinite(v) ? v : undefined);
        const promptTokens = asCount(usage.promptTokens) ?? asCount(usage.inputTokens);
        const completionTokens = asCount(usage.completionTokens) ?? asCount(usage.outputTokens);
        if (promptTokens === undefined && completionTokens === undefined) {
            totalTokens = asCount(usage.totalTokens) ?? 0;
        }
        else {
            totalTokens = (promptTokens ?? 0) + (completionTokens ?? 0);
        }
    }
    // Tool call ids already emitted, so a call present both in a step and in the
    // top-level list is recorded once.
    const seenCallIds = new Set();
    const recordToolCalls = (calls, results) => {
        const callList = Array.isArray(calls) ? calls : [];
        const resultList = Array.isArray(results) ? results : [];
        callList.forEach((call, index) => {
            const callId = call?.toolCallId;
            if (callId !== undefined && callId !== null) {
                if (seenCallIds.has(callId)) {
                    return;
                }
                seenCallIds.add(callId);
            }
            // Pair by toolCallId when the call has one; otherwise (or when no result
            // carries that id) fall back to the result at the same position.
            const positional = resultList[index];
            let matched;
            if (callId !== undefined && callId !== null) {
                matched = resultList.find((r) => r?.toolCallId === callId);
                if (!matched && positional && (positional.toolCallId === undefined || positional.toolCallId === null)) {
                    matched = positional;
                }
            }
            else {
                matched = positional;
            }
            steps.push({
                step_id: stepId++,
                type: 'tool_call',
                tool: call?.toolName ?? call?.name ?? 'unknown_tool',
                input: call?.args ?? call?.input ?? {},
                output: matched ? { result: stringifyToolResult(matched.result ?? matched.output) } : {},
                duration_ms: 0,
            });
        });
    };
    // Steps first, so the trace follows execution order (multi-step generations).
    if (Array.isArray(result?.steps)) {
        for (const step of result.steps) {
            if (step?.text && step.text !== finalAnswer) {
                steps.push({
                    step_id: stepId++,
                    type: 'thought',
                    content: step.text,
                });
            }
            recordToolCalls(step?.toolCalls, step?.toolResults);
        }
    }
    // Top-level tool calls. Per the AI SDK reference these repeat the calls of
    // the last step, so any id already seen above is skipped. Calls without an
    // id cannot be matched and are always kept.
    recordToolCalls(result?.toolCalls, result?.toolResults);
    // Timestamp plus random suffix: two conversions in the same millisecond must
    // not share a trace id or overwrite each other's file.
    const uniqueSuffix = `${Date.now()}_${Math.random().toString(36).slice(2, 10)}`;
    const trace = {
        evalforge_version: '0.1',
        trace_id: `vercel-${uniqueSuffix}`,
        timestamp: new Date().toISOString(),
        metadata: {
            framework: 'vercel-ai',
            model,
            agent_name: agentName,
            duration_ms: 0,
            total_tokens: totalTokens,
        },
        input: {
            user: question,
            system: '',
        },
        steps,
        output: {
            answer: finalAnswer,
            finish_reason: result?.finishReason,
        },
        eval_hints: {
            expected_tools: expectedTools,
            expected_answer: null,
            context_documents: [],
        },
    };
    const tmp = path.join(os.tmpdir(), `evalforge_vercel_${uniqueSuffix}.json`);
    // 'wx' fails instead of overwriting (or following) a pre-existing path in the shared temp dir.
    fs.writeFileSync(tmp, JSON.stringify(trace, null, 2), { flag: 'wx' });
    return tmp;
}
|
|
158
|
+
//# sourceMappingURL=vercel.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"vercel.js","sourceRoot":"","sources":["../../src/adapters/vercel.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;;;;;;;;GAmBG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAcH,gCAoHC;AAhID,uCAAyB;AACzB,uCAAyB;AACzB,2CAA6B;AAU7B,SAAgB,UAAU,CACxB,MAAW,EACX,UAAgC,EAAE;IAElC;;;;;;;;;;OAUG;IAEH,MAAM,EACJ,SAAS,GAAG,cAAc,EAC1B,KAAK,GAAG,SAAS,EACjB,QAAQ,GAAG,EAAE,EACb,aAAa,GAAG,EAAE,GACnB,GAAG,OAAO,CAAC;IAEZ,MAAM,KAAK,GAAmB,EAAE,CAAC;IACjC,IAAI,MAAM,GAAG,CAAC,CAAC;IACf,IAAI,WAAW,GAAG,EAAE,CAAC;IACrB,IAAI,WAAW,GAAG,CAAC,CAAC;IAEpB,uBAAuB;IACvB,IAAI,MAAM,EAAE,IAAI,EAAE,CAAC;QACjB,WAAW,GAAG,MAAM,CAAC,IAAI,CAAC;IAC5B,CAAC;SAAM,IAAI,OAAO,MAAM,KAAK,QAAQ,EAAE,CAAC;QACtC,WAAW,GAAG,MAAM,CAAC;IACvB,CAAC;IAED,sBAAsB;IACtB,IAAI,MAAM,EAAE,KAAK,EAAE,CAAC;QAClB,WAAW,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,YAAY,IAAI,CAAC,CAAC;YAChC,CAAC,MAAM,CAAC,KAAK,CAAC,gBAAgB,IAAI,CAAC,CAAC,CAAC;IACrD,CAAC;IAED,iCAAiC;IACjC,MAAM,SAAS,GAAG,MAAM,EAAE,SAAS,IAAI,EAAE,CAAC;IAC1C,MAAM,WAAW,GAAG,MAAM,EAAE,WAAW,IAAI,EAAE,CAAC;IAE9C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC1C,MAAM,EAAE,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC;QACxB,MAAM,EAAE,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC;QAE1B,KAAK,CAAC,IAAI,CAAC;YACT,OAAO,EAAE,MAAM,EAAE;YACjB,IAAI,EAAE,WAAW;YACjB,IAAI,EAAE,EAAE,CAAC,QAAQ,IAAI,EAAE,CAAC,IAAI,IAAI,cAAc;YAC9C,KAAK,EAAE,EAAE,CAAC,IAAI,IAAI,EAAE,CAAC,KAAK,IAAI,EAAE;YAChC,MAAM,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC,MAAM,IAAI,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE;YACrD,WAAW,EAAE,CAAC;SACf,CAAC,CAAC;IACL,CAAC;IAED,oDAAoD;IACpD,IAAI,KAAK,CAAC,OAAO,CAAC,MAAM,EAAE,KAAK,CAAC,EAAE,CAAC;QACjC,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,KAAK,EAAE,CAAC;YAChC,IAAI,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,IAAI,KAAK,WAAW,EAAE,CAAC;gBAC3C,KAAK,CAAC,IAAI,CAAC;oBACT,OAAO,EAAE,MAAM,EAAE;oBACjB,IAAI,EAAE,SAAS;oBACf,OAAO,EAAE,IAAI,CAAC,IAAI;iBACnB,CAAC,CAAC;YACL,CAAC;YACD,0BAA0B;YAC1B,KAAK,MAAM,EAAE,IAAI,IAAI,CAAC,SAAS,IAAI,EAAE,EAAE,CAAC;gBACtC,MAAM,EAAE,GAAG,CAAC,IAAI,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,IAAI,CACtC,CAA
C,CAAM,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,KAAK,EAAE,CAAC,UAAU,CAC3C,CAAC;gBACF,KAAK,CAAC,IAAI,CAAC;oBACT,OAAO,EAAE,MAAM,EAAE;oBACjB,IAAI,EAAE,WAAW;oBACjB,IAAI,EAAE,EAAE,CAAC,QAAQ,IAAI,cAAc;oBACnC,KAAK,EAAE,EAAE,CAAC,IAAI,IAAI,EAAE;oBACpB,MAAM,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC,MAAM,IAAI,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE;oBACrD,WAAW,EAAE,CAAC;iBACf,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;IAED,MAAM,KAAK,GAAU;QACnB,iBAAiB,EAAE,KAAK;QACxB,QAAQ,EAAE,UAAU,IAAI,CAAC,GAAG,EAAE,EAAE;QAChC,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACnC,QAAQ,EAAE;YACR,SAAS,EAAE,WAAW;YACtB,KAAK;YACL,UAAU,EAAE,SAAS;YACrB,WAAW,EAAE,CAAC;YACd,YAAY,EAAE,WAAW;SAC1B;QACD,KAAK,EAAE;YACL,IAAI,EAAE,QAAQ;YACd,MAAM,EAAE,EAAE;SACX;QACD,KAAK;QACL,MAAM,EAAE;YACN,MAAM,EAAE,WAAW;YACnB,aAAa,EAAE,MAAM,EAAE,YAAY;SACpC;QACD,UAAU,EAAE;YACV,cAAc,EAAE,aAAa;YAC7B,eAAe,EAAE,IAAI;YACrB,iBAAiB,EAAE,EAAE;SACtB;KACF,CAAC;IAEF,MAAM,GAAG,GAAG,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,MAAM,EAAE,EAAE,oBAAoB,IAAI,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;IAC1E,EAAE,CAAC,aAAa,CAAC,GAAG,EAAE,IAAI,CAAC,SAAS,CAAC,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;IACtD,OAAO,GAAG,CAAC;AACb,CAAC"}
|
package/package.json
CHANGED
package/src/adapters/index.ts
CHANGED
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Vercel AI SDK adapter for EvalForge.
|
|
3
|
+
*
|
|
4
|
+
* Usage:
|
|
5
|
+
* import { generateText, streamText } from 'ai';
|
|
6
|
+
* import { fromVercel } from 'evalforge';
|
|
7
|
+
* import { run } from 'evalforge';
|
|
8
|
+
*
|
|
9
|
+
* const result = await generateText({
|
|
10
|
+
* model: openai('gpt-4o'),
|
|
11
|
+
* prompt: 'What is the capital of France?',
|
|
12
|
+
* tools: { webSearch: ... }
|
|
13
|
+
* });
|
|
14
|
+
*
|
|
15
|
+
* const tracePath = fromVercel(result, {
|
|
16
|
+
* question: 'What is the capital of France?',
|
|
17
|
+
* model: 'gpt-4o'
|
|
18
|
+
* });
|
|
19
|
+
* const evalResult = run(tracePath, { metrics: ['faithfulness'] });
|
|
20
|
+
*/
|
|
21
|
+
|
|
22
|
+
import * as fs from 'fs';
|
|
23
|
+
import * as os from 'os';
|
|
24
|
+
import * as path from 'path';
|
|
25
|
+
import { Trace } from '../types';
|
|
26
|
+
|
|
27
|
+
/** Options accepted by {@link fromVercel}; every field is optional. */
export interface VercelAdapterOptions {
  /** Written to `metadata.agent_name`; defaults to `'vercel-agent'`. */
  agentName?: string;
  /** Written to `metadata.model`; defaults to `'unknown'`. */
  model?: string;
  /** Written to `input.user`; defaults to `''`. */
  question?: string;
  /** Written to `eval_hints.expected_tools`; defaults to `[]`. */
  expectedTools?: string[];
}

/**
 * Render a tool result as text for the trace.
 *
 * Strings are kept verbatim, `null`/`undefined` become `''`, and objects/arrays
 * are JSON-encoded so structured results are not collapsed to
 * `"[object Object]"`.
 */
function stringifyToolResult(value: unknown): string {
  if (value === undefined || value === null) {
    return '';
  }
  if (typeof value === 'string') {
    return value;
  }
  if (typeof value === 'object') {
    try {
      return JSON.stringify(value) ?? '';
    } catch {
      // Circular references or BigInt members: fall back to the default rendering.
      return String(value);
    }
  }
  return String(value);
}

/**
 * Convert a Vercel AI SDK `generateText()` result to an EvalForge trace JSON file.
 *
 * Reads from `result`:
 *   - `text`          — final answer (a bare string is also accepted as the answer)
 *   - `steps`         — per-step text and tool calls, emitted in order
 *   - `toolCalls`     — top-level tool invocations
 *   - `toolResults`   — top-level tool results
 *   - `usage`         — token usage stats
 *   - `finishReason`  — why generation stopped
 *
 * @param result  The `generateText()` result object, or a plain answer string.
 * @param options Optional labels and evaluation hints; see {@link VercelAdapterOptions}.
 * @returns Path of the JSON trace file written under `os.tmpdir()`.
 * @throws If the trace file cannot be created.
 */
export function fromVercel(
  result: any,
  options: VercelAdapterOptions = {}
): string {
  const {
    agentName = 'vercel-agent',
    model = 'unknown',
    question = '',
    expectedTools = [],
  } = options;

  const steps: Trace['steps'] = [];
  let stepId = 1;
  let finalAnswer = '';
  let totalTokens = 0;

  // Extract final answer
  if (result?.text) {
    finalAnswer = result.text;
  } else if (typeof result === 'string') {
    finalAnswer = result;
  }

  // Extract token usage. Non-finite counts are ignored so NaN never reaches the
  // trace. inputTokens/outputTokens/totalTokens are the names newer AI SDK
  // releases appear to use — confirm against the SDK version in use.
  const usage = result?.usage;
  if (usage) {
    const asCount = (v: unknown): number | undefined =>
      typeof v === 'number' && Number.isFinite(v) ? v : undefined;
    const promptTokens = asCount(usage.promptTokens) ?? asCount(usage.inputTokens);
    const completionTokens =
      asCount(usage.completionTokens) ?? asCount(usage.outputTokens);
    if (promptTokens === undefined && completionTokens === undefined) {
      totalTokens = asCount(usage.totalTokens) ?? 0;
    } else {
      totalTokens = (promptTokens ?? 0) + (completionTokens ?? 0);
    }
  }

  // Tool call ids already emitted, so a call present both in a step and in the
  // top-level list is recorded once.
  const seenCallIds = new Set<unknown>();

  const recordToolCalls = (calls: unknown, results: unknown): void => {
    const callList: any[] = Array.isArray(calls) ? calls : [];
    const resultList: any[] = Array.isArray(results) ? results : [];

    callList.forEach((call, index) => {
      const callId = call?.toolCallId;
      if (callId !== undefined && callId !== null) {
        if (seenCallIds.has(callId)) {
          return;
        }
        seenCallIds.add(callId);
      }

      // Pair by toolCallId when the call has one; otherwise (or when no result
      // carries that id) fall back to the result at the same position.
      const positional = resultList[index];
      let matched: any;
      if (callId !== undefined && callId !== null) {
        matched = resultList.find((r: any) => r?.toolCallId === callId);
        if (
          !matched &&
          positional &&
          (positional.toolCallId === undefined || positional.toolCallId === null)
        ) {
          matched = positional;
        }
      } else {
        matched = positional;
      }

      steps.push({
        step_id: stepId++,
        type: 'tool_call',
        tool: call?.toolName ?? call?.name ?? 'unknown_tool',
        input: call?.args ?? call?.input ?? {},
        output: matched
          ? { result: stringifyToolResult(matched.result ?? matched.output) }
          : {},
        duration_ms: 0,
      });
    });
  };

  // Steps first, so the trace follows execution order (multi-step generations).
  if (Array.isArray(result?.steps)) {
    for (const step of result.steps) {
      if (step?.text && step.text !== finalAnswer) {
        steps.push({
          step_id: stepId++,
          type: 'thought',
          content: step.text,
        });
      }
      recordToolCalls(step?.toolCalls, step?.toolResults);
    }
  }

  // Top-level tool calls. Per the AI SDK reference these repeat the calls of
  // the last step, so any id already seen above is skipped. Calls without an
  // id cannot be matched and are always kept.
  recordToolCalls(result?.toolCalls, result?.toolResults);

  // Timestamp plus random suffix: two conversions in the same millisecond must
  // not share a trace id or overwrite each other's file.
  const uniqueSuffix = `${Date.now()}_${Math.random().toString(36).slice(2, 10)}`;

  const trace: Trace = {
    evalforge_version: '0.1',
    trace_id: `vercel-${uniqueSuffix}`,
    timestamp: new Date().toISOString(),
    metadata: {
      framework: 'vercel-ai',
      model,
      agent_name: agentName,
      duration_ms: 0,
      total_tokens: totalTokens,
    },
    input: {
      user: question,
      system: '',
    },
    steps,
    output: {
      answer: finalAnswer,
      finish_reason: result?.finishReason,
    },
    eval_hints: {
      expected_tools: expectedTools,
      expected_answer: null,
      context_documents: [],
    },
  };

  const tmp = path.join(os.tmpdir(), `evalforge_vercel_${uniqueSuffix}.json`);
  // 'wx' fails instead of overwriting (or following) a pre-existing path in the shared temp dir.
  fs.writeFileSync(tmp, JSON.stringify(trace, null, 2), { flag: 'wx' });
  return tmp;
}
|
package/tests/adapters.test.js
CHANGED
|
@@ -3,7 +3,7 @@ const assert = require('node:assert');
|
|
|
3
3
|
const path = require('path');
|
|
4
4
|
const fs = require('fs');
|
|
5
5
|
|
|
6
|
-
const { fromMastra } = require('../dist/adapters/index.js');
|
|
6
|
+
const { fromMastra, fromVercel } = require('../dist/adapters/index.js');
|
|
7
7
|
|
|
8
8
|
test('fromMastra with text result', () => {
|
|
9
9
|
const result = { text: 'The capital of France is Paris.' };
|
|
@@ -53,3 +53,58 @@ test('fromMastra with token usage', () => {
|
|
|
53
53
|
const trace = JSON.parse(fs.readFileSync(tracePath, 'utf8'));
|
|
54
54
|
assert.strictEqual(trace.metadata.total_tokens, 150);
|
|
55
55
|
});
|
|
56
|
+
|
|
57
|
+
// A plain text result maps answer, usage, finish reason and the supplied
// options into the trace, and produces no steps.
test('fromVercel with text result', () => {
  const question = 'What is the capital of France?';
  const result = {
    text: 'The capital of France is Paris.',
    usage: { promptTokens: 50, completionTokens: 20 },
    finishReason: 'stop'
  };
  const tracePath = fromVercel(result, { question, model: 'gpt-4o' });
  const trace = JSON.parse(fs.readFileSync(tracePath, 'utf8'));
  // The adapter leaves its file in os.tmpdir(); remove it so test runs do not accumulate files.
  fs.unlinkSync(tracePath);
  assert.strictEqual(trace.metadata.framework, 'vercel-ai');
  assert.strictEqual(trace.metadata.model, 'gpt-4o');
  assert.strictEqual(trace.input.user, question);
  assert.strictEqual(trace.output.answer, 'The capital of France is Paris.');
  assert.strictEqual(trace.metadata.total_tokens, 70);
  assert.strictEqual(trace.output.finish_reason, 'stop');
  assert.deepStrictEqual(trace.steps, []);
});
|
|
73
|
+
|
|
74
|
+
// A top-level tool call and its result become one tool_call step, with
// name, input and output all carried over.
test('fromVercel with tool calls', () => {
  const result = {
    text: 'Canberra is the capital.',
    toolCalls: [{
      toolName: 'web_search',
      args: { query: 'capital of Australia' }
    }],
    toolResults: [{
      result: 'Canberra is the capital of Australia.'
    }]
  };
  const tracePath = fromVercel(result, {
    question: 'Capital of Australia?',
    expectedTools: ['web_search']
  });
  const trace = JSON.parse(fs.readFileSync(tracePath, 'utf8'));
  // The adapter leaves its file in os.tmpdir(); remove it so test runs do not accumulate files.
  fs.unlinkSync(tracePath);
  assert.strictEqual(trace.steps.length, 1);
  assert.deepStrictEqual(trace.steps[0], {
    step_id: 1,
    type: 'tool_call',
    tool: 'web_search',
    input: { query: 'capital of Australia' },
    output: { result: 'Canberra is the capital of Australia.' },
    duration_ms: 0
  });
  assert.strictEqual(trace.output.answer, 'Canberra is the capital.');
  assert.deepStrictEqual(trace.eval_hints.expected_tools, ['web_search']);
});
|
|
94
|
+
|
|
95
|
+
// Intermediate step text becomes a thought, and a step-level tool call is
// paired with its result through toolCallId.
test('fromVercel with multi-step result', () => {
  const result = {
    text: 'Final answer.',
    steps: [
      { text: 'Thinking...', toolCalls: [], toolResults: [] },
      {
        text: '',
        toolCalls: [{ toolName: 'search', toolCallId: '1', args: { q: 'test' } }],
        toolResults: [{ toolCallId: '1', result: 'search result' }]
      }
    ]
  };
  const tracePath = fromVercel(result, { question: 'Test?' });
  const trace = JSON.parse(fs.readFileSync(tracePath, 'utf8'));
  // The adapter leaves its file in os.tmpdir(); remove it so test runs do not accumulate files.
  fs.unlinkSync(tracePath);
  // Pin the exact steps: `length >= 1` would still pass if the tool call were dropped.
  assert.deepStrictEqual(trace.steps, [
    { step_id: 1, type: 'thought', content: 'Thinking...' },
    {
      step_id: 2,
      type: 'tool_call',
      tool: 'search',
      input: { q: 'test' },
      output: { result: 'search result' },
      duration_ms: 0
    }
  ]);
  assert.strictEqual(trace.output.answer, 'Final answer.');
});
|