evalforge 0.7.0 → 0.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adapters/index.d.ts +3 -0
- package/dist/adapters/index.d.ts.map +1 -0
- package/dist/adapters/index.js +8 -0
- package/dist/adapters/index.js.map +1 -0
- package/dist/adapters/mastra.d.ts +22 -0
- package/dist/adapters/mastra.d.ts.map +1 -0
- package/dist/adapters/mastra.js +151 -0
- package/dist/adapters/mastra.js.map +1 -0
- package/dist/adapters/vercel.d.ts +28 -0
- package/dist/adapters/vercel.d.ts.map +1 -0
- package/dist/adapters/vercel.js +158 -0
- package/dist/adapters/vercel.js.map +1 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +4 -0
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/src/adapters/index.ts +2 -0
- package/src/adapters/mastra.ts +139 -0
- package/src/adapters/vercel.ts +150 -0
- package/src/index.ts +1 -0
- package/tests/adapters.test.js +110 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/adapters/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,UAAU,CAAC;AACtC,OAAO,EAAE,UAAU,EAAE,MAAM,UAAU,CAAC"}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.fromVercel = exports.fromMastra = void 0;
|
|
4
|
+
var mastra_1 = require("./mastra");
|
|
5
|
+
Object.defineProperty(exports, "fromMastra", { enumerable: true, get: function () { return mastra_1.fromMastra; } });
|
|
6
|
+
var vercel_1 = require("./vercel");
|
|
7
|
+
Object.defineProperty(exports, "fromVercel", { enumerable: true, get: function () { return vercel_1.fromVercel; } });
|
|
8
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/adapters/index.ts"],"names":[],"mappings":";;;AAAA,mCAAsC;AAA7B,oGAAA,UAAU,OAAA;AACnB,mCAAsC;AAA7B,oGAAA,UAAU,OAAA"}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Mastra adapter for EvalForge.
|
|
3
|
+
*
|
|
4
|
+
* Usage:
|
|
5
|
+
* import { Agent } from '@mastra/core';
|
|
6
|
+
* import { fromMastra } from 'evalforge';
|
|
7
|
+
* import { run } from 'evalforge';
|
|
8
|
+
*
|
|
9
|
+
* const agent = new Agent({ name: 'my-agent', ... });
|
|
10
|
+
* const result = await agent.generate('What is the capital of France?');
|
|
11
|
+
*
|
|
12
|
+
* const tracePath = fromMastra(result, { agentName: 'my-agent', model: 'gpt-4o' });
|
|
13
|
+
* const evalResult = run(tracePath, { metrics: ['faithfulness'] });
|
|
14
|
+
*/
|
|
15
|
+
/**
 * Options accepted by `fromMastra`; every field is optional.
 *
 * - `agentName`: written to `metadata.agent_name` in the trace (defaults to 'mastra-agent').
 * - `model`: written to `metadata.model` (defaults to 'unknown').
 * - `question`: written to `input.user` (defaults to '').
 * - `expectedTools`: written to `eval_hints.expected_tools` (defaults to []).
 */
export interface MastraAdapterOptions {
|
|
16
|
+
agentName?: string;
|
|
17
|
+
model?: string;
|
|
18
|
+
question?: string;
|
|
19
|
+
expectedTools?: string[];
|
|
20
|
+
}
|
|
21
|
+
export declare function fromMastra(result: any, options?: MastraAdapterOptions): string;
|
|
22
|
+
//# sourceMappingURL=mastra.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"mastra.d.ts","sourceRoot":"","sources":["../../src/adapters/mastra.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAOH,MAAM,WAAW,oBAAoB;IACnC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;CAC1B;AAED,wBAAgB,UAAU,CACxB,MAAM,EAAE,GAAG,EACX,OAAO,GAAE,oBAAyB,GACjC,MAAM,CA4GR"}
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Mastra adapter for EvalForge.
|
|
4
|
+
*
|
|
5
|
+
* Usage:
|
|
6
|
+
* import { Agent } from '@mastra/core';
|
|
7
|
+
* import { fromMastra } from 'evalforge';
|
|
8
|
+
* import { run } from 'evalforge';
|
|
9
|
+
*
|
|
10
|
+
* const agent = new Agent({ name: 'my-agent', ... });
|
|
11
|
+
* const result = await agent.generate('What is the capital of France?');
|
|
12
|
+
*
|
|
13
|
+
* const tracePath = fromMastra(result, { agentName: 'my-agent', model: 'gpt-4o' });
|
|
14
|
+
* const evalResult = run(tracePath, { metrics: ['faithfulness'] });
|
|
15
|
+
*/
|
|
16
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
17
|
+
if (k2 === undefined) k2 = k;
|
|
18
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
19
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
20
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
21
|
+
}
|
|
22
|
+
Object.defineProperty(o, k2, desc);
|
|
23
|
+
}) : (function(o, m, k, k2) {
|
|
24
|
+
if (k2 === undefined) k2 = k;
|
|
25
|
+
o[k2] = m[k];
|
|
26
|
+
}));
|
|
27
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
28
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
29
|
+
}) : function(o, v) {
|
|
30
|
+
o["default"] = v;
|
|
31
|
+
});
|
|
32
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
33
|
+
var ownKeys = function(o) {
|
|
34
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
35
|
+
var ar = [];
|
|
36
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
37
|
+
return ar;
|
|
38
|
+
};
|
|
39
|
+
return ownKeys(o);
|
|
40
|
+
};
|
|
41
|
+
return function (mod) {
|
|
42
|
+
if (mod && mod.__esModule) return mod;
|
|
43
|
+
var result = {};
|
|
44
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
45
|
+
__setModuleDefault(result, mod);
|
|
46
|
+
return result;
|
|
47
|
+
};
|
|
48
|
+
})();
|
|
49
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
50
|
+
exports.fromMastra = fromMastra;
|
|
51
|
+
const fs = __importStar(require("fs"));
|
|
52
|
+
const os = __importStar(require("os"));
|
|
53
|
+
const path = __importStar(require("path"));
|
|
54
|
+
/**
 * Convert a Mastra `agent.generate()` result into an EvalForge trace JSON file.
 *
 * Reads, when present:
 *   - `result` itself if it is a string, else `result.text`, else `result.content` (final answer)
 *   - `result.usage.promptTokens` + `result.usage.completionTokens` (`metadata.total_tokens`)
 *   - `result.steps[].text` (one `thought` step each)
 *   - `result.toolCalls` / `result.toolResults` (`tool_call` steps)
 *
 * A tool result that shares a `toolCallId` with a tool call is folded into that
 * call's step instead of being emitted as a second `tool_call` step. Calls and
 * results that cannot be paired by id are each emitted on their own.
 *
 * @param result  Value returned by the agent (a string or a result object).
 * @param options Optional agent name, model, question and expected tools copied into the trace.
 * @returns Path of the JSON trace file written under `os.tmpdir()`.
 * @throws Propagates any error raised by `fs.writeFileSync`.
 */
function fromMastra(result, options = {}) {
    const { agentName = 'mastra-agent', model = 'unknown', question = '', expectedTools = [], } = options;
    // Tool results are frequently objects; String() would collapse them to
    // "[object Object]", so serialize structured values as JSON instead.
    const renderResult = (value) => {
        if (value === undefined || value === null)
            return '';
        if (typeof value === 'object') {
            try {
                return JSON.stringify(value) ?? String(value);
            }
            catch {
                // e.g. circular structures: fall back to the generic string form.
                return String(value);
            }
        }
        return String(value);
    };
    const steps = [];
    let stepId = 1;
    let finalAnswer = '';
    let totalTokens = 0;
    // Extract final answer
    if (typeof result === 'string') {
        finalAnswer = result;
    }
    else if (result?.text) {
        finalAnswer = result.text;
    }
    else if (result?.content) {
        finalAnswer = String(result.content);
    }
    // Extract token usage
    if (result?.usage) {
        totalTokens = (result.usage.promptTokens ?? 0) +
            (result.usage.completionTokens ?? 0);
    }
    // Extract reasoning steps if present
    if (Array.isArray(result?.steps)) {
        for (const step of result.steps) {
            if (step?.text) {
                steps.push({
                    step_id: stepId++,
                    type: 'thought',
                    content: step.text,
                });
            }
        }
    }
    const toolCalls = Array.isArray(result?.toolCalls) ? result.toolCalls : [];
    const toolResults = Array.isArray(result?.toolResults) ? result.toolResults : [];
    // Indexes of tool results already merged into a tool call step.
    const consumed = new Set();
    const takeResultFor = (call) => {
        if (call?.toolCallId == null)
            return undefined;
        const index = toolResults.findIndex((tr, i) => !consumed.has(i) && tr?.toolCallId === call.toolCallId);
        if (index === -1)
            return undefined;
        consumed.add(index);
        return toolResults[index];
    };
    // Tool calls, each carrying its own result or the result paired by id
    for (const tc of toolCalls) {
        const paired = takeResultFor(tc);
        steps.push({
            step_id: stepId++,
            type: 'tool_call',
            tool: tc?.toolName ?? tc?.name ?? 'unknown_tool',
            input: tc?.args ?? tc?.input ?? {},
            output: { result: renderResult(tc?.result ?? paired?.result) },
            duration_ms: 0,
        });
    }
    // Tool results that were not paired with a call above
    toolResults.forEach((tr, index) => {
        if (consumed.has(index))
            return;
        steps.push({
            step_id: stepId++,
            type: 'tool_call',
            tool: tr?.toolName ?? 'unknown_tool',
            input: tr?.args ?? {},
            output: { result: renderResult(tr?.result) },
            duration_ms: 0,
        });
    });
    const trace = {
        evalforge_version: '0.1',
        trace_id: `mastra-${Date.now()}`,
        timestamp: new Date().toISOString(),
        metadata: {
            framework: 'mastra',
            model,
            agent_name: agentName,
            duration_ms: 0,
            total_tokens: totalTokens,
        },
        input: {
            user: question,
            system: '',
        },
        steps,
        output: {
            answer: finalAnswer,
        },
        eval_hints: {
            expected_tools: expectedTools,
            expected_answer: null,
            context_documents: [],
        },
    };
    // A timestamp alone collides when two traces are written within the same
    // millisecond (the later file silently replaces the earlier one), so add
    // the process id and a random suffix to the file name.
    const unique = `${Date.now()}_${process.pid}_${Math.random().toString(36).slice(2, 10)}`;
    const tmp = path.join(os.tmpdir(), `evalforge_mastra_${unique}.json`);
    fs.writeFileSync(tmp, JSON.stringify(trace, null, 2));
    return tmp;
}
|
|
151
|
+
//# sourceMappingURL=mastra.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"mastra.js","sourceRoot":"","sources":["../../src/adapters/mastra.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;;GAaG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAcH,gCA+GC;AA3HD,uCAAyB;AACzB,uCAAyB;AACzB,2CAA6B;AAU7B,SAAgB,UAAU,CACxB,MAAW,EACX,UAAgC,EAAE;IAElC;;;;;;;;OAQG;IAEH,MAAM,EACJ,SAAS,GAAG,cAAc,EAC1B,KAAK,GAAG,SAAS,EACjB,QAAQ,GAAG,EAAE,EACb,aAAa,GAAG,EAAE,GACnB,GAAG,OAAO,CAAC;IAEZ,MAAM,KAAK,GAAmB,EAAE,CAAC;IACjC,IAAI,MAAM,GAAG,CAAC,CAAC;IACf,IAAI,WAAW,GAAG,EAAE,CAAC;IACrB,IAAI,WAAW,GAAG,CAAC,CAAC;IAEpB,uBAAuB;IACvB,IAAI,OAAO,MAAM,KAAK,QAAQ,EAAE,CAAC;QAC/B,WAAW,GAAG,MAAM,CAAC;IACvB,CAAC;SAAM,IAAI,MAAM,EAAE,IAAI,EAAE,CAAC;QACxB,WAAW,GAAG,MAAM,CAAC,IAAI,CAAC;IAC5B,CAAC;SAAM,IAAI,MAAM,EAAE,OAAO,EAAE,CAAC;QAC3B,WAAW,GAAG,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;IACvC,CAAC;IAED,sBAAsB;IACtB,IAAI,MAAM,EAAE,KAAK,EAAE,CAAC;QAClB,WAAW,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,YAAY,IAAI,CAAC,CAAC;YAChC,CAAC,MAAM,CAAC,KAAK,CAAC,gBAAgB,IAAI,CAAC,CAAC,CAAC;IACrD,CAAC;IAED,2BAA2B;IAC3B,IAAI,KAAK,CAAC,OAAO,CAAC,MAAM,EAAE,KAAK,CAAC,EAAE,CAAC;QACjC,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,KAAK,EAAE,CAAC;YAChC,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;gBACd,KAAK,CAAC,IAAI,CAAC;oBACT,OAAO,EAAE,MAAM,EAAE;oBACjB,IAAI,EAAE,SAAS;oBACf,OAAO,EAAE,IAAI,CAAC,IAAI;iBACnB,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;IAED,gCAAgC;IAChC,IAAI,KAAK,CAAC,OAAO,CAAC,MAAM,EAAE,SAAS,CAAC,EAAE,CAAC;QACrC,KAAK,MAAM,EAAE,IAAI,MAAM,CAAC,SAAS,EAAE,CAAC;YAClC,KAAK,CAAC,IAAI,CAAC;gBACT,OAAO,EAAE,MAAM,EAAE;gBACjB,IAAI,EAAE,WAAW;gBACjB,IAAI,EAAE,EAAE,CAAC,QAAQ,IAAI,EAAE,CAAC,IAAI,IAAI,cAAc;gBAC9C,KAAK,EAAE,EAAE,CAAC,IAAI,IAAI,EAAE,CAAC,KAAK,IAAI,EAAE;gBAChC,MAAM,EAAE,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC,MAAM,IAAI,EAAE,CAAC,EAAE;gBAC3C,WAAW,EAAE,CAAC;aACf,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,4DAA4D;IAC5D,IAAI,KAAK,CAAC,OAAO,CAAC,MAAM,EAAE,WAAW,CAAC,EAAE,CAAC;QACvC,KAAK,MAAM,EAAE,IAAI,MAAM,CAAC,WAAW,EAAE,CAAC;YACpC,KAAK,CAAC,IAAI,CAAC;gBACT,OAAO,EAAE,MAAM,EAAE;gBACjB,IAAI,EAAE,WAAW;gBACjB,IAAI,EAAE,EAAE,CAAC,QAAQ,IAAI,cAAc;gBACnC,KAAK,EAAE,EAAE,CA
AC,IAAI,IAAI,EAAE;gBACpB,MAAM,EAAE,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC,MAAM,IAAI,EAAE,CAAC,EAAE;gBAC3C,WAAW,EAAE,CAAC;aACf,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,MAAM,KAAK,GAAU;QACnB,iBAAiB,EAAE,KAAK;QACxB,QAAQ,EAAE,UAAU,IAAI,CAAC,GAAG,EAAE,EAAE;QAChC,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACnC,QAAQ,EAAE;YACR,SAAS,EAAE,QAAQ;YACnB,KAAK;YACL,UAAU,EAAE,SAAS;YACrB,WAAW,EAAE,CAAC;YACd,YAAY,EAAE,WAAW;SAC1B;QACD,KAAK,EAAE;YACL,IAAI,EAAE,QAAQ;YACd,MAAM,EAAE,EAAE;SACX;QACD,KAAK;QACL,MAAM,EAAE;YACN,MAAM,EAAE,WAAW;SACpB;QACD,UAAU,EAAE;YACV,cAAc,EAAE,aAAa;YAC7B,eAAe,EAAE,IAAI;YACrB,iBAAiB,EAAE,EAAE;SACtB;KACF,CAAC;IAEF,MAAM,GAAG,GAAG,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,MAAM,EAAE,EAAE,oBAAoB,IAAI,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;IAC1E,EAAE,CAAC,aAAa,CAAC,GAAG,EAAE,IAAI,CAAC,SAAS,CAAC,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;IACtD,OAAO,GAAG,CAAC;AACb,CAAC"}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Vercel AI SDK adapter for EvalForge.
|
|
3
|
+
*
|
|
4
|
+
* Usage:
|
|
5
|
+
* import { generateText, streamText } from 'ai';
|
|
6
|
+
* import { fromVercel } from 'evalforge';
|
|
7
|
+
* import { run } from 'evalforge';
|
|
8
|
+
*
|
|
9
|
+
* const result = await generateText({
|
|
10
|
+
* model: openai('gpt-4o'),
|
|
11
|
+
* prompt: 'What is the capital of France?',
|
|
12
|
+
* tools: { webSearch: ... }
|
|
13
|
+
* });
|
|
14
|
+
*
|
|
15
|
+
* const tracePath = fromVercel(result, {
|
|
16
|
+
* question: 'What is the capital of France?',
|
|
17
|
+
* model: 'gpt-4o'
|
|
18
|
+
* });
|
|
19
|
+
* const evalResult = run(tracePath, { metrics: ['faithfulness'] });
|
|
20
|
+
*/
|
|
21
|
+
/**
 * Options accepted by `fromVercel`; every field is optional.
 *
 * - `agentName`: written to `metadata.agent_name` in the trace (defaults to 'vercel-agent').
 * - `model`: written to `metadata.model` (defaults to 'unknown').
 * - `question`: written to `input.user` (defaults to '').
 * - `expectedTools`: written to `eval_hints.expected_tools` (defaults to []).
 */
export interface VercelAdapterOptions {
|
|
22
|
+
agentName?: string;
|
|
23
|
+
model?: string;
|
|
24
|
+
question?: string;
|
|
25
|
+
expectedTools?: string[];
|
|
26
|
+
}
|
|
27
|
+
export declare function fromVercel(result: any, options?: VercelAdapterOptions): string;
|
|
28
|
+
//# sourceMappingURL=vercel.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"vercel.d.ts","sourceRoot":"","sources":["../../src/adapters/vercel.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AAOH,MAAM,WAAW,oBAAoB;IACnC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;CAC1B;AAED,wBAAgB,UAAU,CACxB,MAAM,EAAE,GAAG,EACX,OAAO,GAAE,oBAAyB,GACjC,MAAM,CAiHR"}
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Vercel AI SDK adapter for EvalForge.
|
|
4
|
+
*
|
|
5
|
+
* Usage:
|
|
6
|
+
* import { generateText, streamText } from 'ai';
|
|
7
|
+
* import { fromVercel } from 'evalforge';
|
|
8
|
+
* import { run } from 'evalforge';
|
|
9
|
+
*
|
|
10
|
+
* const result = await generateText({
|
|
11
|
+
* model: openai('gpt-4o'),
|
|
12
|
+
* prompt: 'What is the capital of France?',
|
|
13
|
+
* tools: { webSearch: ... }
|
|
14
|
+
* });
|
|
15
|
+
*
|
|
16
|
+
* const tracePath = fromVercel(result, {
|
|
17
|
+
* question: 'What is the capital of France?',
|
|
18
|
+
* model: 'gpt-4o'
|
|
19
|
+
* });
|
|
20
|
+
* const evalResult = run(tracePath, { metrics: ['faithfulness'] });
|
|
21
|
+
*/
|
|
22
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
23
|
+
if (k2 === undefined) k2 = k;
|
|
24
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
25
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
26
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
27
|
+
}
|
|
28
|
+
Object.defineProperty(o, k2, desc);
|
|
29
|
+
}) : (function(o, m, k, k2) {
|
|
30
|
+
if (k2 === undefined) k2 = k;
|
|
31
|
+
o[k2] = m[k];
|
|
32
|
+
}));
|
|
33
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
34
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
35
|
+
}) : function(o, v) {
|
|
36
|
+
o["default"] = v;
|
|
37
|
+
});
|
|
38
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
39
|
+
var ownKeys = function(o) {
|
|
40
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
41
|
+
var ar = [];
|
|
42
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
43
|
+
return ar;
|
|
44
|
+
};
|
|
45
|
+
return ownKeys(o);
|
|
46
|
+
};
|
|
47
|
+
return function (mod) {
|
|
48
|
+
if (mod && mod.__esModule) return mod;
|
|
49
|
+
var result = {};
|
|
50
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
51
|
+
__setModuleDefault(result, mod);
|
|
52
|
+
return result;
|
|
53
|
+
};
|
|
54
|
+
})();
|
|
55
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
56
|
+
exports.fromVercel = fromVercel;
|
|
57
|
+
const fs = __importStar(require("fs"));
|
|
58
|
+
const os = __importStar(require("os"));
|
|
59
|
+
const path = __importStar(require("path"));
|
|
60
|
+
/**
 * Convert a Vercel AI SDK `generateText()` result into an EvalForge trace JSON file.
 *
 * Reads, when present:
 *   - `result.text`, or `result` itself if it is a string (final answer)
 *   - `result.usage.promptTokens` + `result.usage.completionTokens` (`metadata.total_tokens`)
 *   - `result.toolCalls` / `result.toolResults` (`tool_call` steps)
 *   - `result.steps[]`: `text` becomes a `thought` step unless it equals the
 *     final answer; `toolCalls` / `toolResults` become `tool_call` steps
 *   - `result.finishReason` (`output.finish_reason`)
 *
 * Tool results are paired with calls by `toolCallId`; array position is used
 * only when ids are missing. A call already emitted from `result.toolCalls`
 * is not emitted again when the same `toolCallId` reappears inside a step.
 *
 * @param result  Value returned by `generateText()` (or a plain string).
 * @param options Optional agent name, model, question and expected tools copied into the trace.
 * @returns Path of the JSON trace file written under `os.tmpdir()`.
 * @throws Propagates any error raised by `fs.writeFileSync`.
 */
function fromVercel(result, options = {}) {
    const { agentName = 'vercel-agent', model = 'unknown', question = '', expectedTools = [], } = options;
    const asArray = (value) => (Array.isArray(value) ? value : []);
    // Tool results are frequently objects; String() would collapse them to
    // "[object Object]", so serialize structured values as JSON instead.
    const renderResult = (value) => {
        if (value === undefined || value === null)
            return '';
        if (typeof value === 'object') {
            try {
                return JSON.stringify(value) ?? String(value);
            }
            catch {
                // e.g. circular structures: fall back to the generic string form.
                return String(value);
            }
        }
        return String(value);
    };
    const steps = [];
    let stepId = 1;
    let finalAnswer = '';
    let totalTokens = 0;
    // Extract final answer
    if (result?.text) {
        finalAnswer = result.text;
    }
    else if (typeof result === 'string') {
        finalAnswer = result;
    }
    // Extract token usage
    if (result?.usage) {
        totalTokens = (result.usage.promptTokens ?? 0) +
            (result.usage.completionTokens ?? 0);
    }
    const topCalls = asArray(result?.toolCalls);
    const topResults = asArray(result?.toolResults);
    const generationSteps = asArray(result?.steps);
    // Every tool result visible anywhere in the result, used as an id-lookup fallback.
    const everyResult = [
        ...topResults,
        ...generationSteps.flatMap((step) => asArray(step?.toolResults)),
    ];
    // Ids of tool calls already written to the trace, to avoid duplicates.
    const emittedCallIds = new Set();
    const resultFor = (call, localResults, index) => {
        const id = call?.toolCallId;
        if (id != null) {
            const byId = localResults.find((r) => r?.toolCallId === id) ??
                everyResult.find((r) => r?.toolCallId === id);
            if (byId !== undefined)
                return byId;
        }
        // Trust array position only when ids cannot contradict it.
        const positional = localResults[index];
        const idsDisagree = id != null && positional?.toolCallId != null;
        return positional != null && !idsDisagree ? positional : undefined;
    };
    const pushToolCall = (call, toolResult) => {
        if (call?.toolCallId != null)
            emittedCallIds.add(call.toolCallId);
        steps.push({
            step_id: stepId++,
            type: 'tool_call',
            tool: call?.toolName ?? call?.name ?? 'unknown_tool',
            input: call?.args ?? call?.input ?? {},
            output: toolResult ? { result: renderResult(toolResult.result) } : {},
            duration_ms: 0,
        });
    };
    // Top-level tool calls and results
    topCalls.forEach((call, index) => {
        pushToolCall(call, resultFor(call, topResults, index));
    });
    // Steps (multi-step generations)
    for (const step of generationSteps) {
        if (step?.text && step.text !== finalAnswer) {
            steps.push({
                step_id: stepId++,
                type: 'thought',
                content: step.text,
            });
        }
        const stepResults = asArray(step?.toolResults);
        asArray(step?.toolCalls).forEach((call, index) => {
            // Skip calls that were already recorded under the same id.
            if (call?.toolCallId != null && emittedCallIds.has(call.toolCallId))
                return;
            pushToolCall(call, resultFor(call, stepResults, index));
        });
    }
    const trace = {
        evalforge_version: '0.1',
        trace_id: `vercel-${Date.now()}`,
        timestamp: new Date().toISOString(),
        metadata: {
            framework: 'vercel-ai',
            model,
            agent_name: agentName,
            duration_ms: 0,
            total_tokens: totalTokens,
        },
        input: {
            user: question,
            system: '',
        },
        steps,
        output: {
            answer: finalAnswer,
            finish_reason: result?.finishReason,
        },
        eval_hints: {
            expected_tools: expectedTools,
            expected_answer: null,
            context_documents: [],
        },
    };
    // A timestamp alone collides when two traces are written within the same
    // millisecond (the later file silently replaces the earlier one), so add
    // the process id and a random suffix to the file name.
    const unique = `${Date.now()}_${process.pid}_${Math.random().toString(36).slice(2, 10)}`;
    const tmp = path.join(os.tmpdir(), `evalforge_vercel_${unique}.json`);
    fs.writeFileSync(tmp, JSON.stringify(trace, null, 2));
    return tmp;
}
|
|
158
|
+
//# sourceMappingURL=vercel.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"vercel.js","sourceRoot":"","sources":["../../src/adapters/vercel.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;;;;;;;;GAmBG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAcH,gCAoHC;AAhID,uCAAyB;AACzB,uCAAyB;AACzB,2CAA6B;AAU7B,SAAgB,UAAU,CACxB,MAAW,EACX,UAAgC,EAAE;IAElC;;;;;;;;;;OAUG;IAEH,MAAM,EACJ,SAAS,GAAG,cAAc,EAC1B,KAAK,GAAG,SAAS,EACjB,QAAQ,GAAG,EAAE,EACb,aAAa,GAAG,EAAE,GACnB,GAAG,OAAO,CAAC;IAEZ,MAAM,KAAK,GAAmB,EAAE,CAAC;IACjC,IAAI,MAAM,GAAG,CAAC,CAAC;IACf,IAAI,WAAW,GAAG,EAAE,CAAC;IACrB,IAAI,WAAW,GAAG,CAAC,CAAC;IAEpB,uBAAuB;IACvB,IAAI,MAAM,EAAE,IAAI,EAAE,CAAC;QACjB,WAAW,GAAG,MAAM,CAAC,IAAI,CAAC;IAC5B,CAAC;SAAM,IAAI,OAAO,MAAM,KAAK,QAAQ,EAAE,CAAC;QACtC,WAAW,GAAG,MAAM,CAAC;IACvB,CAAC;IAED,sBAAsB;IACtB,IAAI,MAAM,EAAE,KAAK,EAAE,CAAC;QAClB,WAAW,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,YAAY,IAAI,CAAC,CAAC;YAChC,CAAC,MAAM,CAAC,KAAK,CAAC,gBAAgB,IAAI,CAAC,CAAC,CAAC;IACrD,CAAC;IAED,iCAAiC;IACjC,MAAM,SAAS,GAAG,MAAM,EAAE,SAAS,IAAI,EAAE,CAAC;IAC1C,MAAM,WAAW,GAAG,MAAM,EAAE,WAAW,IAAI,EAAE,CAAC;IAE9C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC1C,MAAM,EAAE,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC;QACxB,MAAM,EAAE,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC;QAE1B,KAAK,CAAC,IAAI,CAAC;YACT,OAAO,EAAE,MAAM,EAAE;YACjB,IAAI,EAAE,WAAW;YACjB,IAAI,EAAE,EAAE,CAAC,QAAQ,IAAI,EAAE,CAAC,IAAI,IAAI,cAAc;YAC9C,KAAK,EAAE,EAAE,CAAC,IAAI,IAAI,EAAE,CAAC,KAAK,IAAI,EAAE;YAChC,MAAM,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC,MAAM,IAAI,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE;YACrD,WAAW,EAAE,CAAC;SACf,CAAC,CAAC;IACL,CAAC;IAED,oDAAoD;IACpD,IAAI,KAAK,CAAC,OAAO,CAAC,MAAM,EAAE,KAAK,CAAC,EAAE,CAAC;QACjC,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,KAAK,EAAE,CAAC;YAChC,IAAI,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,IAAI,KAAK,WAAW,EAAE,CAAC;gBAC3C,KAAK,CAAC,IAAI,CAAC;oBACT,OAAO,EAAE,MAAM,EAAE;oBACjB,IAAI,EAAE,SAAS;oBACf,OAAO,EAAE,IAAI,CAAC,IAAI;iBACnB,CAAC,CAAC;YACL,CAAC;YACD,0BAA0B;YAC1B,KAAK,MAAM,EAAE,IAAI,IAAI,CAAC,SAAS,IAAI,EAAE,EAAE,CAAC;gBACtC,MAAM,EAAE,GAAG,CAAC,IAAI,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,IAAI,CACtC,CAA
C,CAAM,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,KAAK,EAAE,CAAC,UAAU,CAC3C,CAAC;gBACF,KAAK,CAAC,IAAI,CAAC;oBACT,OAAO,EAAE,MAAM,EAAE;oBACjB,IAAI,EAAE,WAAW;oBACjB,IAAI,EAAE,EAAE,CAAC,QAAQ,IAAI,cAAc;oBACnC,KAAK,EAAE,EAAE,CAAC,IAAI,IAAI,EAAE;oBACpB,MAAM,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC,MAAM,IAAI,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE;oBACrD,WAAW,EAAE,CAAC;iBACf,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;IAED,MAAM,KAAK,GAAU;QACnB,iBAAiB,EAAE,KAAK;QACxB,QAAQ,EAAE,UAAU,IAAI,CAAC,GAAG,EAAE,EAAE;QAChC,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACnC,QAAQ,EAAE;YACR,SAAS,EAAE,WAAW;YACtB,KAAK;YACL,UAAU,EAAE,SAAS;YACrB,WAAW,EAAE,CAAC;YACd,YAAY,EAAE,WAAW;SAC1B;QACD,KAAK,EAAE;YACL,IAAI,EAAE,QAAQ;YACd,MAAM,EAAE,EAAE;SACX;QACD,KAAK;QACL,MAAM,EAAE;YACN,MAAM,EAAE,WAAW;YACnB,aAAa,EAAE,MAAM,EAAE,YAAY;SACpC;QACD,UAAU,EAAE;YACV,cAAc,EAAE,aAAa;YAC7B,eAAe,EAAE,IAAI;YACrB,iBAAiB,EAAE,EAAE;SACtB;KACF,CAAC;IAEF,MAAM,GAAG,GAAG,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,MAAM,EAAE,EAAE,oBAAoB,IAAI,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;IAC1E,EAAE,CAAC,aAAa,CAAC,GAAG,EAAE,IAAI,CAAC,SAAS,CAAC,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;IACtD,OAAO,GAAG,CAAC;AACb,CAAC"}
|
package/dist/index.d.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { EvalResult, RunOptions } from './types';
|
|
2
2
|
export { EvalResult, MetricResult, RunOptions, Trace } from './types';
|
|
3
|
+
export * from './adapters';
|
|
3
4
|
export declare function run(tracePath: string, options: RunOptions): EvalResult;
|
|
4
5
|
export declare function demo(): EvalResult;
|
|
5
6
|
//# sourceMappingURL=index.d.ts.map
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAKA,OAAO,EAAE,UAAU,EAAE,UAAU,EAAS,MAAM,SAAS,CAAC;AAExD,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,UAAU,EAAE,KAAK,EAAE,MAAM,SAAS,CAAC;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAKA,OAAO,EAAE,UAAU,EAAE,UAAU,EAAS,MAAM,SAAS,CAAC;AAExD,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,UAAU,EAAE,KAAK,EAAE,MAAM,SAAS,CAAC;AACtE,cAAc,YAAY,CAAC;AAkC3B,wBAAgB,GAAG,CAAC,SAAS,EAAE,MAAM,EAAE,OAAO,EAAE,UAAU,GAAG,UAAU,CAmBtE;AAED,wBAAgB,IAAI,IAAI,UAAU,CAuCjC"}
|
package/dist/index.js
CHANGED
|
@@ -32,6 +32,9 @@ var __importStar = (this && this.__importStar) || (function () {
|
|
|
32
32
|
return result;
|
|
33
33
|
};
|
|
34
34
|
})();
|
|
35
|
+
var __exportStar = (this && this.__exportStar) || function(m, exports) {
|
|
36
|
+
for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
|
|
37
|
+
};
|
|
35
38
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
39
|
exports.run = run;
|
|
37
40
|
exports.demo = demo;
|
|
@@ -40,6 +43,7 @@ const fs = __importStar(require("fs"));
|
|
|
40
43
|
const os = __importStar(require("os"));
|
|
41
44
|
const path = __importStar(require("path"));
|
|
42
45
|
const binary_1 = require("./binary");
|
|
46
|
+
__exportStar(require("./adapters"), exports);
|
|
43
47
|
function parseOutput(output) {
|
|
44
48
|
const metrics = [];
|
|
45
49
|
const lines = output.split('\n');
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AA0CA,kBAmBC;AAED,oBAuCC;AAtGD,iDAA0C;AAC1C,uCAAyB;AACzB,uCAAyB;AACzB,2CAA6B;AAC7B,qCAAsC;AAItC,6CAA2B;AAE3B,SAAS,WAAW,CAAC,MAAc;IACjC,MAAM,OAAO,GAAG,EAAE,CAAC;IACnB,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAEjC,IAAI,OAAO,GAAG,EAAE,CAAC;IACjB,IAAI,SAAS,GAAG,EAAE,CAAC;IACnB,IAAI,aAAa,GAAG,IAAI,CAAC;IAEzB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QAE7B,IAAI,IAAI,CAAC,UAAU,CAAC,WAAW,CAAC;YAAE,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QACtE,IAAI,IAAI,CAAC,UAAU,CAAC,YAAY,CAAC;YAAE,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QACzE,IAAI,IAAI,KAAK,eAAe;YAAE,aAAa,GAAG,KAAK,CAAC;QAEpD,MAAM,WAAW,GAAG,IAAI,CAAC,KAAK,CAAC,kCAAkC,CAAC,CAAC;QACnE,IAAI,WAAW,EAAE,CAAC;YAChB,MAAM,MAAM,GAAG,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC,UAAU,CAAC,SAAS,CAAC;gBACvD,CAAC,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE;gBAC5C,CAAC,CAAC,EAAE,CAAC;YACP,OAAO,CAAC,IAAI,CAAC;gBACX,MAAM,EAAE,WAAW,CAAC,CAAC,CAAC;gBACtB,KAAK,EAAE,UAAU,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC;gBACjC,MAAM,EAAE,WAAW,CAAC,CAAC,CAAC,KAAK,MAAM;gBACjC,MAAM;aACP,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,OAAO,EAAE,MAAM,EAAE,aAAa,EAAE,CAAC;AAChE,CAAC;AAED,SAAgB,GAAG,CAAC,SAAiB,EAAE,OAAmB;IACxD,MAAM,MAAM,GAAG,IAAA,mBAAU,GAAE,CAAC;IAC5B,MAAM,IAAI,GAAG;QACX,KAAK;QACL,SAAS,EAAE,SAAS;QACpB,WAAW,EAAE,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC;QACtC,aAAa,EAAE,MAAM,CAAC,OAAO,CAAC,SAAS,IAAI,GAAG,CAAC;KAChD,CAAC;IAEF,IAAI,OAAO,CAAC,IAAI;QAAE,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IACtC,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC;QAAC,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QAAC,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;IAAC,CAAC;IAEzE,MAAM,GAAG,GAAG,EAAE,GAAG,OAAO,CAAC,GAAG,EAAE,CAAC;IAC/B,IAAI,OAAO
,CAAC,MAAM;QAAE,GAAG,CAAC,iBAAiB,GAAG,OAAO,CAAC,MAAM,CAAC;IAE3D,MAAM,MAAM,GAAG,IAAA,yBAAS,EAAC,MAAM,EAAE,IAAI,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE,GAAG,EAAE,CAAC,CAAC;IAClE,IAAI,MAAM,CAAC,KAAK;QAAE,MAAM,MAAM,CAAC,KAAK,CAAC;IAErC,OAAO,WAAW,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;AACpC,CAAC;AAED,SAAgB,IAAI;IAClB,MAAM,KAAK,GAAU;QACnB,iBAAiB,EAAE,KAAK;QACxB,QAAQ,EAAE,aAAa;QACvB,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACnC,QAAQ,EAAE;YACR,SAAS,EAAE,eAAe;YAC1B,KAAK,EAAE,QAAQ;YACf,UAAU,EAAE,YAAY;YACxB,WAAW,EAAE,IAAI;YACjB,YAAY,EAAE,GAAG;SAClB;QACD,KAAK,EAAE;YACL,IAAI,EAAE,mCAAmC;YACzC,MAAM,EAAE,8BAA8B;SACvC;QACD,KAAK,EAAE;YACL,EAAE,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,SAAS,EAAE,OAAO,EAAE,yBAAyB,EAAE;YACnE;gBACE,OAAO,EAAE,CAAC;gBACV,IAAI,EAAE,WAAW;gBACjB,IAAI,EAAE,YAAY;gBAClB,KAAK,EAAE,EAAE,KAAK,EAAE,sBAAsB,EAAE;gBACxC,MAAM,EAAE,EAAE,MAAM,EAAE,uCAAuC,EAAE;gBAC3D,WAAW,EAAE,GAAG;aACjB;SACF;QACD,MAAM,EAAE,EAAE,MAAM,EAAE,uCAAuC,EAAE;QAC3D,UAAU,EAAE;YACV,cAAc,EAAE,CAAC,YAAY,CAAC;YAC9B,eAAe,EAAE,UAAU;YAC3B,iBAAiB,EAAE,EAAE;SACtB;KACF,CAAC;IAEF,MAAM,GAAG,GAAG,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,MAAM,EAAE,EAAE,kBAAkB,IAAI,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;IACxE,EAAE,CAAC,aAAa,CAAC,GAAG,EAAE,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC;IAE7C,OAAO,GAAG,CAAC,GAAG,EAAE,EAAE,OAAO,EAAE,CAAC,cAAc,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC;AAC7D,CAAC"}
|
package/package.json
CHANGED
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Mastra adapter for EvalForge.
|
|
3
|
+
*
|
|
4
|
+
* Usage:
|
|
5
|
+
* import { Agent } from '@mastra/core';
|
|
6
|
+
* import { fromMastra } from 'evalforge';
|
|
7
|
+
* import { run } from 'evalforge';
|
|
8
|
+
*
|
|
9
|
+
* const agent = new Agent({ name: 'my-agent', ... });
|
|
10
|
+
* const result = await agent.generate('What is the capital of France?');
|
|
11
|
+
*
|
|
12
|
+
* const tracePath = fromMastra(result, { agentName: 'my-agent', model: 'gpt-4o' });
|
|
13
|
+
* const evalResult = run(tracePath, { metrics: ['faithfulness'] });
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
import * as fs from 'fs';
|
|
17
|
+
import * as os from 'os';
|
|
18
|
+
import * as path from 'path';
|
|
19
|
+
import { Trace } from '../types';
|
|
20
|
+
|
|
21
|
+
/**
 * Options accepted by `fromMastra`; every field is optional.
 *
 * - `agentName`: written to `metadata.agent_name` in the trace (defaults to 'mastra-agent').
 * - `model`: written to `metadata.model` (defaults to 'unknown').
 * - `question`: written to `input.user` (defaults to '').
 * - `expectedTools`: written to `eval_hints.expected_tools` (defaults to []).
 */
export interface MastraAdapterOptions {
|
|
22
|
+
agentName?: string;
|
|
23
|
+
model?: string;
|
|
24
|
+
question?: string;
|
|
25
|
+
expectedTools?: string[];
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
/**
 * Convert a Mastra `agent.generate()` result into an EvalForge trace JSON file.
 *
 * Reads, when present:
 *   - `result` itself if it is a string, else `result.text`, else `result.content` (final answer)
 *   - `result.usage.promptTokens` + `result.usage.completionTokens` (`metadata.total_tokens`)
 *   - `result.steps[].text` (one `thought` step each)
 *   - `result.toolCalls` / `result.toolResults` (`tool_call` steps)
 *
 * A tool result that shares a `toolCallId` with a tool call is folded into that
 * call's step instead of being emitted as a second `tool_call` step. Calls and
 * results that cannot be paired by id are each emitted on their own.
 *
 * @param result  Value returned by the agent (a string or a result object).
 * @param options Optional agent name, model, question and expected tools copied into the trace.
 * @returns Path of the JSON trace file written under `os.tmpdir()`.
 * @throws Propagates any error raised by `fs.writeFileSync`.
 */
export function fromMastra(
  result: any,
  options: MastraAdapterOptions = {}
): string {
  const {
    agentName = 'mastra-agent',
    model = 'unknown',
    question = '',
    expectedTools = [],
  } = options;

  // Tool results are frequently objects; String() would collapse them to
  // "[object Object]", so serialize structured values as JSON instead.
  const renderResult = (value: unknown): string => {
    if (value === undefined || value === null) return '';
    if (typeof value === 'object') {
      try {
        return JSON.stringify(value) ?? String(value);
      } catch {
        // e.g. circular structures: fall back to the generic string form.
        return String(value);
      }
    }
    return String(value);
  };

  const steps: Trace['steps'] = [];
  let stepId = 1;
  let finalAnswer = '';
  let totalTokens = 0;

  // Extract final answer
  if (typeof result === 'string') {
    finalAnswer = result;
  } else if (result?.text) {
    finalAnswer = result.text;
  } else if (result?.content) {
    finalAnswer = String(result.content);
  }

  // Extract token usage
  if (result?.usage) {
    totalTokens = (result.usage.promptTokens ?? 0) +
                  (result.usage.completionTokens ?? 0);
  }

  // Extract reasoning steps if present
  if (Array.isArray(result?.steps)) {
    for (const step of result.steps) {
      if (step?.text) {
        steps.push({
          step_id: stepId++,
          type: 'thought',
          content: step.text,
        });
      }
    }
  }

  const toolCalls: any[] = Array.isArray(result?.toolCalls) ? result.toolCalls : [];
  const toolResults: any[] = Array.isArray(result?.toolResults) ? result.toolResults : [];

  // Indexes of tool results already merged into a tool call step.
  const consumed = new Set<number>();
  const takeResultFor = (call: any): any => {
    if (call?.toolCallId == null) return undefined;
    const index = toolResults.findIndex(
      (tr: any, i: number) => !consumed.has(i) && tr?.toolCallId === call.toolCallId
    );
    if (index === -1) return undefined;
    consumed.add(index);
    return toolResults[index];
  };

  // Tool calls, each carrying its own result or the result paired by id
  for (const tc of toolCalls) {
    const paired = takeResultFor(tc);
    steps.push({
      step_id: stepId++,
      type: 'tool_call',
      tool: tc?.toolName ?? tc?.name ?? 'unknown_tool',
      input: tc?.args ?? tc?.input ?? {},
      output: { result: renderResult(tc?.result ?? paired?.result) },
      duration_ms: 0,
    });
  }

  // Tool results that were not paired with a call above
  toolResults.forEach((tr: any, index: number) => {
    if (consumed.has(index)) return;
    steps.push({
      step_id: stepId++,
      type: 'tool_call',
      tool: tr?.toolName ?? 'unknown_tool',
      input: tr?.args ?? {},
      output: { result: renderResult(tr?.result) },
      duration_ms: 0,
    });
  });

  const trace: Trace = {
    evalforge_version: '0.1',
    trace_id: `mastra-${Date.now()}`,
    timestamp: new Date().toISOString(),
    metadata: {
      framework: 'mastra',
      model,
      agent_name: agentName,
      duration_ms: 0,
      total_tokens: totalTokens,
    },
    input: {
      user: question,
      system: '',
    },
    steps,
    output: {
      answer: finalAnswer,
    },
    eval_hints: {
      expected_tools: expectedTools,
      expected_answer: null,
      context_documents: [],
    },
  };

  // A timestamp alone collides when two traces are written within the same
  // millisecond (the later file silently replaces the earlier one), so add
  // the process id and a random suffix to the file name.
  const unique = `${Date.now()}_${process.pid}_${Math.random().toString(36).slice(2, 10)}`;
  const tmp = path.join(os.tmpdir(), `evalforge_mastra_${unique}.json`);
  fs.writeFileSync(tmp, JSON.stringify(trace, null, 2));
  return tmp;
}
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Vercel AI SDK adapter for EvalForge.
|
|
3
|
+
*
|
|
4
|
+
* Usage:
|
|
5
|
+
* import { generateText, streamText } from 'ai';
|
|
6
|
+
* import { fromVercel } from 'evalforge';
|
|
7
|
+
* import { run } from 'evalforge';
|
|
8
|
+
*
|
|
9
|
+
* const result = await generateText({
|
|
10
|
+
* model: openai('gpt-4o'),
|
|
11
|
+
* prompt: 'What is the capital of France?',
|
|
12
|
+
* tools: { webSearch: ... }
|
|
13
|
+
* });
|
|
14
|
+
*
|
|
15
|
+
* const tracePath = fromVercel(result, {
|
|
16
|
+
* question: 'What is the capital of France?',
|
|
17
|
+
* model: 'gpt-4o'
|
|
18
|
+
* });
|
|
19
|
+
* const evalResult = run(tracePath, { metrics: ['faithfulness'] });
|
|
20
|
+
*/
|
|
21
|
+
|
|
22
|
+
import * as fs from 'fs';
|
|
23
|
+
import * as os from 'os';
|
|
24
|
+
import * as path from 'path';
|
|
25
|
+
import { Trace } from '../types';
|
|
26
|
+
|
|
27
|
+
/**
 * Caller-supplied metadata for {@link fromVercel}.
 *
 * Every field is optional; `fromVercel` substitutes the default noted on each
 * field when it is omitted.
 */
export interface VercelAdapterOptions {
  /** Written to `metadata.agent_name`. Defaults to `'vercel-agent'`. */
  agentName?: string;

  /** Written to `metadata.model`. Defaults to `'unknown'`. */
  model?: string;

  /** The user prompt, written to `input.user`. Defaults to `''`. */
  question?: string;

  /** Written to `eval_hints.expected_tools`. Defaults to `[]`. */
  expectedTools?: string[];
}
|
|
33
|
+
|
|
34
|
+
/**
 * Render a tool result as the text stored in a trace step.
 *
 * Strings pass through unchanged and `null`/`undefined` become `''`. Objects
 * and arrays are JSON-encoded, because `String(obj)` would collapse them to
 * the useless `"[object Object]"`.
 */
function stringifyToolResult(value: unknown): string {
  if (value === null || value === undefined) return '';
  if (typeof value === 'string') return value;
  if (typeof value === 'object') {
    try {
      return JSON.stringify(value) ?? String(value);
    } catch {
      // Circular references or BigInt fields cannot be JSON-encoded.
      return String(value);
    }
  }
  return String(value);
}

/**
 * Find the tool result that belongs to `call`.
 *
 * Pairing is by `toolCallId` when the call has one. Position (`index`) is the
 * fallback, used only when the call has no id, or when no id matches and the
 * result in that slot carries no id of its own (so it cannot belong to a
 * different call).
 */
function findToolResult(call: any, index: number, results: any[]): any {
  const byPosition = results[index];
  if (call?.toolCallId == null) return byPosition;

  const byId = results.find((r: any) => r?.toolCallId === call.toolCallId);
  if (byId) return byId;

  return byPosition?.toolCallId == null ? byPosition : undefined;
}

/**
 * Convert a Vercel AI SDK `generateText()` result into an EvalForge trace and
 * write it to a JSON file in the OS temp directory.
 *
 * Fields read from `result` (all optional):
 * - `text` — final answer (a bare string `result` is also accepted)
 * - `steps` — per-step `text`, `toolCalls` and `toolResults`
 * - `toolCalls` / `toolResults` — top-level tool invocations and results
 * - `usage` — `promptTokens`/`completionTokens` (or `inputTokens`/`outputTokens`)
 * - `finishReason` — copied to `output.finish_reason`
 *
 * Trace steps are emitted in step order. Top-level tool calls follow, except
 * those whose `toolCallId` was already emitted from `steps`, so a call listed
 * in both places appears once.
 *
 * @param result  Value returned by `generateText()`, or a plain answer string.
 * @param options Metadata recorded in the trace; see {@link VercelAdapterOptions}.
 * @returns Absolute path of the trace file that was written.
 * @throws Whatever `fs.writeFileSync` throws if the temp file cannot be written.
 */
export function fromVercel(
  result: any,
  options: VercelAdapterOptions = {}
): string {
  const {
    agentName = 'vercel-agent',
    model = 'unknown',
    question = '',
    expectedTools = [],
  } = options;

  const steps: Trace['steps'] = [];
  let stepId = 1;

  // Final answer: either the result's text or the result itself when it is a string.
  let finalAnswer = '';
  if (result?.text) {
    finalAnswer = result.text;
  } else if (typeof result === 'string') {
    finalAnswer = result;
  }

  // Token usage; accept both the prompt/completion and input/output field names.
  const usage = result?.usage;
  const totalTokens = usage
    ? (usage.promptTokens ?? usage.inputTokens ?? 0) +
      (usage.completionTokens ?? usage.outputTokens ?? 0)
    : 0;

  const pushToolCall = (call: any, toolResult: any): void => {
    steps.push({
      step_id: stepId++,
      type: 'tool_call',
      tool: call?.toolName ?? call?.name ?? 'unknown_tool',
      input: call?.args ?? call?.input ?? {},
      output: toolResult
        ? { result: stringifyToolResult(toolResult.result ?? toolResult.output) }
        : {},
      duration_ms: 0,
    });
  };

  // Ids of calls already emitted from `steps`, so the top-level list does not repeat them.
  const emittedCallIds = new Set<unknown>();

  if (Array.isArray(result?.steps)) {
    for (const step of result.steps) {
      if (!step) continue;

      // The last step's text is normally the final answer; don't repeat it as a thought.
      if (step.text && step.text !== finalAnswer) {
        steps.push({
          step_id: stepId++,
          type: 'thought',
          content: step.text,
        });
      }

      const stepCalls: any[] = Array.isArray(step.toolCalls) ? step.toolCalls : [];
      const stepResults: any[] = Array.isArray(step.toolResults) ? step.toolResults : [];
      stepCalls.forEach((call, i) => {
        if (call?.toolCallId != null) emittedCallIds.add(call.toolCallId);
        pushToolCall(call, findToolResult(call, i, stepResults));
      });
    }
  }

  const topCalls: any[] = Array.isArray(result?.toolCalls) ? result.toolCalls : [];
  const topResults: any[] = Array.isArray(result?.toolResults) ? result.toolResults : [];
  topCalls.forEach((call, i) => {
    if (call?.toolCallId != null && emittedCallIds.has(call.toolCallId)) return;
    pushToolCall(call, findToolResult(call, i, topResults));
  });

  // Date.now() alone repeats within a millisecond, which made back-to-back calls
  // share a trace id and overwrite each other's file.
  const uniqueId = `${Date.now()}_${process.pid}_${Math.random().toString(36).slice(2, 10)}`;

  const trace: Trace = {
    evalforge_version: '0.1',
    trace_id: `vercel-${uniqueId}`,
    timestamp: new Date().toISOString(),
    metadata: {
      framework: 'vercel-ai',
      model,
      agent_name: agentName,
      duration_ms: 0,
      total_tokens: totalTokens,
    },
    input: {
      user: question,
      system: '',
    },
    steps,
    output: {
      answer: finalAnswer,
      finish_reason: result?.finishReason,
    },
    eval_hints: {
      expected_tools: expectedTools,
      expected_answer: null,
      context_documents: [],
    },
  };

  const tmp = path.join(os.tmpdir(), `evalforge_vercel_${uniqueId}.json`);
  fs.writeFileSync(tmp, JSON.stringify(trace, null, 2));
  return tmp;
}
|
package/src/index.ts
CHANGED
|
@@ -6,6 +6,7 @@ import { findBinary } from './binary';
|
|
|
6
6
|
import { EvalResult, RunOptions, Trace } from './types';
|
|
7
7
|
|
|
8
8
|
export { EvalResult, MetricResult, RunOptions, Trace } from './types';
|
|
9
|
+
export * from './adapters';
|
|
9
10
|
|
|
10
11
|
function parseOutput(output: string): EvalResult {
|
|
11
12
|
const metrics = [];
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
const { test } = require('node:test');
|
|
2
|
+
const assert = require('node:assert');
|
|
3
|
+
const path = require('path');
|
|
4
|
+
const fs = require('fs');
|
|
5
|
+
|
|
6
|
+
const { fromMastra, fromVercel } = require('../dist/adapters/index.js');
|
|
7
|
+
|
|
8
|
+
// A plain { text } result becomes the trace answer, and the supplied options
// land in the trace metadata and input.
test('fromMastra with text result', () => {
  const answer = 'The capital of France is Paris.';
  const question = 'What is the capital of France?';
  const tracePath = fromMastra({ text: answer }, {
    agentName: 'test-agent',
    model: 'gpt-4o',
    question
  });

  // Delete the temp file before asserting so a failing assertion cannot leak it.
  const raw = fs.readFileSync(tracePath, 'utf8');
  fs.rmSync(tracePath, { force: true });
  const trace = JSON.parse(raw);

  assert.strictEqual(trace.metadata.framework, 'mastra');
  assert.strictEqual(trace.metadata.agent_name, 'test-agent');
  assert.strictEqual(trace.metadata.model, 'gpt-4o');
  assert.strictEqual(trace.output.answer, answer);
  assert.strictEqual(trace.input.user, question);
});
|
|
20
|
+
|
|
21
|
+
// A top-level tool call becomes one tool_call step carrying its name,
// arguments and stringified result.
test('fromMastra with tool calls', () => {
  const result = {
    text: 'Canberra is the capital.',
    toolCalls: [{
      toolName: 'web_search',
      args: { query: 'capital of Australia' },
      result: 'Canberra is the capital of Australia.'
    }]
  };
  const tracePath = fromMastra(result, {
    question: 'What is the capital of Australia?',
    expectedTools: ['web_search']
  });

  // Delete the temp file before asserting so a failing assertion cannot leak it.
  const raw = fs.readFileSync(tracePath, 'utf8');
  fs.rmSync(tracePath, { force: true });
  const trace = JSON.parse(raw);

  assert.strictEqual(trace.steps.length, 1);
  assert.strictEqual(trace.steps[0].step_id, 1);
  assert.strictEqual(trace.steps[0].type, 'tool_call');
  assert.strictEqual(trace.steps[0].tool, 'web_search');
  assert.deepStrictEqual(trace.steps[0].input, { query: 'capital of Australia' });
  assert.deepStrictEqual(trace.steps[0].output, {
    result: 'Canberra is the capital of Australia.'
  });
  assert.strictEqual(trace.output.answer, 'Canberra is the capital.');
  assert.deepStrictEqual(trace.eval_hints.expected_tools, ['web_search']);
});
|
|
40
|
+
|
|
41
|
+
// A bare string result is taken as the final answer and yields no steps.
test('fromMastra with string result', () => {
  const tracePath = fromMastra('Direct answer.', { agentName: 'simple' });

  // Delete the temp file before asserting so a failing assertion cannot leak it.
  const raw = fs.readFileSync(tracePath, 'utf8');
  fs.rmSync(tracePath, { force: true });
  const trace = JSON.parse(raw);

  assert.strictEqual(trace.output.answer, 'Direct answer.');
  assert.strictEqual(trace.metadata.agent_name, 'simple');
  assert.deepStrictEqual(trace.steps, []);
});
|
|
46
|
+
|
|
47
|
+
// total_tokens is the sum of prompt and completion tokens.
test('fromMastra with token usage', () => {
  const result = {
    text: 'Answer.',
    usage: { promptTokens: 100, completionTokens: 50 }
  };
  const tracePath = fromMastra(result);

  // Delete the temp file before asserting so a failing assertion cannot leak it.
  const raw = fs.readFileSync(tracePath, 'utf8');
  fs.rmSync(tracePath, { force: true });
  const trace = JSON.parse(raw);

  assert.strictEqual(trace.metadata.total_tokens, 150);
  assert.strictEqual(trace.output.answer, 'Answer.');
});
|
|
56
|
+
|
|
57
|
+
// A text-only generateText() result: answer, usage total, finish reason and
// the supplied options all reach the trace; no steps are produced.
test('fromVercel with text result', () => {
  const result = {
    text: 'The capital of France is Paris.',
    usage: { promptTokens: 50, completionTokens: 20 },
    finishReason: 'stop'
  };
  const tracePath = fromVercel(result, {
    question: 'What is the capital of France?',
    model: 'gpt-4o'
  });

  // Delete the temp file before asserting so a failing assertion cannot leak it.
  const raw = fs.readFileSync(tracePath, 'utf8');
  fs.rmSync(tracePath, { force: true });
  const trace = JSON.parse(raw);

  assert.strictEqual(trace.metadata.framework, 'vercel-ai');
  assert.strictEqual(trace.metadata.model, 'gpt-4o');
  assert.strictEqual(trace.input.user, 'What is the capital of France?');
  assert.strictEqual(trace.output.answer, 'The capital of France is Paris.');
  assert.strictEqual(trace.metadata.total_tokens, 70);
  assert.strictEqual(trace.output.finish_reason, 'stop');
  assert.deepStrictEqual(trace.steps, []);
});
|
|
73
|
+
|
|
74
|
+
// Top-level toolCalls/toolResults without ids are paired by position into a
// single tool_call step.
test('fromVercel with tool calls', () => {
  const result = {
    text: 'Canberra is the capital.',
    toolCalls: [{
      toolName: 'web_search',
      args: { query: 'capital of Australia' }
    }],
    toolResults: [{
      result: 'Canberra is the capital of Australia.'
    }]
  };
  const tracePath = fromVercel(result, {
    question: 'Capital of Australia?',
    expectedTools: ['web_search']
  });

  // Delete the temp file before asserting so a failing assertion cannot leak it.
  const raw = fs.readFileSync(tracePath, 'utf8');
  fs.rmSync(tracePath, { force: true });
  const trace = JSON.parse(raw);

  assert.strictEqual(trace.steps.length, 1);
  assert.strictEqual(trace.steps[0].type, 'tool_call');
  assert.strictEqual(trace.steps[0].tool, 'web_search');
  assert.deepStrictEqual(trace.steps[0].input, { query: 'capital of Australia' });
  assert.deepStrictEqual(trace.steps[0].output, {
    result: 'Canberra is the capital of Australia.'
  });
  assert.deepStrictEqual(trace.eval_hints.expected_tools, ['web_search']);
});
|
|
94
|
+
|
|
95
|
+
// Multi-step result: the intermediate text becomes a thought, and the tool
// call in the second step is paired with its result via toolCallId.
test('fromVercel with multi-step result', () => {
  const result = {
    text: 'Final answer.',
    steps: [
      { text: 'Thinking...', toolCalls: [], toolResults: [] },
      {
        text: '',
        toolCalls: [{ toolName: 'search', toolCallId: '1', args: { q: 'test' } }],
        toolResults: [{ toolCallId: '1', result: 'search result' }]
      }
    ]
  };
  const tracePath = fromVercel(result, { question: 'Test?' });

  // Delete the temp file before asserting so a failing assertion cannot leak it.
  const raw = fs.readFileSync(tracePath, 'utf8');
  fs.rmSync(tracePath, { force: true });
  const trace = JSON.parse(raw);

  // Pin the exact steps; `length >= 1` would pass even if one were dropped.
  assert.deepStrictEqual(trace.steps, [
    { step_id: 1, type: 'thought', content: 'Thinking...' },
    {
      step_id: 2,
      type: 'tool_call',
      tool: 'search',
      input: { q: 'test' },
      output: { result: 'search result' },
      duration_ms: 0
    }
  ]);
  assert.strictEqual(trace.output.answer, 'Final answer.');
  assert.strictEqual(trace.input.user, 'Test?');
});
|