evalforge 0.7.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,2 @@
1
+ export { fromMastra } from './mastra';
2
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/adapters/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,UAAU,CAAC"}
@@ -0,0 +1,6 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.fromMastra = void 0;
4
+ var mastra_1 = require("./mastra");
5
+ Object.defineProperty(exports, "fromMastra", { enumerable: true, get: function () { return mastra_1.fromMastra; } });
6
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/adapters/index.ts"],"names":[],"mappings":";;;AAAA,mCAAsC;AAA7B,oGAAA,UAAU,OAAA"}
@@ -0,0 +1,22 @@
1
+ /**
2
+ * Mastra adapter for EvalForge.
3
+ *
4
+ * Usage:
5
+ * import { Agent } from '@mastra/core';
6
+ * import { fromMastra } from 'evalforge/adapters/mastra';
7
+ * import { run } from 'evalforge';
8
+ *
9
+ * const agent = new Agent({ name: 'my-agent', ... });
10
+ * const result = await agent.generate('What is the capital of France?');
11
+ *
12
+ * const tracePath = fromMastra(result, { agentName: 'my-agent', model: 'gpt-4o' });
13
+ * const evalResult = run(tracePath, { metrics: ['faithfulness'] });
14
+ */
15
+ export interface MastraAdapterOptions {
16
+ agentName?: string;
17
+ model?: string;
18
+ question?: string;
19
+ expectedTools?: string[];
20
+ }
21
+ export declare function fromMastra(result: any, options?: MastraAdapterOptions): string;
22
+ //# sourceMappingURL=mastra.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"mastra.d.ts","sourceRoot":"","sources":["../../src/adapters/mastra.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAOH,MAAM,WAAW,oBAAoB;IACnC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;CAC1B;AAED,wBAAgB,UAAU,CACxB,MAAM,EAAE,GAAG,EACX,OAAO,GAAE,oBAAyB,GACjC,MAAM,CA4GR"}
@@ -0,0 +1,151 @@
1
+ "use strict";
2
+ /**
3
+ * Mastra adapter for EvalForge.
4
+ *
5
+ * Usage:
6
+ * import { Agent } from '@mastra/core';
7
+ * import { fromMastra } from 'evalforge/adapters/mastra';
8
+ * import { run } from 'evalforge';
9
+ *
10
+ * const agent = new Agent({ name: 'my-agent', ... });
11
+ * const result = await agent.generate('What is the capital of France?');
12
+ *
13
+ * const tracePath = fromMastra(result, { agentName: 'my-agent', model: 'gpt-4o' });
14
+ * const evalResult = run(tracePath, { metrics: ['faithfulness'] });
15
+ */
16
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
17
+ if (k2 === undefined) k2 = k;
18
+ var desc = Object.getOwnPropertyDescriptor(m, k);
19
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
20
+ desc = { enumerable: true, get: function() { return m[k]; } };
21
+ }
22
+ Object.defineProperty(o, k2, desc);
23
+ }) : (function(o, m, k, k2) {
24
+ if (k2 === undefined) k2 = k;
25
+ o[k2] = m[k];
26
+ }));
27
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
28
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
29
+ }) : function(o, v) {
30
+ o["default"] = v;
31
+ });
32
+ var __importStar = (this && this.__importStar) || (function () {
33
+ var ownKeys = function(o) {
34
+ ownKeys = Object.getOwnPropertyNames || function (o) {
35
+ var ar = [];
36
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
37
+ return ar;
38
+ };
39
+ return ownKeys(o);
40
+ };
41
+ return function (mod) {
42
+ if (mod && mod.__esModule) return mod;
43
+ var result = {};
44
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
45
+ __setModuleDefault(result, mod);
46
+ return result;
47
+ };
48
+ })();
49
+ Object.defineProperty(exports, "__esModule", { value: true });
50
+ exports.fromMastra = fromMastra;
51
+ const fs = __importStar(require("fs"));
52
+ const os = __importStar(require("os"));
53
+ const path = __importStar(require("path"));
54
+ function fromMastra(result, options = {}) {
55
+ /**
56
+ * Convert Mastra agent.generate() result to EvalForge trace JSON file.
57
+ *
58
+ * Supports:
59
+ * - result.text — final answer string
60
+ * - result.steps — array of reasoning steps
61
+ * - result.toolCalls — array of tool invocations
62
+ * - result.usage — token usage stats
63
+ */
64
+ const { agentName = 'mastra-agent', model = 'unknown', question = '', expectedTools = [], } = options;
65
+ const steps = [];
66
+ let stepId = 1;
67
+ let finalAnswer = '';
68
+ let totalTokens = 0;
69
+ // Extract final answer
70
+ if (typeof result === 'string') {
71
+ finalAnswer = result;
72
+ }
73
+ else if (result?.text) {
74
+ finalAnswer = result.text;
75
+ }
76
+ else if (result?.content) {
77
+ finalAnswer = String(result.content);
78
+ }
79
+ // Extract token usage
80
+ if (result?.usage) {
81
+ totalTokens = (result.usage.promptTokens ?? 0) +
82
+ (result.usage.completionTokens ?? 0);
83
+ }
84
+ // Extract steps if present
85
+ if (Array.isArray(result?.steps)) {
86
+ for (const step of result.steps) {
87
+ if (step.text) {
88
+ steps.push({
89
+ step_id: stepId++,
90
+ type: 'thought',
91
+ content: step.text,
92
+ });
93
+ }
94
+ }
95
+ }
96
+ // Extract tool calls if present
97
+ if (Array.isArray(result?.toolCalls)) {
98
+ for (const tc of result.toolCalls) {
99
+ steps.push({
100
+ step_id: stepId++,
101
+ type: 'tool_call',
102
+ tool: tc.toolName ?? tc.name ?? 'unknown_tool',
103
+ input: tc.args ?? tc.input ?? {},
104
+ output: { result: String(tc.result ?? '') },
105
+ duration_ms: 0,
106
+ });
107
+ }
108
+ }
109
+ // Extract tool results from steps if toolCalls not separate
110
+ if (Array.isArray(result?.toolResults)) {
111
+ for (const tr of result.toolResults) {
112
+ steps.push({
113
+ step_id: stepId++,
114
+ type: 'tool_call',
115
+ tool: tr.toolName ?? 'unknown_tool',
116
+ input: tr.args ?? {},
117
+ output: { result: String(tr.result ?? '') },
118
+ duration_ms: 0,
119
+ });
120
+ }
121
+ }
122
+ const trace = {
123
+ evalforge_version: '0.1',
124
+ trace_id: `mastra-${Date.now()}`,
125
+ timestamp: new Date().toISOString(),
126
+ metadata: {
127
+ framework: 'mastra',
128
+ model,
129
+ agent_name: agentName,
130
+ duration_ms: 0,
131
+ total_tokens: totalTokens,
132
+ },
133
+ input: {
134
+ user: question,
135
+ system: '',
136
+ },
137
+ steps,
138
+ output: {
139
+ answer: finalAnswer,
140
+ },
141
+ eval_hints: {
142
+ expected_tools: expectedTools,
143
+ expected_answer: null,
144
+ context_documents: [],
145
+ },
146
+ };
147
+ const tmp = path.join(os.tmpdir(), `evalforge_mastra_${Date.now()}.json`);
148
+ fs.writeFileSync(tmp, JSON.stringify(trace, null, 2));
149
+ return tmp;
150
+ }
151
+ //# sourceMappingURL=mastra.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"mastra.js","sourceRoot":"","sources":["../../src/adapters/mastra.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;;GAaG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAcH,gCA+GC;AA3HD,uCAAyB;AACzB,uCAAyB;AACzB,2CAA6B;AAU7B,SAAgB,UAAU,CACxB,MAAW,EACX,UAAgC,EAAE;IAElC;;;;;;;;OAQG;IAEH,MAAM,EACJ,SAAS,GAAG,cAAc,EAC1B,KAAK,GAAG,SAAS,EACjB,QAAQ,GAAG,EAAE,EACb,aAAa,GAAG,EAAE,GACnB,GAAG,OAAO,CAAC;IAEZ,MAAM,KAAK,GAAmB,EAAE,CAAC;IACjC,IAAI,MAAM,GAAG,CAAC,CAAC;IACf,IAAI,WAAW,GAAG,EAAE,CAAC;IACrB,IAAI,WAAW,GAAG,CAAC,CAAC;IAEpB,uBAAuB;IACvB,IAAI,OAAO,MAAM,KAAK,QAAQ,EAAE,CAAC;QAC/B,WAAW,GAAG,MAAM,CAAC;IACvB,CAAC;SAAM,IAAI,MAAM,EAAE,IAAI,EAAE,CAAC;QACxB,WAAW,GAAG,MAAM,CAAC,IAAI,CAAC;IAC5B,CAAC;SAAM,IAAI,MAAM,EAAE,OAAO,EAAE,CAAC;QAC3B,WAAW,GAAG,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;IACvC,CAAC;IAED,sBAAsB;IACtB,IAAI,MAAM,EAAE,KAAK,EAAE,CAAC;QAClB,WAAW,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,YAAY,IAAI,CAAC,CAAC;YAChC,CAAC,MAAM,CAAC,KAAK,CAAC,gBAAgB,IAAI,CAAC,CAAC,CAAC;IACrD,CAAC;IAED,2BAA2B;IAC3B,IAAI,KAAK,CAAC,OAAO,CAAC,MAAM,EAAE,KAAK,CAAC,EAAE,CAAC;QACjC,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,KAAK,EAAE,CAAC;YAChC,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;gBACd,KAAK,CAAC,IAAI,CAAC;oBACT,OAAO,EAAE,MAAM,EAAE;oBACjB,IAAI,EAAE,SAAS;oBACf,OAAO,EAAE,IAAI,CAAC,IAAI;iBACnB,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;IAED,gCAAgC;IAChC,IAAI,KAAK,CAAC,OAAO,CAAC,MAAM,EAAE,SAAS,CAAC,EAAE,CAAC;QACrC,KAAK,MAAM,EAAE,IAAI,MAAM,CAAC,SAAS,EAAE,CAAC;YAClC,KAAK,CAAC,IAAI,CAAC;gBACT,OAAO,EAAE,MAAM,EAAE;gBACjB,IAAI,EAAE,WAAW;gBACjB,IAAI,EAAE,EAAE,CAAC,QAAQ,IAAI,EAAE,CAAC,IAAI,IAAI,cAAc;gBAC9C,KAAK,EAAE,EAAE,CAAC,IAAI,IAAI,EAAE,CAAC,KAAK,IAAI,EAAE;gBAChC,MAAM,EAAE,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC,MAAM,IAAI,EAAE,CAAC,EAAE;gBAC3C,WAAW,EAAE,CAAC;aACf,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,4DAA4D;IAC5D,IAAI,KAAK,CAAC,OAAO,CAAC,MAAM,EAAE,WAAW,CAAC,EAAE,CAAC;QACvC,KAAK,MAAM,EAAE,IAAI,MAAM,CAAC,WAAW,EAAE,CAAC;YACpC,KAAK,CAAC,IAAI,CAAC;gBACT,OAAO,EAAE,MAAM,EAAE;gBACjB,IAAI,EAAE,WAAW;gBACjB,IAAI,EAAE,EAAE,CAAC,QAAQ,IAAI,cAAc;gBACnC,KAAK,EAAE,EAAE,
CAAC,IAAI,IAAI,EAAE;gBACpB,MAAM,EAAE,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC,MAAM,IAAI,EAAE,CAAC,EAAE;gBAC3C,WAAW,EAAE,CAAC;aACf,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,MAAM,KAAK,GAAU;QACnB,iBAAiB,EAAE,KAAK;QACxB,QAAQ,EAAE,UAAU,IAAI,CAAC,GAAG,EAAE,EAAE;QAChC,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACnC,QAAQ,EAAE;YACR,SAAS,EAAE,QAAQ;YACnB,KAAK;YACL,UAAU,EAAE,SAAS;YACrB,WAAW,EAAE,CAAC;YACd,YAAY,EAAE,WAAW;SAC1B;QACD,KAAK,EAAE;YACL,IAAI,EAAE,QAAQ;YACd,MAAM,EAAE,EAAE;SACX;QACD,KAAK;QACL,MAAM,EAAE;YACN,MAAM,EAAE,WAAW;SACpB;QACD,UAAU,EAAE;YACV,cAAc,EAAE,aAAa;YAC7B,eAAe,EAAE,IAAI;YACrB,iBAAiB,EAAE,EAAE;SACtB;KACF,CAAC;IAEF,MAAM,GAAG,GAAG,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,MAAM,EAAE,EAAE,oBAAoB,IAAI,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;IAC1E,EAAE,CAAC,aAAa,CAAC,GAAG,EAAE,IAAI,CAAC,SAAS,CAAC,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;IACtD,OAAO,GAAG,CAAC;AACb,CAAC"}
package/dist/index.d.ts CHANGED
@@ -1,5 +1,6 @@
1
1
  import { EvalResult, RunOptions } from './types';
2
2
  export { EvalResult, MetricResult, RunOptions, Trace } from './types';
3
+ export * from './adapters';
3
4
  export declare function run(tracePath: string, options: RunOptions): EvalResult;
4
5
  export declare function demo(): EvalResult;
5
6
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAKA,OAAO,EAAE,UAAU,EAAE,UAAU,EAAS,MAAM,SAAS,CAAC;AAExD,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,UAAU,EAAE,KAAK,EAAE,MAAM,SAAS,CAAC;AAkCtE,wBAAgB,GAAG,CAAC,SAAS,EAAE,MAAM,EAAE,OAAO,EAAE,UAAU,GAAG,UAAU,CAmBtE;AAED,wBAAgB,IAAI,IAAI,UAAU,CAuCjC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAKA,OAAO,EAAE,UAAU,EAAE,UAAU,EAAS,MAAM,SAAS,CAAC;AAExD,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,UAAU,EAAE,KAAK,EAAE,MAAM,SAAS,CAAC;AACtE,cAAc,YAAY,CAAC;AAkC3B,wBAAgB,GAAG,CAAC,SAAS,EAAE,MAAM,EAAE,OAAO,EAAE,UAAU,GAAG,UAAU,CAmBtE;AAED,wBAAgB,IAAI,IAAI,UAAU,CAuCjC"}
package/dist/index.js CHANGED
@@ -32,6 +32,9 @@ var __importStar = (this && this.__importStar) || (function () {
32
32
  return result;
33
33
  };
34
34
  })();
35
+ var __exportStar = (this && this.__exportStar) || function(m, exports) {
36
+ for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
37
+ };
35
38
  Object.defineProperty(exports, "__esModule", { value: true });
36
39
  exports.run = run;
37
40
  exports.demo = demo;
@@ -40,6 +43,7 @@ const fs = __importStar(require("fs"));
40
43
  const os = __importStar(require("os"));
41
44
  const path = __importStar(require("path"));
42
45
  const binary_1 = require("./binary");
46
+ __exportStar(require("./adapters"), exports);
43
47
  function parseOutput(output) {
44
48
  const metrics = [];
45
49
  const lines = output.split('\n');
package/dist/index.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAyCA,kBAmBC;AAED,oBAuCC;AArGD,iDAA0C;AAC1C,uCAAyB;AACzB,uCAAyB;AACzB,2CAA6B;AAC7B,qCAAsC;AAKtC,SAAS,WAAW,CAAC,MAAc;IACjC,MAAM,OAAO,GAAG,EAAE,CAAC;IACnB,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAEjC,IAAI,OAAO,GAAG,EAAE,CAAC;IACjB,IAAI,SAAS,GAAG,EAAE,CAAC;IACnB,IAAI,aAAa,GAAG,IAAI,CAAC;IAEzB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QAE7B,IAAI,IAAI,CAAC,UAAU,CAAC,WAAW,CAAC;YAAE,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QACtE,IAAI,IAAI,CAAC,UAAU,CAAC,YAAY,CAAC;YAAE,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QACzE,IAAI,IAAI,KAAK,eAAe;YAAE,aAAa,GAAG,KAAK,CAAC;QAEpD,MAAM,WAAW,GAAG,IAAI,CAAC,KAAK,CAAC,kCAAkC,CAAC,CAAC;QACnE,IAAI,WAAW,EAAE,CAAC;YAChB,MAAM,MAAM,GAAG,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC,UAAU,CAAC,SAAS,CAAC;gBACvD,CAAC,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE;gBAC5C,CAAC,CAAC,EAAE,CAAC;YACP,OAAO,CAAC,IAAI,CAAC;gBACX,MAAM,EAAE,WAAW,CAAC,CAAC,CAAC;gBACtB,KAAK,EAAE,UAAU,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC;gBACjC,MAAM,EAAE,WAAW,CAAC,CAAC,CAAC,KAAK,MAAM;gBACjC,MAAM;aACP,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,OAAO,EAAE,MAAM,EAAE,aAAa,EAAE,CAAC;AAChE,CAAC;AAED,SAAgB,GAAG,CAAC,SAAiB,EAAE,OAAmB;IACxD,MAAM,MAAM,GAAG,IAAA,mBAAU,GAAE,CAAC;IAC5B,MAAM,IAAI,GAAG;QACX,KAAK;QACL,SAAS,EAAE,SAAS;QACpB,WAAW,EAAE,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC;QACtC,aAAa,EAAE,MAAM,CAAC,OAAO,CAAC,SAAS,IAAI,GAAG,CAAC;KAChD,CAAC;IAEF,IAAI,OAAO,CAAC,IAAI;QAAE,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IACtC,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC;QAAC,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QAAC,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;IAAC,CAAC;IAEzE,MAAM,GAAG,GAAG,EAAE,GAAG,OAAO,CAAC,GAAG,EAAE,CAAC;IAC/B,IAAI,OAAO,CAAC,MAAM;QAA
E,GAAG,CAAC,iBAAiB,GAAG,OAAO,CAAC,MAAM,CAAC;IAE3D,MAAM,MAAM,GAAG,IAAA,yBAAS,EAAC,MAAM,EAAE,IAAI,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE,GAAG,EAAE,CAAC,CAAC;IAClE,IAAI,MAAM,CAAC,KAAK;QAAE,MAAM,MAAM,CAAC,KAAK,CAAC;IAErC,OAAO,WAAW,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;AACpC,CAAC;AAED,SAAgB,IAAI;IAClB,MAAM,KAAK,GAAU;QACnB,iBAAiB,EAAE,KAAK;QACxB,QAAQ,EAAE,aAAa;QACvB,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACnC,QAAQ,EAAE;YACR,SAAS,EAAE,eAAe;YAC1B,KAAK,EAAE,QAAQ;YACf,UAAU,EAAE,YAAY;YACxB,WAAW,EAAE,IAAI;YACjB,YAAY,EAAE,GAAG;SAClB;QACD,KAAK,EAAE;YACL,IAAI,EAAE,mCAAmC;YACzC,MAAM,EAAE,8BAA8B;SACvC;QACD,KAAK,EAAE;YACL,EAAE,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,SAAS,EAAE,OAAO,EAAE,yBAAyB,EAAE;YACnE;gBACE,OAAO,EAAE,CAAC;gBACV,IAAI,EAAE,WAAW;gBACjB,IAAI,EAAE,YAAY;gBAClB,KAAK,EAAE,EAAE,KAAK,EAAE,sBAAsB,EAAE;gBACxC,MAAM,EAAE,EAAE,MAAM,EAAE,uCAAuC,EAAE;gBAC3D,WAAW,EAAE,GAAG;aACjB;SACF;QACD,MAAM,EAAE,EAAE,MAAM,EAAE,uCAAuC,EAAE;QAC3D,UAAU,EAAE;YACV,cAAc,EAAE,CAAC,YAAY,CAAC;YAC9B,eAAe,EAAE,UAAU;YAC3B,iBAAiB,EAAE,EAAE;SACtB;KACF,CAAC;IAEF,MAAM,GAAG,GAAG,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,MAAM,EAAE,EAAE,kBAAkB,IAAI,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;IACxE,EAAE,CAAC,aAAa,CAAC,GAAG,EAAE,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC;IAE7C,OAAO,GAAG,CAAC,GAAG,EAAE,EAAE,OAAO,EAAE,CAAC,cAAc,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC;AAC7D,CAAC"}
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AA0CA,kBAmBC;AAED,oBAuCC;AAtGD,iDAA0C;AAC1C,uCAAyB;AACzB,uCAAyB;AACzB,2CAA6B;AAC7B,qCAAsC;AAItC,6CAA2B;AAE3B,SAAS,WAAW,CAAC,MAAc;IACjC,MAAM,OAAO,GAAG,EAAE,CAAC;IACnB,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAEjC,IAAI,OAAO,GAAG,EAAE,CAAC;IACjB,IAAI,SAAS,GAAG,EAAE,CAAC;IACnB,IAAI,aAAa,GAAG,IAAI,CAAC;IAEzB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QAE7B,IAAI,IAAI,CAAC,UAAU,CAAC,WAAW,CAAC;YAAE,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QACtE,IAAI,IAAI,CAAC,UAAU,CAAC,YAAY,CAAC;YAAE,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QACzE,IAAI,IAAI,KAAK,eAAe;YAAE,aAAa,GAAG,KAAK,CAAC;QAEpD,MAAM,WAAW,GAAG,IAAI,CAAC,KAAK,CAAC,kCAAkC,CAAC,CAAC;QACnE,IAAI,WAAW,EAAE,CAAC;YAChB,MAAM,MAAM,GAAG,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC,UAAU,CAAC,SAAS,CAAC;gBACvD,CAAC,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE;gBAC5C,CAAC,CAAC,EAAE,CAAC;YACP,OAAO,CAAC,IAAI,CAAC;gBACX,MAAM,EAAE,WAAW,CAAC,CAAC,CAAC;gBACtB,KAAK,EAAE,UAAU,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC;gBACjC,MAAM,EAAE,WAAW,CAAC,CAAC,CAAC,KAAK,MAAM;gBACjC,MAAM;aACP,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,OAAO,EAAE,MAAM,EAAE,aAAa,EAAE,CAAC;AAChE,CAAC;AAED,SAAgB,GAAG,CAAC,SAAiB,EAAE,OAAmB;IACxD,MAAM,MAAM,GAAG,IAAA,mBAAU,GAAE,CAAC;IAC5B,MAAM,IAAI,GAAG;QACX,KAAK;QACL,SAAS,EAAE,SAAS;QACpB,WAAW,EAAE,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC;QACtC,aAAa,EAAE,MAAM,CAAC,OAAO,CAAC,SAAS,IAAI,GAAG,CAAC;KAChD,CAAC;IAEF,IAAI,OAAO,CAAC,IAAI;QAAE,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IACtC,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC;QAAC,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QAAC,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;IAAC,CAAC;IAEzE,MAAM,GAAG,GAAG,EAAE,GAAG,OAAO,CAAC,GAAG,EAAE,CAAC;IAC/B,IAAI,OA
AO,CAAC,MAAM;QAAE,GAAG,CAAC,iBAAiB,GAAG,OAAO,CAAC,MAAM,CAAC;IAE3D,MAAM,MAAM,GAAG,IAAA,yBAAS,EAAC,MAAM,EAAE,IAAI,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE,GAAG,EAAE,CAAC,CAAC;IAClE,IAAI,MAAM,CAAC,KAAK;QAAE,MAAM,MAAM,CAAC,KAAK,CAAC;IAErC,OAAO,WAAW,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;AACpC,CAAC;AAED,SAAgB,IAAI;IAClB,MAAM,KAAK,GAAU;QACnB,iBAAiB,EAAE,KAAK;QACxB,QAAQ,EAAE,aAAa;QACvB,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACnC,QAAQ,EAAE;YACR,SAAS,EAAE,eAAe;YAC1B,KAAK,EAAE,QAAQ;YACf,UAAU,EAAE,YAAY;YACxB,WAAW,EAAE,IAAI;YACjB,YAAY,EAAE,GAAG;SAClB;QACD,KAAK,EAAE;YACL,IAAI,EAAE,mCAAmC;YACzC,MAAM,EAAE,8BAA8B;SACvC;QACD,KAAK,EAAE;YACL,EAAE,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,SAAS,EAAE,OAAO,EAAE,yBAAyB,EAAE;YACnE;gBACE,OAAO,EAAE,CAAC;gBACV,IAAI,EAAE,WAAW;gBACjB,IAAI,EAAE,YAAY;gBAClB,KAAK,EAAE,EAAE,KAAK,EAAE,sBAAsB,EAAE;gBACxC,MAAM,EAAE,EAAE,MAAM,EAAE,uCAAuC,EAAE;gBAC3D,WAAW,EAAE,GAAG;aACjB;SACF;QACD,MAAM,EAAE,EAAE,MAAM,EAAE,uCAAuC,EAAE;QAC3D,UAAU,EAAE;YACV,cAAc,EAAE,CAAC,YAAY,CAAC;YAC9B,eAAe,EAAE,UAAU;YAC3B,iBAAiB,EAAE,EAAE;SACtB;KACF,CAAC;IAEF,MAAM,GAAG,GAAG,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,MAAM,EAAE,EAAE,kBAAkB,IAAI,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;IACxE,EAAE,CAAC,aAAa,CAAC,GAAG,EAAE,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC;IAE7C,OAAO,GAAG,CAAC,GAAG,EAAE,EAAE,OAAO,EAAE,CAAC,cAAc,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC;AAC7D,CAAC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "evalforge",
3
- "version": "0.7.0",
3
+ "version": "0.7.1",
4
4
  "description": "Framework-agnostic LLM agent evaluation harness",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
@@ -0,0 +1 @@
1
+ export { fromMastra } from './mastra';
@@ -0,0 +1,139 @@
1
+ /**
2
+ * Mastra adapter for EvalForge.
3
+ *
4
+ * Usage:
5
+ * import { Agent } from '@mastra/core';
6
+ * import { fromMastra } from 'evalforge/adapters/mastra';
7
+ * import { run } from 'evalforge';
8
+ *
9
+ * const agent = new Agent({ name: 'my-agent', ... });
10
+ * const result = await agent.generate('What is the capital of France?');
11
+ *
12
+ * const tracePath = fromMastra(result, { agentName: 'my-agent', model: 'gpt-4o' });
13
+ * const evalResult = run(tracePath, { metrics: ['faithfulness'] });
14
+ */
15
+
16
+ import * as fs from 'fs';
17
+ import * as os from 'os';
18
+ import * as path from 'path';
19
+ import { Trace } from '../types';
20
+
21
/** Options accepted by {@link fromMastra}. Every field is optional. */
export interface MastraAdapterOptions {
  /** Written to `metadata.agent_name`. Defaults to `'mastra-agent'`. */
  agentName?: string;
  /** Written to `metadata.model`. Defaults to `'unknown'`. */
  model?: string;
  /** The user prompt, written to `input.user`. Defaults to `''`. */
  question?: string;
  /** Written to `eval_hints.expected_tools`. Defaults to `[]`. */
  expectedTools?: string[];
}

/**
 * Renders an arbitrary value as text for the trace file.
 *
 * `String(obj)` yields `"[object Object]"`, which throws away structured tool
 * results, so objects and arrays are serialised as JSON instead. Values JSON
 * cannot represent (circular structures, BigInt members) fall back to
 * `String()` rather than throwing.
 */
function stringifyValue(value: unknown): string {
  if (value == null) return '';
  if (typeof value === 'string') return value;
  if (typeof value === 'object') {
    try {
      return JSON.stringify(value) ?? String(value);
    } catch {
      return String(value);
    }
  }
  return String(value);
}

/** Returns the first candidate that is a finite number, or 0 when none is. */
function firstFiniteNumber(...candidates: unknown[]): number {
  for (const candidate of candidates) {
    if (typeof candidate === 'number' && Number.isFinite(candidate)) {
      return candidate;
    }
  }
  return 0;
}

/**
 * Converts the result of a Mastra `agent.generate()` call into an EvalForge
 * trace JSON file and returns the path of that file.
 *
 * Fields read from `result` (all optional):
 * - `text`, or `content` as a fallback: the final answer. A plain string
 *   `result` is used as the answer directly.
 * - `steps[].text`: each non-empty entry becomes a `thought` step.
 * - `toolCalls[]` / `toolResults[]`: become `tool_call` steps. A result whose
 *   `toolCallId` matches a call is merged into that call's step instead of
 *   being recorded as a second invocation. Results without a matching call
 *   are appended as their own steps.
 * - `usage`: `promptTokens + completionTokens`, also accepting the
 *   `inputTokens` / `outputTokens` spelling.
 *
 * The trace is written to a freshly created, uniquely named directory under
 * the OS temp dir, so concurrent or rapid successive calls never overwrite
 * each other. The file is not deleted here; the caller owns its cleanup.
 *
 * @param result - Value returned by `agent.generate()`, or a plain answer string.
 * @param options - Metadata and evaluation hints to embed in the trace.
 * @returns Absolute path of the written trace JSON file.
 * @throws Whatever `fs.mkdtempSync` / `fs.writeFileSync` throw when the temp
 *   directory is not writable.
 *
 * @example
 * import { fromMastra, run } from 'evalforge';
 *
 * const result = await agent.generate('What is the capital of France?');
 * const tracePath = fromMastra(result, { agentName: 'my-agent', model: 'gpt-4o' });
 * const evalResult = run(tracePath, { metrics: ['faithfulness'] });
 */
export function fromMastra(
  result: any,
  options: MastraAdapterOptions = {}
): string {
  const {
    agentName = 'mastra-agent',
    model = 'unknown',
    question = '',
    expectedTools = [],
  } = options;

  const steps: Trace['steps'] = [];
  let stepId = 1;

  // Final answer: a bare string wins, then `text`, then `content`.
  let finalAnswer = '';
  if (typeof result === 'string') {
    finalAnswer = result;
  } else if (result?.text) {
    finalAnswer = stringifyValue(result.text);
  } else if (result?.content) {
    finalAnswer = stringifyValue(result.content);
  }

  // Token usage: ignore NaN / non-numeric fields so the total stays a number.
  const usage = typeof result === 'string' ? undefined : result?.usage;
  const totalTokens =
    firstFiniteNumber(usage?.promptTokens, usage?.inputTokens) +
    firstFiniteNumber(usage?.completionTokens, usage?.outputTokens);

  // Reasoning steps.
  if (Array.isArray(result?.steps)) {
    for (const step of result.steps) {
      if (step?.text) {
        steps.push({
          step_id: stepId++,
          type: 'thought',
          content: step.text,
        });
      }
    }
  }

  const toolCalls: any[] = Array.isArray(result?.toolCalls) ? result.toolCalls : [];
  const toolResults: any[] = Array.isArray(result?.toolResults) ? result.toolResults : [];

  // Index results by toolCallId so a call and its result collapse into one step.
  const resultsByCallId = new Map<unknown, any>();
  for (const tr of toolResults) {
    if (tr?.toolCallId != null && !resultsByCallId.has(tr.toolCallId)) {
      resultsByCallId.set(tr.toolCallId, tr);
    }
  }
  const mergedResults = new Set<any>();

  for (const tc of toolCalls) {
    if (tc == null) continue;
    const matched =
      tc.toolCallId != null ? resultsByCallId.get(tc.toolCallId) : undefined;
    if (matched !== undefined) mergedResults.add(matched);

    steps.push({
      step_id: stepId++,
      type: 'tool_call',
      tool: tc.toolName ?? tc.name ?? matched?.toolName ?? matched?.name ?? 'unknown_tool',
      input: tc.args ?? tc.input ?? matched?.args ?? matched?.input ?? {},
      output: {
        result: stringifyValue(
          tc.result ?? tc.output ?? matched?.result ?? matched?.output
        ),
      },
      duration_ms: 0,
    });
  }

  // Results that were not merged into a call still describe a tool invocation.
  for (const tr of toolResults) {
    if (tr == null || mergedResults.has(tr)) continue;
    steps.push({
      step_id: stepId++,
      type: 'tool_call',
      tool: tr.toolName ?? tr.name ?? 'unknown_tool',
      input: tr.args ?? tr.input ?? {},
      output: { result: stringifyValue(tr.result ?? tr.output) },
      duration_ms: 0,
    });
  }

  const trace: Trace = {
    evalforge_version: '0.1',
    trace_id: `mastra-${Date.now()}`,
    timestamp: new Date().toISOString(),
    metadata: {
      framework: 'mastra',
      model,
      agent_name: agentName,
      duration_ms: 0,
      total_tokens: totalTokens,
    },
    input: {
      user: question,
      system: '',
    },
    steps,
    output: {
      answer: finalAnswer,
    },
    eval_hints: {
      expected_tools: expectedTools,
      expected_answer: null,
      context_documents: [],
    },
  };

  // mkdtemp appends random characters, so the path is unique per call even
  // when several traces are produced within the same millisecond.
  const traceDir = fs.mkdtempSync(path.join(os.tmpdir(), 'evalforge_mastra_'));
  const tracePath = path.join(traceDir, `evalforge_mastra_${Date.now()}.json`);
  fs.writeFileSync(tracePath, JSON.stringify(trace, null, 2));
  return tracePath;
}
package/src/index.ts CHANGED
@@ -6,6 +6,7 @@ import { findBinary } from './binary';
6
6
  import { EvalResult, RunOptions, Trace } from './types';
7
7
 
8
8
  export { EvalResult, MetricResult, RunOptions, Trace } from './types';
9
+ export * from './adapters';
9
10
 
10
11
  function parseOutput(output: string): EvalResult {
11
12
  const metrics = [];
@@ -0,0 +1,55 @@
1
+ const { test } = require('node:test');
2
+ const assert = require('node:assert');
3
+ const path = require('path');
4
+ const fs = require('fs');
5
+
6
+ const { fromMastra } = require('../dist/adapters/index.js');
7
+
8
/**
 * Reads and parses the trace JSON written by `fromMastra`, then deletes the
 * file so test runs do not accumulate artifacts in the OS temp directory.
 */
function loadTrace(tracePath) {
  try {
    return JSON.parse(fs.readFileSync(tracePath, 'utf8'));
  } finally {
    fs.rmSync(tracePath, { force: true });
  }
}

test('fromMastra with text result', () => {
  const result = { text: 'The capital of France is Paris.' };
  const trace = loadTrace(fromMastra(result, {
    agentName: 'test-agent',
    model: 'gpt-4o',
    question: 'What is the capital of France?'
  }));
  assert.strictEqual(trace.metadata.framework, 'mastra');
  assert.strictEqual(trace.metadata.agent_name, 'test-agent');
  assert.strictEqual(trace.metadata.model, 'gpt-4o');
  assert.strictEqual(trace.output.answer, 'The capital of France is Paris.');
  assert.strictEqual(trace.input.user, 'What is the capital of France?');
});

test('fromMastra with tool calls', () => {
  const result = {
    text: 'Canberra is the capital.',
    toolCalls: [{
      toolName: 'web_search',
      args: { query: 'capital of Australia' },
      result: 'Canberra is the capital of Australia.'
    }]
  };
  const trace = loadTrace(fromMastra(result, {
    question: 'What is the capital of Australia?',
    expectedTools: ['web_search']
  }));
  assert.strictEqual(trace.steps.length, 1);
  assert.strictEqual(trace.steps[0].type, 'tool_call');
  assert.strictEqual(trace.steps[0].tool, 'web_search');
  // The recorded call must carry its arguments and its result, not just its name.
  assert.deepStrictEqual(trace.steps[0].input, { query: 'capital of Australia' });
  assert.strictEqual(trace.steps[0].output.result, 'Canberra is the capital of Australia.');
  assert.deepStrictEqual(trace.eval_hints.expected_tools, ['web_search']);
});

test('fromMastra with string result', () => {
  const trace = loadTrace(fromMastra('Direct answer.', { agentName: 'simple' }));
  assert.strictEqual(trace.output.answer, 'Direct answer.');
  assert.strictEqual(trace.metadata.agent_name, 'simple');
});

test('fromMastra with token usage', () => {
  const result = {
    text: 'Answer.',
    usage: { promptTokens: 100, completionTokens: 50 }
  };
  const trace = loadTrace(fromMastra(result));
  assert.strictEqual(trace.metadata.total_tokens, 150);
});