@pauly4010/evalai-sdk 1.4.1 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +34 -0
- package/README.md +102 -8
- package/dist/cli/api.d.ts +79 -0
- package/dist/cli/api.js +74 -0
- package/dist/cli/check.d.ts +15 -12
- package/dist/cli/check.js +113 -134
- package/dist/cli/ci-context.d.ts +6 -0
- package/dist/cli/ci-context.js +51 -0
- package/dist/cli/config.d.ts +24 -0
- package/dist/cli/config.js +158 -0
- package/dist/cli/constants.d.ts +13 -0
- package/dist/cli/constants.js +16 -0
- package/dist/cli/doctor.d.ts +11 -0
- package/dist/cli/doctor.js +82 -0
- package/dist/cli/formatters/github.d.ts +8 -0
- package/dist/cli/formatters/github.js +119 -0
- package/dist/cli/formatters/human.d.ts +6 -0
- package/dist/cli/formatters/human.js +92 -0
- package/dist/cli/formatters/json.d.ts +6 -0
- package/dist/cli/formatters/json.js +10 -0
- package/dist/cli/formatters/types.d.ts +76 -0
- package/dist/cli/formatters/types.js +5 -0
- package/dist/cli/gate.d.ts +13 -0
- package/dist/cli/gate.js +108 -0
- package/dist/cli/index.d.ts +1 -0
- package/dist/cli/index.js +31 -5
- package/dist/cli/init.d.ts +7 -0
- package/dist/cli/init.js +69 -0
- package/dist/cli/render/snippet.d.ts +5 -0
- package/dist/cli/render/snippet.js +15 -0
- package/dist/cli/render/sort.d.ts +10 -0
- package/dist/cli/render/sort.js +24 -0
- package/dist/cli/report/build-check-report.d.ts +16 -0
- package/dist/cli/report/build-check-report.js +94 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.js +4 -1
- package/dist/integrations/openai-eval.d.ts +53 -0
- package/dist/integrations/openai-eval.js +226 -0
- package/dist/utils/input-hash.d.ts +8 -0
- package/dist/utils/input-hash.js +38 -0
- package/package.json +5 -1
- package/dist/__tests__/assertions.test.d.ts +0 -1
- package/dist/__tests__/assertions.test.js +0 -288
- package/dist/__tests__/client.test.d.ts +0 -1
- package/dist/__tests__/client.test.js +0 -185
- package/dist/__tests__/testing.test.d.ts +0 -1
- package/dist/__tests__/testing.test.js +0 -230
- package/dist/__tests__/workflows.test.d.ts +0 -1
- package/dist/__tests__/workflows.test.js +0 -222
|
@@ -1,230 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
const vitest_1 = require("vitest");
|
|
4
|
-
const testing_1 = require("../testing");
|
|
5
|
-
const assertions_1 = require("../assertions");
|
|
6
|
-
(0, vitest_1.describe)('TestSuite', () => {
|
|
7
|
-
(0, vitest_1.describe)('basic execution', () => {
|
|
8
|
-
(0, vitest_1.it)('should run test cases with an executor', async () => {
|
|
9
|
-
const suite = (0, testing_1.createTestSuite)('basic-tests', {
|
|
10
|
-
cases: [
|
|
11
|
-
{
|
|
12
|
-
input: 'Hello',
|
|
13
|
-
assertions: [
|
|
14
|
-
(output) => (0, assertions_1.expect)(output).toContain('Hello'),
|
|
15
|
-
],
|
|
16
|
-
},
|
|
17
|
-
],
|
|
18
|
-
executor: async (input) => `Echo: ${input}`,
|
|
19
|
-
});
|
|
20
|
-
const result = await suite.run();
|
|
21
|
-
(0, vitest_1.expect)(result.name).toBe('basic-tests');
|
|
22
|
-
(0, vitest_1.expect)(result.total).toBe(1);
|
|
23
|
-
(0, vitest_1.expect)(result.passed).toBe(1);
|
|
24
|
-
(0, vitest_1.expect)(result.failed).toBe(0);
|
|
25
|
-
(0, vitest_1.expect)(result.results[0].passed).toBe(true);
|
|
26
|
-
});
|
|
27
|
-
(0, vitest_1.it)('should fail when assertion fails', async () => {
|
|
28
|
-
const suite = (0, testing_1.createTestSuite)('fail-tests', {
|
|
29
|
-
cases: [
|
|
30
|
-
{
|
|
31
|
-
input: 'Hello',
|
|
32
|
-
assertions: [
|
|
33
|
-
(output) => (0, assertions_1.expect)(output).toContain('missing keyword'),
|
|
34
|
-
],
|
|
35
|
-
},
|
|
36
|
-
],
|
|
37
|
-
executor: async (input) => `Echo: ${input}`,
|
|
38
|
-
});
|
|
39
|
-
const result = await suite.run();
|
|
40
|
-
(0, vitest_1.expect)(result.passed).toBe(0);
|
|
41
|
-
(0, vitest_1.expect)(result.failed).toBe(1);
|
|
42
|
-
(0, vitest_1.expect)(result.results[0].passed).toBe(false);
|
|
43
|
-
});
|
|
44
|
-
});
|
|
45
|
-
(0, vitest_1.describe)('default equality check', () => {
|
|
46
|
-
(0, vitest_1.it)('should use toEqual when expected is provided without assertions', async () => {
|
|
47
|
-
const suite = (0, testing_1.createTestSuite)('equality-tests', {
|
|
48
|
-
cases: [
|
|
49
|
-
{ input: 'hello', expected: 'hello' },
|
|
50
|
-
],
|
|
51
|
-
// No executor — uses expected as actual
|
|
52
|
-
});
|
|
53
|
-
const result = await suite.run();
|
|
54
|
-
(0, vitest_1.expect)(result.passed).toBe(1);
|
|
55
|
-
});
|
|
56
|
-
(0, vitest_1.it)('should fail when expected does not match', async () => {
|
|
57
|
-
const suite = (0, testing_1.createTestSuite)('equality-fail', {
|
|
58
|
-
cases: [
|
|
59
|
-
{ input: 'hello', expected: 'world' },
|
|
60
|
-
],
|
|
61
|
-
executor: async (input) => input, // Returns 'hello', not 'world'
|
|
62
|
-
});
|
|
63
|
-
const result = await suite.run();
|
|
64
|
-
(0, vitest_1.expect)(result.failed).toBe(1);
|
|
65
|
-
});
|
|
66
|
-
});
|
|
67
|
-
(0, vitest_1.describe)('parallel execution', () => {
|
|
68
|
-
(0, vitest_1.it)('should run tests in parallel', async () => {
|
|
69
|
-
const order = [];
|
|
70
|
-
const suite = (0, testing_1.createTestSuite)('parallel-tests', {
|
|
71
|
-
cases: [
|
|
72
|
-
{ id: '1', input: 'a', expected: 'a' },
|
|
73
|
-
{ id: '2', input: 'b', expected: 'b' },
|
|
74
|
-
{ id: '3', input: 'c', expected: 'c' },
|
|
75
|
-
],
|
|
76
|
-
executor: async (input) => {
|
|
77
|
-
order.push(parseInt(input, 36) - 9); // a=1, b=2, c=3
|
|
78
|
-
return input;
|
|
79
|
-
},
|
|
80
|
-
parallel: true,
|
|
81
|
-
});
|
|
82
|
-
const result = await suite.run();
|
|
83
|
-
(0, vitest_1.expect)(result.total).toBe(3);
|
|
84
|
-
(0, vitest_1.expect)(result.passed).toBe(3);
|
|
85
|
-
});
|
|
86
|
-
});
|
|
87
|
-
(0, vitest_1.describe)('sequential execution', () => {
|
|
88
|
-
(0, vitest_1.it)('should run tests sequentially', async () => {
|
|
89
|
-
const order = [];
|
|
90
|
-
const suite = (0, testing_1.createTestSuite)('sequential-tests', {
|
|
91
|
-
cases: [
|
|
92
|
-
{ id: 'first', input: 'a', expected: 'a' },
|
|
93
|
-
{ id: 'second', input: 'b', expected: 'b' },
|
|
94
|
-
],
|
|
95
|
-
executor: async (input) => {
|
|
96
|
-
order.push(input);
|
|
97
|
-
return input;
|
|
98
|
-
},
|
|
99
|
-
parallel: false,
|
|
100
|
-
});
|
|
101
|
-
const result = await suite.run();
|
|
102
|
-
(0, vitest_1.expect)(order).toEqual(['a', 'b']);
|
|
103
|
-
(0, vitest_1.expect)(result.passed).toBe(2);
|
|
104
|
-
});
|
|
105
|
-
});
|
|
106
|
-
(0, vitest_1.describe)('stopOnFailure', () => {
|
|
107
|
-
(0, vitest_1.it)('should stop after first failure when enabled', async () => {
|
|
108
|
-
const suite = (0, testing_1.createTestSuite)('stop-on-fail', {
|
|
109
|
-
cases: [
|
|
110
|
-
{ id: 'pass', input: 'hello', expected: 'hello' },
|
|
111
|
-
{ id: 'fail', input: 'hello', expected: 'nope' },
|
|
112
|
-
{ id: 'skip', input: 'hello', expected: 'hello' },
|
|
113
|
-
],
|
|
114
|
-
executor: async (input) => input,
|
|
115
|
-
parallel: false,
|
|
116
|
-
stopOnFailure: true,
|
|
117
|
-
});
|
|
118
|
-
const result = await suite.run();
|
|
119
|
-
(0, vitest_1.expect)(result.total).toBe(2); // Only 2 ran
|
|
120
|
-
(0, vitest_1.expect)(result.passed).toBe(1);
|
|
121
|
-
(0, vitest_1.expect)(result.failed).toBe(1);
|
|
122
|
-
});
|
|
123
|
-
});
|
|
124
|
-
(0, vitest_1.describe)('timeout', () => {
|
|
125
|
-
(0, vitest_1.beforeEach)(() => {
|
|
126
|
-
vitest_1.vi.useFakeTimers();
|
|
127
|
-
});
|
|
128
|
-
(0, vitest_1.afterEach)(() => {
|
|
129
|
-
vitest_1.vi.useRealTimers();
|
|
130
|
-
});
|
|
131
|
-
(0, vitest_1.it)('should timeout slow tests', async () => {
|
|
132
|
-
const suite = (0, testing_1.createTestSuite)('timeout-tests', {
|
|
133
|
-
cases: [
|
|
134
|
-
{ id: 'slow', input: 'hello' },
|
|
135
|
-
],
|
|
136
|
-
executor: async (_input) => {
|
|
137
|
-
return new Promise((resolve) => {
|
|
138
|
-
setTimeout(() => resolve('done'), 60000);
|
|
139
|
-
});
|
|
140
|
-
},
|
|
141
|
-
timeout: 100,
|
|
142
|
-
parallel: false,
|
|
143
|
-
});
|
|
144
|
-
const runPromise = suite.run();
|
|
145
|
-
// Advance timers past the timeout
|
|
146
|
-
vitest_1.vi.advanceTimersByTime(200);
|
|
147
|
-
const result = await runPromise;
|
|
148
|
-
(0, vitest_1.expect)(result.results[0].passed).toBe(false);
|
|
149
|
-
(0, vitest_1.expect)(result.results[0].error).toContain('timeout');
|
|
150
|
-
});
|
|
151
|
-
});
|
|
152
|
-
(0, vitest_1.describe)('error handling', () => {
|
|
153
|
-
(0, vitest_1.it)('should catch executor errors gracefully', async () => {
|
|
154
|
-
const suite = (0, testing_1.createTestSuite)('error-tests', {
|
|
155
|
-
cases: [
|
|
156
|
-
{ input: 'hello' },
|
|
157
|
-
],
|
|
158
|
-
executor: async () => {
|
|
159
|
-
throw new Error('executor broke');
|
|
160
|
-
},
|
|
161
|
-
});
|
|
162
|
-
const result = await suite.run();
|
|
163
|
-
(0, vitest_1.expect)(result.results[0].passed).toBe(false);
|
|
164
|
-
(0, vitest_1.expect)(result.results[0].error).toBe('executor broke');
|
|
165
|
-
});
|
|
166
|
-
(0, vitest_1.it)('should fail when no executor and no expected', async () => {
|
|
167
|
-
const suite = (0, testing_1.createTestSuite)('no-exec', {
|
|
168
|
-
cases: [{ input: 'hello' }],
|
|
169
|
-
});
|
|
170
|
-
const result = await suite.run();
|
|
171
|
-
(0, vitest_1.expect)(result.results[0].passed).toBe(false);
|
|
172
|
-
(0, vitest_1.expect)(result.results[0].error).toContain('No executor');
|
|
173
|
-
});
|
|
174
|
-
});
|
|
175
|
-
(0, vitest_1.describe)('addCase', () => {
|
|
176
|
-
(0, vitest_1.it)('should allow adding cases after construction', async () => {
|
|
177
|
-
const suite = (0, testing_1.createTestSuite)('dynamic', {
|
|
178
|
-
cases: [],
|
|
179
|
-
executor: async (input) => input,
|
|
180
|
-
});
|
|
181
|
-
suite.addCase({ input: 'test', expected: 'test' });
|
|
182
|
-
const result = await suite.run();
|
|
183
|
-
(0, vitest_1.expect)(result.total).toBe(1);
|
|
184
|
-
(0, vitest_1.expect)(result.passed).toBe(1);
|
|
185
|
-
});
|
|
186
|
-
});
|
|
187
|
-
(0, vitest_1.describe)('custom assertion IDs', () => {
|
|
188
|
-
(0, vitest_1.it)('should use provided IDs', async () => {
|
|
189
|
-
const suite = (0, testing_1.createTestSuite)('ids', {
|
|
190
|
-
cases: [
|
|
191
|
-
{ id: 'custom-id', input: 'test', expected: 'test' },
|
|
192
|
-
],
|
|
193
|
-
});
|
|
194
|
-
const result = await suite.run();
|
|
195
|
-
(0, vitest_1.expect)(result.results[0].id).toBe('custom-id');
|
|
196
|
-
});
|
|
197
|
-
(0, vitest_1.it)('should generate IDs when not provided', async () => {
|
|
198
|
-
const suite = (0, testing_1.createTestSuite)('auto-ids', {
|
|
199
|
-
cases: [
|
|
200
|
-
{ input: 'test', expected: 'test' },
|
|
201
|
-
],
|
|
202
|
-
});
|
|
203
|
-
const result = await suite.run();
|
|
204
|
-
(0, vitest_1.expect)(result.results[0].id).toBe('case-0');
|
|
205
|
-
});
|
|
206
|
-
});
|
|
207
|
-
});
|
|
208
|
-
(0, vitest_1.describe)('Testing helper functions', () => {
|
|
209
|
-
(0, vitest_1.it)('containsKeywords returns an assertion function', () => {
|
|
210
|
-
const assertFn = (0, testing_1.containsKeywords)(['hello', 'world']);
|
|
211
|
-
const result = assertFn('hello world');
|
|
212
|
-
(0, vitest_1.expect)(result.passed).toBe(true);
|
|
213
|
-
(0, vitest_1.expect)(result.name).toBe('toContainKeywords');
|
|
214
|
-
});
|
|
215
|
-
(0, vitest_1.it)('matchesPattern returns an assertion function', () => {
|
|
216
|
-
const assertFn = (0, testing_1.matchesPattern)(/\d{3}/);
|
|
217
|
-
const result = assertFn('code: 123');
|
|
218
|
-
(0, vitest_1.expect)(result.passed).toBe(true);
|
|
219
|
-
});
|
|
220
|
-
(0, vitest_1.it)('hasSentiment returns an assertion function', () => {
|
|
221
|
-
const assertFn = (0, testing_1.hasSentiment)('positive');
|
|
222
|
-
const result = assertFn('This is great!');
|
|
223
|
-
(0, vitest_1.expect)(result.passed).toBe(true);
|
|
224
|
-
});
|
|
225
|
-
(0, vitest_1.it)('hasLength returns an assertion function', () => {
|
|
226
|
-
const assertFn = (0, testing_1.hasLength)({ min: 5, max: 50 });
|
|
227
|
-
const result = assertFn('hello world');
|
|
228
|
-
(0, vitest_1.expect)(result.passed).toBe(true);
|
|
229
|
-
});
|
|
230
|
-
});
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
export {};
|
|
@@ -1,222 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
const vitest_1 = require("vitest");
|
|
4
|
-
const workflows_1 = require("../workflows");
|
|
5
|
-
const client_1 = require("../client");
|
|
6
|
-
// Mock fetch
|
|
7
|
-
const mockFetch = vitest_1.vi.fn();
|
|
8
|
-
function createMockClient() {
|
|
9
|
-
vitest_1.vi.stubGlobal('fetch', mockFetch);
|
|
10
|
-
mockFetch.mockResolvedValue({
|
|
11
|
-
ok: true,
|
|
12
|
-
json: async () => ({ id: 1, traceId: 'trace-1', name: 'test' }),
|
|
13
|
-
status: 200,
|
|
14
|
-
});
|
|
15
|
-
return new client_1.AIEvalClient({ apiKey: 'test-key', baseUrl: 'http://localhost:3000', organizationId: 1 });
|
|
16
|
-
}
|
|
17
|
-
(0, vitest_1.describe)('WorkflowTracer', () => {
|
|
18
|
-
let client;
|
|
19
|
-
let tracer;
|
|
20
|
-
(0, vitest_1.beforeEach)(() => {
|
|
21
|
-
client = createMockClient();
|
|
22
|
-
tracer = new workflows_1.WorkflowTracer(client, { organizationId: 1 });
|
|
23
|
-
});
|
|
24
|
-
(0, vitest_1.afterEach)(() => {
|
|
25
|
-
vitest_1.vi.unstubAllGlobals();
|
|
26
|
-
mockFetch.mockReset();
|
|
27
|
-
});
|
|
28
|
-
(0, vitest_1.describe)('workflow lifecycle', () => {
|
|
29
|
-
(0, vitest_1.it)('should start and end a workflow', async () => {
|
|
30
|
-
const workflow = await tracer.startWorkflow('Test Pipeline');
|
|
31
|
-
(0, vitest_1.expect)(workflow.name).toBe('Test Pipeline');
|
|
32
|
-
(0, vitest_1.expect)(tracer.isWorkflowActive()).toBe(true);
|
|
33
|
-
await tracer.endWorkflow({ status: 'success' });
|
|
34
|
-
(0, vitest_1.expect)(tracer.isWorkflowActive()).toBe(false);
|
|
35
|
-
});
|
|
36
|
-
(0, vitest_1.it)('should throw if starting a second workflow', async () => {
|
|
37
|
-
await tracer.startWorkflow('First');
|
|
38
|
-
await (0, vitest_1.expect)(tracer.startWorkflow('Second')).rejects.toThrow('already active');
|
|
39
|
-
});
|
|
40
|
-
(0, vitest_1.it)('should throw if ending without starting', async () => {
|
|
41
|
-
await (0, vitest_1.expect)(tracer.endWorkflow()).rejects.toThrow('No active workflow');
|
|
42
|
-
});
|
|
43
|
-
});
|
|
44
|
-
(0, vitest_1.describe)('agent spans', () => {
|
|
45
|
-
(0, vitest_1.it)('should start and end agent spans', async () => {
|
|
46
|
-
await tracer.startWorkflow('Pipeline');
|
|
47
|
-
const span = await tracer.startAgentSpan('RouterAgent', { input: 'hello' });
|
|
48
|
-
(0, vitest_1.expect)(span.agentName).toBe('RouterAgent');
|
|
49
|
-
(0, vitest_1.expect)(span.spanId).toBeTruthy();
|
|
50
|
-
await tracer.endAgentSpan(span, { result: 'routed' });
|
|
51
|
-
await tracer.endWorkflow();
|
|
52
|
-
});
|
|
53
|
-
(0, vitest_1.it)('should throw if no workflow is active', async () => {
|
|
54
|
-
await (0, vitest_1.expect)(tracer.startAgentSpan('Agent')).rejects.toThrow('No active workflow');
|
|
55
|
-
});
|
|
56
|
-
});
|
|
57
|
-
(0, vitest_1.describe)('decision recording', () => {
|
|
58
|
-
(0, vitest_1.it)('should record decisions', async () => {
|
|
59
|
-
await tracer.startWorkflow('Pipeline');
|
|
60
|
-
await tracer.recordDecision({
|
|
61
|
-
agent: 'Router',
|
|
62
|
-
type: 'route',
|
|
63
|
-
chosen: 'technical_support',
|
|
64
|
-
alternatives: [{ action: 'billing', confidence: 30 }],
|
|
65
|
-
reasoning: 'Detected technical keywords',
|
|
66
|
-
confidence: 85,
|
|
67
|
-
});
|
|
68
|
-
const decisions = tracer.getDecisions();
|
|
69
|
-
(0, vitest_1.expect)(decisions).toHaveLength(1);
|
|
70
|
-
(0, vitest_1.expect)(decisions[0].agent).toBe('Router');
|
|
71
|
-
(0, vitest_1.expect)(decisions[0].chosen).toBe('technical_support');
|
|
72
|
-
(0, vitest_1.expect)(decisions[0].confidence).toBe(85);
|
|
73
|
-
await tracer.endWorkflow();
|
|
74
|
-
});
|
|
75
|
-
(0, vitest_1.it)('should throw if no workflow is active', async () => {
|
|
76
|
-
await (0, vitest_1.expect)(tracer.recordDecision({
|
|
77
|
-
agent: 'Agent',
|
|
78
|
-
type: 'action',
|
|
79
|
-
chosen: 'x',
|
|
80
|
-
alternatives: [],
|
|
81
|
-
})).rejects.toThrow('No active workflow');
|
|
82
|
-
});
|
|
83
|
-
});
|
|
84
|
-
(0, vitest_1.describe)('handoff recording', () => {
|
|
85
|
-
(0, vitest_1.it)('should record handoffs between agents', async () => {
|
|
86
|
-
await tracer.startWorkflow('Pipeline');
|
|
87
|
-
await tracer.recordHandoff('Router', 'TechAgent', { issue: 'API' }, 'delegation');
|
|
88
|
-
const handoffs = tracer.getHandoffs();
|
|
89
|
-
(0, vitest_1.expect)(handoffs).toHaveLength(1);
|
|
90
|
-
(0, vitest_1.expect)(handoffs[0].fromAgent).toBe('Router');
|
|
91
|
-
(0, vitest_1.expect)(handoffs[0].toAgent).toBe('TechAgent');
|
|
92
|
-
(0, vitest_1.expect)(handoffs[0].handoffType).toBe('delegation');
|
|
93
|
-
await tracer.endWorkflow();
|
|
94
|
-
});
|
|
95
|
-
});
|
|
96
|
-
(0, vitest_1.describe)('cost tracking', () => {
|
|
97
|
-
(0, vitest_1.it)('should record and calculate costs', async () => {
|
|
98
|
-
await tracer.startWorkflow('Pipeline');
|
|
99
|
-
const cost = await tracer.recordCost({
|
|
100
|
-
provider: 'openai',
|
|
101
|
-
model: 'gpt-4o',
|
|
102
|
-
inputTokens: 1000,
|
|
103
|
-
outputTokens: 500,
|
|
104
|
-
});
|
|
105
|
-
// gpt-4o pricing: $5/1M input, $15/1M output
|
|
106
|
-
(0, vitest_1.expect)(cost.totalTokens).toBe(1500);
|
|
107
|
-
(0, vitest_1.expect)(parseFloat(cost.inputCost)).toBeCloseTo(0.005, 4);
|
|
108
|
-
(0, vitest_1.expect)(parseFloat(cost.outputCost)).toBeCloseTo(0.0075, 4);
|
|
109
|
-
(0, vitest_1.expect)(parseFloat(cost.totalCost)).toBeCloseTo(0.0125, 4);
|
|
110
|
-
(0, vitest_1.expect)(tracer.getTotalCost()).toBeCloseTo(0.0125, 4);
|
|
111
|
-
await tracer.endWorkflow();
|
|
112
|
-
});
|
|
113
|
-
(0, vitest_1.it)('should track cost breakdown by category', async () => {
|
|
114
|
-
await tracer.startWorkflow('Pipeline');
|
|
115
|
-
await tracer.recordCost({
|
|
116
|
-
provider: 'openai',
|
|
117
|
-
model: 'gpt-4o',
|
|
118
|
-
inputTokens: 1000,
|
|
119
|
-
outputTokens: 500,
|
|
120
|
-
category: 'llm',
|
|
121
|
-
});
|
|
122
|
-
await tracer.recordCost({
|
|
123
|
-
provider: 'openai',
|
|
124
|
-
model: 'gpt-4o',
|
|
125
|
-
inputTokens: 200,
|
|
126
|
-
outputTokens: 100,
|
|
127
|
-
category: 'tool',
|
|
128
|
-
});
|
|
129
|
-
const breakdown = tracer.getCostBreakdown();
|
|
130
|
-
(0, vitest_1.expect)(breakdown.llm).toBeGreaterThan(0);
|
|
131
|
-
(0, vitest_1.expect)(breakdown.tool).toBeGreaterThan(0);
|
|
132
|
-
(0, vitest_1.expect)(breakdown.embedding).toBe(0);
|
|
133
|
-
await tracer.endWorkflow();
|
|
134
|
-
});
|
|
135
|
-
(0, vitest_1.it)('should use default pricing for unknown models', async () => {
|
|
136
|
-
await tracer.startWorkflow('Pipeline');
|
|
137
|
-
const cost = await tracer.recordCost({
|
|
138
|
-
provider: 'custom',
|
|
139
|
-
model: 'unknown-model',
|
|
140
|
-
inputTokens: 1000000,
|
|
141
|
-
outputTokens: 1000000,
|
|
142
|
-
});
|
|
143
|
-
// Default: $1/1M input, $3/1M output = $4 total
|
|
144
|
-
(0, vitest_1.expect)(parseFloat(cost.totalCost)).toBeCloseTo(4.0, 1);
|
|
145
|
-
await tracer.endWorkflow();
|
|
146
|
-
});
|
|
147
|
-
});
|
|
148
|
-
(0, vitest_1.describe)('state resets', () => {
|
|
149
|
-
(0, vitest_1.it)('should reset state when starting a new workflow', async () => {
|
|
150
|
-
await tracer.startWorkflow('First');
|
|
151
|
-
await tracer.recordCost({
|
|
152
|
-
provider: 'openai',
|
|
153
|
-
model: 'gpt-4o',
|
|
154
|
-
inputTokens: 1000,
|
|
155
|
-
outputTokens: 500,
|
|
156
|
-
});
|
|
157
|
-
await tracer.recordHandoff(undefined, 'Agent1');
|
|
158
|
-
await tracer.recordDecision({
|
|
159
|
-
agent: 'A',
|
|
160
|
-
type: 'action',
|
|
161
|
-
chosen: 'x',
|
|
162
|
-
alternatives: [],
|
|
163
|
-
});
|
|
164
|
-
await tracer.endWorkflow();
|
|
165
|
-
// Start fresh workflow
|
|
166
|
-
await tracer.startWorkflow('Second');
|
|
167
|
-
(0, vitest_1.expect)(tracer.getCosts()).toHaveLength(0);
|
|
168
|
-
(0, vitest_1.expect)(tracer.getHandoffs()).toHaveLength(0);
|
|
169
|
-
(0, vitest_1.expect)(tracer.getDecisions()).toHaveLength(0);
|
|
170
|
-
(0, vitest_1.expect)(tracer.getTotalCost()).toBe(0);
|
|
171
|
-
await tracer.endWorkflow();
|
|
172
|
-
});
|
|
173
|
-
});
|
|
174
|
-
});
|
|
175
|
-
(0, vitest_1.describe)('createWorkflowTracer', () => {
|
|
176
|
-
(0, vitest_1.beforeEach)(() => {
|
|
177
|
-
vitest_1.vi.stubGlobal('fetch', vitest_1.vi.fn().mockResolvedValue({
|
|
178
|
-
ok: true,
|
|
179
|
-
json: async () => ({ id: 1, traceId: 't', name: 'n' }),
|
|
180
|
-
status: 200,
|
|
181
|
-
}));
|
|
182
|
-
});
|
|
183
|
-
(0, vitest_1.afterEach)(() => {
|
|
184
|
-
vitest_1.vi.unstubAllGlobals();
|
|
185
|
-
});
|
|
186
|
-
(0, vitest_1.it)('should create a WorkflowTracer instance', () => {
|
|
187
|
-
const client = new client_1.AIEvalClient({ apiKey: 'key', baseUrl: 'http://localhost:3000' });
|
|
188
|
-
const tracer = (0, workflows_1.createWorkflowTracer)(client, { organizationId: 1 });
|
|
189
|
-
(0, vitest_1.expect)(tracer).toBeInstanceOf(workflows_1.WorkflowTracer);
|
|
190
|
-
});
|
|
191
|
-
});
|
|
192
|
-
(0, vitest_1.describe)('traceWorkflowStep', () => {
|
|
193
|
-
(0, vitest_1.beforeEach)(() => {
|
|
194
|
-
vitest_1.vi.stubGlobal('fetch', vitest_1.vi.fn().mockResolvedValue({
|
|
195
|
-
ok: true,
|
|
196
|
-
json: async () => ({ id: 1, traceId: 't', name: 'n' }),
|
|
197
|
-
status: 200,
|
|
198
|
-
}));
|
|
199
|
-
});
|
|
200
|
-
(0, vitest_1.afterEach)(() => {
|
|
201
|
-
vitest_1.vi.unstubAllGlobals();
|
|
202
|
-
});
|
|
203
|
-
(0, vitest_1.it)('should trace a function execution', async () => {
|
|
204
|
-
const client = new client_1.AIEvalClient({ apiKey: 'key', baseUrl: 'http://localhost:3000', organizationId: 1 });
|
|
205
|
-
const tracer = new workflows_1.WorkflowTracer(client, { organizationId: 1 });
|
|
206
|
-
await tracer.startWorkflow('Test');
|
|
207
|
-
const result = await (0, workflows_1.traceWorkflowStep)(tracer, 'Agent', async () => {
|
|
208
|
-
return 'output';
|
|
209
|
-
});
|
|
210
|
-
(0, vitest_1.expect)(result).toBe('output');
|
|
211
|
-
await tracer.endWorkflow();
|
|
212
|
-
});
|
|
213
|
-
(0, vitest_1.it)('should re-throw errors from the traced function', async () => {
|
|
214
|
-
const client = new client_1.AIEvalClient({ apiKey: 'key', baseUrl: 'http://localhost:3000', organizationId: 1 });
|
|
215
|
-
const tracer = new workflows_1.WorkflowTracer(client, { organizationId: 1 });
|
|
216
|
-
await tracer.startWorkflow('Test');
|
|
217
|
-
await (0, vitest_1.expect)((0, workflows_1.traceWorkflowStep)(tracer, 'Agent', async () => {
|
|
218
|
-
throw new Error('agent error');
|
|
219
|
-
})).rejects.toThrow('agent error');
|
|
220
|
-
await tracer.endWorkflow({ status: 'failed' }, 'failed');
|
|
221
|
-
});
|
|
222
|
-
});
|