judgeval 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +95 -68
- package/dist/cjs/common/logger-instance.js +17 -19
- package/dist/cjs/common/logger-instance.js.map +1 -1
- package/dist/cjs/common/tracer.js +210 -126
- package/dist/cjs/common/tracer.js.map +1 -1
- package/dist/cjs/constants.js +3 -2
- package/dist/cjs/constants.js.map +1 -1
- package/dist/cjs/index.js +1 -3
- package/dist/cjs/index.js.map +1 -1
- package/dist/cjs/judgment-client.js +20 -114
- package/dist/cjs/judgment-client.js.map +1 -1
- package/dist/cjs/scorers/api-scorer.js +56 -48
- package/dist/cjs/scorers/api-scorer.js.map +1 -1
- package/dist/cjs/scorers/base-scorer.js +66 -11
- package/dist/cjs/scorers/base-scorer.js.map +1 -1
- package/dist/esm/common/logger-instance.js +17 -19
- package/dist/esm/common/logger-instance.js.map +1 -1
- package/dist/esm/common/tracer.js +211 -127
- package/dist/esm/common/tracer.js.map +1 -1
- package/dist/esm/constants.js +2 -1
- package/dist/esm/constants.js.map +1 -1
- package/dist/esm/index.js +0 -1
- package/dist/esm/index.js.map +1 -1
- package/dist/esm/judgment-client.js +20 -114
- package/dist/esm/judgment-client.js.map +1 -1
- package/dist/esm/scorers/api-scorer.js +56 -48
- package/dist/esm/scorers/api-scorer.js.map +1 -1
- package/dist/esm/scorers/base-scorer.js +66 -11
- package/dist/esm/scorers/base-scorer.js.map +1 -1
- package/dist/types/common/tracer.d.ts +27 -13
- package/dist/types/constants.d.ts +2 -1
- package/dist/types/index.d.ts +0 -1
- package/dist/types/judgment-client.d.ts +0 -22
- package/dist/types/scorers/api-scorer.d.ts +15 -15
- package/dist/types/scorers/base-scorer.d.ts +53 -10
- package/package.json +10 -3
- package/dist/cjs/scorers/exact-match-scorer.js +0 -84
- package/dist/cjs/scorers/exact-match-scorer.js.map +0 -1
- package/dist/esm/scorers/exact-match-scorer.js +0 -80
- package/dist/esm/scorers/exact-match-scorer.js.map +0 -1
- package/dist/types/scorers/exact-match-scorer.d.ts +0 -10
|
@@ -74,16 +74,13 @@ class TraceManagerClient {
|
|
|
74
74
|
try {
|
|
75
75
|
// Use isomorphic fetch (available globally in modern Node.js and browsers)
|
|
76
76
|
const response = yield fetch(url, Object.assign(Object.assign({}, options), { headers: headers }));
|
|
77
|
-
if
|
|
78
|
-
|
|
79
|
-
console.error(`API Error (${response.status}) for ${options.method || 'GET'} ${url}: ${errorBody}`);
|
|
80
|
-
throw new Error(`Judgment API request failed: ${response.status} ${response.statusText} - ${errorBody}`);
|
|
81
|
-
}
|
|
77
|
+
// We will return the response object even if !response.ok
|
|
78
|
+
// The caller (e.g., saveTrace) is responsible for checking response.ok or response.status
|
|
82
79
|
// Handle cases where the response might be empty (e.g., 204 No Content on DELETE)
|
|
83
80
|
if (response.status === 204) {
|
|
84
81
|
return null; // Indicate success with no content
|
|
85
82
|
}
|
|
86
|
-
return
|
|
83
|
+
return response;
|
|
87
84
|
}
|
|
88
85
|
catch (error) {
|
|
89
86
|
console.error(`Network or fetch error during ${options.method || 'GET'} ${url}:`, error);
|
|
@@ -100,21 +97,52 @@ class TraceManagerClient {
|
|
|
100
97
|
});
|
|
101
98
|
});
|
|
102
99
|
}
|
|
103
|
-
saveTrace(traceData
|
|
100
|
+
saveTrace(traceData) {
|
|
104
101
|
return __awaiter(this, void 0, void 0, function* () {
|
|
102
|
+
// _fetch now returns the raw response object or throws on network error
|
|
105
103
|
const response = yield this._fetch(constants_js_1.JUDGMENT_TRACES_SAVE_API_URL, {
|
|
106
104
|
method: 'POST',
|
|
107
|
-
body: JSON.stringify(traceData),
|
|
105
|
+
body: JSON.stringify(traceData), // Stringify directly here again
|
|
108
106
|
});
|
|
109
|
-
//
|
|
110
|
-
if (!
|
|
111
|
-
//
|
|
112
|
-
|
|
107
|
+
// Check if _fetch threw a network error (caught below) or returned an invalid object
|
|
108
|
+
if (!response) {
|
|
109
|
+
// This case should ideally be caught by _fetch's catch block, but double-check
|
|
110
|
+
throw new Error('Failed to save trace data: No response received from API.');
|
|
111
|
+
}
|
|
112
|
+
// Now, check the status code on the received response object
|
|
113
|
+
if (response.status === 400) {
|
|
114
|
+
// Attempt to get error body for more info
|
|
115
|
+
const errorBody = yield response.text();
|
|
116
|
+
throw new Error(`Failed to save trace data: Check your Trace name for conflicts, set overwrite=True to overwrite existing traces: ${response.status} ${response.statusText || ''} - ${errorBody}`);
|
|
117
|
+
}
|
|
118
|
+
else if (!response.ok) { // Handles other errors (5xx, 4xx except 400)
|
|
119
|
+
const errorBody = yield response.text();
|
|
120
|
+
throw new Error(`Failed to save trace data: Status ${response.status} ${response.statusText || '(No status text)'} - ${errorBody}`);
|
|
121
|
+
}
|
|
122
|
+
// --- Success Path ---
|
|
123
|
+
// Optionally log the UI URL (needs JSON parsing)
|
|
124
|
+
let responseData = null;
|
|
125
|
+
try {
|
|
126
|
+
// Handle 204 No Content specifically
|
|
127
|
+
if (response.status === 204) {
|
|
128
|
+
responseData = null; // Or maybe { success: true }?
|
|
129
|
+
}
|
|
130
|
+
else {
|
|
131
|
+
responseData = yield response.json(); // Parse JSON only on success
|
|
132
|
+
}
|
|
133
|
+
}
|
|
134
|
+
catch (parseError) {
|
|
135
|
+
logger_instance_js_1.default.warn("Failed to parse successful API response JSON.", { error: parseError });
|
|
136
|
+
// Depending on requirements, maybe throw, maybe return a default success object
|
|
137
|
+
throw new Error(`API request succeeded (${response.status}), but failed to parse JSON response.`);
|
|
138
|
+
}
|
|
139
|
+
if (responseData === null || responseData === void 0 ? void 0 : responseData.ui_results_url) {
|
|
113
140
|
console.info(`
|
|
114
|
-
🔍 View trace: ${
|
|
141
|
+
🔍 View trace: ${responseData.ui_results_url}
|
|
115
142
|
`);
|
|
116
143
|
}
|
|
117
|
-
|
|
144
|
+
// Return the parsed data (or null for 204)
|
|
145
|
+
return responseData;
|
|
118
146
|
});
|
|
119
147
|
}
|
|
120
148
|
deleteTrace(traceId) {
|
|
@@ -143,6 +171,35 @@ class TraceManagerClient {
|
|
|
143
171
|
});
|
|
144
172
|
});
|
|
145
173
|
}
|
|
174
|
+
/**
|
|
175
|
+
* Calculate token costs directly using the API endpoint.
|
|
176
|
+
* This is more accurate than client-side calculation as it uses the most up-to-date pricing.
|
|
177
|
+
*
|
|
178
|
+
* @param model The model name (e.g. 'gpt-4', 'claude-3-opus-20240229')
|
|
179
|
+
* @param promptTokens Number of tokens in the prompt/input
|
|
180
|
+
* @param completionTokens Number of tokens in the completion/output
|
|
181
|
+
* @returns Object containing token counts and calculated costs in USD
|
|
182
|
+
*/
|
|
183
|
+
calculateTokenCosts(model, promptTokens, completionTokens) {
|
|
184
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
185
|
+
try {
|
|
186
|
+
// Use the new calculation endpoint
|
|
187
|
+
const result = yield this._fetch(constants_js_1.JUDGMENT_CALCULATE_TOKEN_COSTS_API_URL, {
|
|
188
|
+
method: 'POST',
|
|
189
|
+
body: JSON.stringify({
|
|
190
|
+
model,
|
|
191
|
+
prompt_tokens: promptTokens,
|
|
192
|
+
completion_tokens: completionTokens
|
|
193
|
+
})
|
|
194
|
+
});
|
|
195
|
+
return result;
|
|
196
|
+
}
|
|
197
|
+
catch (error) {
|
|
198
|
+
logger_instance_js_1.default.warn(`Failed to calculate token costs for model ${model}.`, { error: error instanceof Error ? error.message : String(error) });
|
|
199
|
+
return null;
|
|
200
|
+
}
|
|
201
|
+
});
|
|
202
|
+
}
|
|
146
203
|
}
|
|
147
204
|
exports.TraceManagerClient = TraceManagerClient;
|
|
148
205
|
// --- Helper Functions ---
|
|
@@ -160,6 +217,7 @@ class TraceClient {
|
|
|
160
217
|
constructor(config) {
|
|
161
218
|
var _a, _b, _c, _d, _e;
|
|
162
219
|
this.traceManager = null; // Can be null if monitoring disabled
|
|
220
|
+
this._spanDepths = {}; // Track depth of active spans
|
|
163
221
|
this.traceId = config.traceId || (0, uuid_1.v4)();
|
|
164
222
|
this.originalName = config.name || 'default_trace'; // Store original
|
|
165
223
|
this.name = sanitizeName(this.originalName); // Use sanitized name internally
|
|
@@ -198,7 +256,7 @@ class TraceClient {
|
|
|
198
256
|
recordInput(inputs) {
|
|
199
257
|
const traceClientContext = getTraceClientContext();
|
|
200
258
|
const currentEntry = traceClientContext.entryStack.at(-1);
|
|
201
|
-
if (!currentEntry) {
|
|
259
|
+
if (!currentEntry || !currentEntry.span_id) {
|
|
202
260
|
console.warn(`No current entry to record input to\nStack trace: ${new Error().stack}`);
|
|
203
261
|
return;
|
|
204
262
|
}
|
|
@@ -207,14 +265,16 @@ class TraceClient {
|
|
|
207
265
|
span_id: currentEntry.span_id,
|
|
208
266
|
inputs,
|
|
209
267
|
function: currentEntry.function,
|
|
210
|
-
depth: currentEntry.
|
|
211
|
-
|
|
268
|
+
depth: this._spanDepths[currentEntry.span_id],
|
|
269
|
+
created_at: Date.now() / 1000,
|
|
270
|
+
span_type: currentEntry.span_type,
|
|
271
|
+
message: `Inputs to ${currentEntry.function}`
|
|
212
272
|
});
|
|
213
273
|
}
|
|
214
274
|
recordOutput(output) {
|
|
215
275
|
const traceClientContext = getTraceClientContext();
|
|
216
276
|
const currentEntry = traceClientContext.entryStack.at(-1);
|
|
217
|
-
if (!currentEntry) {
|
|
277
|
+
if (!currentEntry || !currentEntry.span_id) {
|
|
218
278
|
console.warn(`No current entry to record output to\nStack trace: ${new Error().stack}`);
|
|
219
279
|
return;
|
|
220
280
|
}
|
|
@@ -223,33 +283,28 @@ class TraceClient {
|
|
|
223
283
|
span_id: currentEntry.span_id,
|
|
224
284
|
output,
|
|
225
285
|
function: currentEntry.function,
|
|
226
|
-
depth: currentEntry.
|
|
227
|
-
|
|
286
|
+
depth: this._spanDepths[currentEntry.span_id],
|
|
287
|
+
created_at: Date.now() / 1000,
|
|
288
|
+
span_type: currentEntry.span_type,
|
|
289
|
+
message: `Output from ${currentEntry.function}`
|
|
228
290
|
});
|
|
229
291
|
}
|
|
230
292
|
recordError(error) {
|
|
231
|
-
var _a;
|
|
232
293
|
const traceClientContext = getTraceClientContext();
|
|
233
294
|
const currentEntry = traceClientContext.entryStack.at(-1);
|
|
234
|
-
if (!currentEntry) {
|
|
295
|
+
if (!currentEntry || !currentEntry.span_id) {
|
|
235
296
|
console.warn(`No current entry to record error to\nStack trace: ${new Error().stack}`);
|
|
236
297
|
return;
|
|
237
298
|
}
|
|
238
|
-
let output = error;
|
|
239
|
-
if (error instanceof Error) {
|
|
240
|
-
output = {
|
|
241
|
-
name: error.name,
|
|
242
|
-
message: error.message,
|
|
243
|
-
stack: (_a = error.stack) === null || _a === void 0 ? void 0 : _a.substring(0, 1000)
|
|
244
|
-
};
|
|
245
|
-
}
|
|
246
299
|
this.addEntry({
|
|
247
300
|
type: 'error',
|
|
248
301
|
span_id: currentEntry.span_id,
|
|
249
|
-
output,
|
|
302
|
+
output: error,
|
|
250
303
|
function: currentEntry.function,
|
|
251
|
-
depth: currentEntry.
|
|
252
|
-
|
|
304
|
+
depth: this._spanDepths[currentEntry.span_id],
|
|
305
|
+
created_at: Date.now() / 1000,
|
|
306
|
+
span_type: currentEntry.span_type,
|
|
307
|
+
message: `Error from ${currentEntry.function}`
|
|
253
308
|
});
|
|
254
309
|
}
|
|
255
310
|
startSpan(name, options = {}) {
|
|
@@ -260,18 +315,20 @@ class TraceClient {
|
|
|
260
315
|
const spanType = (_a = options.spanType) !== null && _a !== void 0 ? _a : 'span';
|
|
261
316
|
const startTime = Date.now() / 1000;
|
|
262
317
|
let depth = 0, parentSpanId = undefined;
|
|
263
|
-
if (parentEntry) {
|
|
264
|
-
depth = parentEntry.
|
|
318
|
+
if (parentEntry && parentEntry.span_id) {
|
|
319
|
+
depth = this._spanDepths[parentEntry.span_id] + 1;
|
|
265
320
|
parentSpanId = parentEntry.span_id;
|
|
266
321
|
}
|
|
322
|
+
this._spanDepths[spanId] = depth;
|
|
267
323
|
const entry = {
|
|
268
324
|
type: 'enter',
|
|
269
325
|
function: name,
|
|
270
326
|
span_id: spanId,
|
|
271
327
|
depth: depth,
|
|
272
|
-
|
|
328
|
+
created_at: startTime,
|
|
273
329
|
span_type: spanType,
|
|
274
|
-
parent_span_id: parentSpanId
|
|
330
|
+
parent_span_id: parentSpanId,
|
|
331
|
+
message: name
|
|
275
332
|
};
|
|
276
333
|
this.addEntry(entry);
|
|
277
334
|
traceClientContext.entryStack.push(entry);
|
|
@@ -279,21 +336,24 @@ class TraceClient {
|
|
|
279
336
|
endSpan() {
|
|
280
337
|
const traceClientContext = getTraceClientContext();
|
|
281
338
|
const enterEntry = traceClientContext.entryStack.pop();
|
|
282
|
-
if (!enterEntry) {
|
|
339
|
+
if (!enterEntry || !enterEntry.span_id) {
|
|
283
340
|
console.warn("No enter entry to end");
|
|
284
341
|
return;
|
|
285
342
|
}
|
|
286
343
|
const endTime = Date.now() / 1000;
|
|
287
|
-
const duration = endTime - enterEntry.
|
|
344
|
+
const duration = endTime - enterEntry.created_at;
|
|
288
345
|
this.addEntry({
|
|
289
346
|
type: 'exit',
|
|
290
347
|
function: enterEntry.function,
|
|
291
348
|
span_id: enterEntry.span_id,
|
|
292
|
-
depth: enterEntry.
|
|
293
|
-
|
|
349
|
+
depth: this._spanDepths[enterEntry.span_id],
|
|
350
|
+
created_at: endTime,
|
|
294
351
|
duration: duration,
|
|
295
|
-
span_type: enterEntry.span_type
|
|
352
|
+
span_type: enterEntry.span_type,
|
|
353
|
+
message: `← ${enterEntry.function}`
|
|
296
354
|
});
|
|
355
|
+
// Clean up depth tracking
|
|
356
|
+
delete this._spanDepths[enterEntry.span_id];
|
|
297
357
|
}
|
|
298
358
|
*span(name, options = {}) {
|
|
299
359
|
if (!this.enableMonitoring) {
|
|
@@ -311,6 +371,7 @@ class TraceClient {
|
|
|
311
371
|
condenseTrace(rawEntries) {
|
|
312
372
|
var _a, _b, _c, _d, _e;
|
|
313
373
|
const spansById = {};
|
|
374
|
+
const allEvaluationRuns = []; // To collect all eval runs
|
|
314
375
|
for (const entry of rawEntries) {
|
|
315
376
|
const spanId = entry.span_id;
|
|
316
377
|
if (!spanId)
|
|
@@ -320,7 +381,8 @@ class TraceClient {
|
|
|
320
381
|
span_id: spanId,
|
|
321
382
|
function: entry.function || 'unknown',
|
|
322
383
|
depth: (_a = entry.depth) !== null && _a !== void 0 ? _a : 0,
|
|
323
|
-
|
|
384
|
+
created_at: new Date(((_b = entry.created_at) !== null && _b !== void 0 ? _b : 0) * 1000).toISOString(), // Convert number to ISO string
|
|
385
|
+
trace_id: this.traceId, // Add trace_id
|
|
324
386
|
parent_span_id: entry.parent_span_id,
|
|
325
387
|
span_type: entry.span_type || 'span',
|
|
326
388
|
inputs: null,
|
|
@@ -335,14 +397,14 @@ class TraceClient {
|
|
|
335
397
|
case 'enter':
|
|
336
398
|
currentSpanData.function = entry.function || currentSpanData.function;
|
|
337
399
|
currentSpanData.depth = (_c = entry.depth) !== null && _c !== void 0 ? _c : currentSpanData.depth;
|
|
338
|
-
currentSpanData.
|
|
400
|
+
currentSpanData.created_at = new Date(((_d = entry.created_at) !== null && _d !== void 0 ? _d : 0) * 1000).toISOString(); // Ensure created_at is string on update
|
|
339
401
|
currentSpanData.parent_span_id = entry.parent_span_id;
|
|
340
402
|
currentSpanData.span_type = entry.span_type || currentSpanData.span_type;
|
|
341
|
-
currentSpanData.start_time = entry.
|
|
403
|
+
currentSpanData.start_time = entry.created_at; // Keep original number for duration calc
|
|
342
404
|
break;
|
|
343
405
|
case 'exit':
|
|
344
406
|
currentSpanData.duration = (_e = entry.duration) !== null && _e !== void 0 ? _e : currentSpanData.duration;
|
|
345
|
-
currentSpanData.end_time = entry.
|
|
407
|
+
currentSpanData.end_time = entry.created_at; // Keep original number for duration calc
|
|
346
408
|
if (currentSpanData.duration === null && currentSpanData.start_time && currentSpanData.end_time) {
|
|
347
409
|
currentSpanData.duration = currentSpanData.end_time - currentSpanData.start_time;
|
|
348
410
|
}
|
|
@@ -360,8 +422,11 @@ class TraceClient {
|
|
|
360
422
|
currentSpanData.output = entry.output;
|
|
361
423
|
break;
|
|
362
424
|
case 'evaluation':
|
|
363
|
-
if
|
|
364
|
-
|
|
425
|
+
// Check if evaluation_runs is an array and has at least one element
|
|
426
|
+
if (Array.isArray(entry.evaluation_runs) && entry.evaluation_runs.length > 0) {
|
|
427
|
+
const evalPayload = entry.evaluation_runs[0]; // Extract the payload object
|
|
428
|
+
currentSpanData.evaluation_runs.push(evalPayload); // Add the object to the span's list
|
|
429
|
+
allEvaluationRuns.push(evalPayload); // Add the object to the central list
|
|
365
430
|
}
|
|
366
431
|
break;
|
|
367
432
|
}
|
|
@@ -392,9 +457,11 @@ class TraceClient {
|
|
|
392
457
|
childrenMap[parentId].push(span);
|
|
393
458
|
}
|
|
394
459
|
}
|
|
395
|
-
|
|
460
|
+
// Sort using parsed dates
|
|
461
|
+
roots.sort((a, b) => Date.parse(a.created_at) - Date.parse(b.created_at));
|
|
396
462
|
for (const parentId in childrenMap) {
|
|
397
|
-
|
|
463
|
+
// Sort using parsed dates
|
|
464
|
+
childrenMap[parentId].sort((a, b) => Date.parse(a.created_at) - Date.parse(b.created_at));
|
|
398
465
|
}
|
|
399
466
|
function buildFlatListDfs(span) {
|
|
400
467
|
if (visited.has(span.span_id))
|
|
@@ -415,26 +482,36 @@ class TraceClient {
|
|
|
415
482
|
buildFlatListDfs(span);
|
|
416
483
|
}
|
|
417
484
|
}
|
|
418
|
-
return sortedCondensedList;
|
|
485
|
+
return [sortedCondensedList, allEvaluationRuns]; // Return both
|
|
419
486
|
}
|
|
420
487
|
save() {
|
|
421
488
|
return __awaiter(this, arguments, void 0, function* (emptySave = false) {
|
|
489
|
+
var _a, _b, _c, _d, _e, _f, _g, _h;
|
|
422
490
|
if (!this.enableMonitoring || !this.traceManager) {
|
|
423
491
|
return null;
|
|
424
492
|
}
|
|
425
493
|
const traceClientContext = getTraceClientContext();
|
|
426
494
|
const totalDuration = this.getDuration();
|
|
427
|
-
|
|
495
|
+
// Use the tuple returned by condenseTrace
|
|
496
|
+
const [condensedEntries, evaluationRuns] = this.condenseTrace(traceClientContext.entries);
|
|
428
497
|
const tokenCounts = {
|
|
429
|
-
prompt_tokens: 0,
|
|
430
|
-
|
|
498
|
+
prompt_tokens: 0,
|
|
499
|
+
completion_tokens: 0,
|
|
500
|
+
total_tokens: 0,
|
|
501
|
+
prompt_tokens_cost_usd: 0.0,
|
|
502
|
+
completion_tokens_cost_usd: 0.0,
|
|
503
|
+
total_cost_usd: 0.0
|
|
431
504
|
};
|
|
432
|
-
|
|
433
|
-
|
|
505
|
+
// First pass: collect all LLM calls with their token counts
|
|
506
|
+
const llmCalls = [];
|
|
507
|
+
let index = 0;
|
|
508
|
+
for (const entry of condensedEntries) {
|
|
434
509
|
if (entry.span_type === 'llm' && ((_a = entry.output) === null || _a === void 0 ? void 0 : _a.usage)) {
|
|
435
510
|
const usage = entry.output.usage;
|
|
511
|
+
const modelName = ((_b = entry.inputs) === null || _b === void 0 ? void 0 : _b.model) || "";
|
|
436
512
|
let promptTokens = 0;
|
|
437
513
|
let completionTokens = 0;
|
|
514
|
+
// Handle different token naming conventions
|
|
438
515
|
if (usage.prompt_tokens !== undefined || usage.completion_tokens !== undefined) {
|
|
439
516
|
promptTokens = usage.prompt_tokens || 0;
|
|
440
517
|
completionTokens = usage.completion_tokens || 0;
|
|
@@ -442,6 +519,7 @@ class TraceClient {
|
|
|
442
519
|
else if (usage.input_tokens !== undefined || usage.output_tokens !== undefined) {
|
|
443
520
|
promptTokens = usage.input_tokens || 0;
|
|
444
521
|
completionTokens = usage.output_tokens || 0;
|
|
522
|
+
// Standardize naming
|
|
445
523
|
usage.prompt_tokens = promptTokens;
|
|
446
524
|
usage.completion_tokens = completionTokens;
|
|
447
525
|
delete usage.input_tokens;
|
|
@@ -450,33 +528,63 @@ class TraceClient {
|
|
|
450
528
|
tokenCounts.prompt_tokens += promptTokens;
|
|
451
529
|
tokenCounts.completion_tokens += completionTokens;
|
|
452
530
|
tokenCounts.total_tokens += usage.total_tokens || (promptTokens + completionTokens);
|
|
453
|
-
|
|
531
|
+
// Add to list of calls for cost calculation
|
|
454
532
|
if (modelName) {
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
533
|
+
llmCalls.push({
|
|
534
|
+
modelName,
|
|
535
|
+
promptTokens,
|
|
536
|
+
completionTokens,
|
|
537
|
+
entryIndex: index
|
|
538
|
+
});
|
|
539
|
+
}
|
|
540
|
+
}
|
|
541
|
+
index++;
|
|
542
|
+
}
|
|
543
|
+
// Second pass: calculate costs for each LLM call using the API
|
|
544
|
+
if (this.traceManager && llmCalls.length > 0) {
|
|
545
|
+
// Process each LLM call
|
|
546
|
+
for (const call of llmCalls) {
|
|
547
|
+
try {
|
|
548
|
+
// Get costs from the API
|
|
549
|
+
const costs = yield this.traceManager.calculateTokenCosts(call.modelName, call.promptTokens, call.completionTokens);
|
|
550
|
+
if (costs) {
|
|
551
|
+
// Update the entry with the costs
|
|
552
|
+
const entry = condensedEntries[call.entryIndex];
|
|
553
|
+
if ((_c = entry.output) === null || _c === void 0 ? void 0 : _c.usage) {
|
|
554
|
+
entry.output.usage.prompt_tokens_cost_usd = costs.prompt_tokens_cost_usd;
|
|
555
|
+
entry.output.usage.completion_tokens_cost_usd = costs.completion_tokens_cost_usd;
|
|
556
|
+
entry.output.usage.total_cost_usd = costs.total_cost_usd;
|
|
557
|
+
}
|
|
558
|
+
// Add to the total costs, ensuring values are numbers (default to 0)
|
|
559
|
+
tokenCounts.prompt_tokens_cost_usd += (_d = costs.prompt_tokens_cost_usd) !== null && _d !== void 0 ? _d : 0.0;
|
|
560
|
+
tokenCounts.completion_tokens_cost_usd += (_e = costs.completion_tokens_cost_usd) !== null && _e !== void 0 ? _e : 0.0;
|
|
561
|
+
tokenCounts.total_cost_usd += (_f = costs.total_cost_usd) !== null && _f !== void 0 ? _f : 0.0;
|
|
465
562
|
}
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
563
|
+
else {
|
|
564
|
+
// If calculation failed, set costs to null in the entry (matching Python behavior)
|
|
565
|
+
const entry = condensedEntries[call.entryIndex];
|
|
566
|
+
if ((_g = entry.output) === null || _g === void 0 ? void 0 : _g.usage) {
|
|
567
|
+
entry.output.usage.prompt_tokens_cost_usd = null;
|
|
568
|
+
entry.output.usage.completion_tokens_cost_usd = null;
|
|
569
|
+
entry.output.usage.total_cost_usd = null;
|
|
570
|
+
}
|
|
571
|
+
// Log warning, but totals remain 0 for this call
|
|
572
|
+
logger_instance_js_1.default.warn(`Token cost calculation failed for model '${call.modelName}'. Cost information will not be available.`);
|
|
471
573
|
}
|
|
472
574
|
}
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
575
|
+
catch (e) {
|
|
576
|
+
logger_instance_js_1.default.warn(`Error calculating cost for model '${call.modelName}':`, e);
|
|
577
|
+
// Set costs to null in the entry
|
|
578
|
+
const entry = condensedEntries[call.entryIndex];
|
|
579
|
+
if ((_h = entry.output) === null || _h === void 0 ? void 0 : _h.usage) {
|
|
580
|
+
entry.output.usage.prompt_tokens_cost_usd = null;
|
|
581
|
+
entry.output.usage.completion_tokens_cost_usd = null;
|
|
582
|
+
entry.output.usage.total_cost_usd = null;
|
|
583
|
+
}
|
|
584
|
+
// Totals remain unchanged (effectively adding 0)
|
|
477
585
|
}
|
|
478
586
|
}
|
|
479
|
-
}
|
|
587
|
+
}
|
|
480
588
|
// Convert rules array to a dictionary (Record<string, Rule>)
|
|
481
589
|
const rulesDict = {};
|
|
482
590
|
this.rules.forEach(rule => {
|
|
@@ -493,16 +601,15 @@ class TraceClient {
|
|
|
493
601
|
duration: totalDuration,
|
|
494
602
|
token_counts: tokenCounts,
|
|
495
603
|
entries: condensedEntries,
|
|
496
|
-
|
|
497
|
-
empty_save: emptySave,
|
|
604
|
+
evaluation_runs: evaluationRuns,
|
|
498
605
|
overwrite: this.overwrite,
|
|
499
606
|
parent_trace_id: this.parentTraceId,
|
|
500
607
|
parent_name: this.parentName
|
|
501
608
|
};
|
|
502
609
|
try {
|
|
503
|
-
yield this.traceManager.saveTrace(traceData
|
|
610
|
+
yield this.traceManager.saveTrace(traceData);
|
|
504
611
|
logger_instance_js_1.default.info(`Trace ${this.traceId} saved successfully.`);
|
|
505
|
-
if (
|
|
612
|
+
if (this.enableEvaluations) {
|
|
506
613
|
try {
|
|
507
614
|
yield this.traceManager.addTraceToEvalQueue(traceData);
|
|
508
615
|
logger_instance_js_1.default.info(`Trace ${this.traceId} added to evaluation queue.`);
|
|
@@ -536,7 +643,7 @@ class TraceClient {
|
|
|
536
643
|
traceClientContext.entries.forEach(entry => {
|
|
537
644
|
var _a;
|
|
538
645
|
const indent = " ".repeat((_a = entry.depth) !== null && _a !== void 0 ? _a : 0);
|
|
539
|
-
const timeStr = entry.
|
|
646
|
+
const timeStr = entry.created_at ? `@ ${new Date(entry.created_at * 1000).toISOString()}` : '';
|
|
540
647
|
const shortSpanId = entry.span_id ? `(id: ${entry.span_id.substring(0, 8)}...)` : '';
|
|
541
648
|
const shortParentId = entry.parent_span_id ? `(parent: ${entry.parent_span_id.substring(0, 8)}...)` : '';
|
|
542
649
|
try {
|
|
@@ -617,9 +724,8 @@ class TraceClient {
|
|
|
617
724
|
* @returns Promise that resolves when the evaluation entry has been added to the trace
|
|
618
725
|
*/
|
|
619
726
|
asyncEvaluate(scorers_1) {
|
|
620
|
-
return __awaiter(this, arguments, void 0, function* (
|
|
621
|
-
|
|
622
|
-
scorers, options = {}) {
|
|
727
|
+
return __awaiter(this, arguments, void 0, function* (scorers, options = {}) {
|
|
728
|
+
var _a;
|
|
623
729
|
if (!this.enableEvaluations) {
|
|
624
730
|
logger_instance_js_1.default.warn("Evaluations are disabled. Skipping async evaluation.");
|
|
625
731
|
return;
|
|
@@ -634,6 +740,12 @@ class TraceClient {
|
|
|
634
740
|
logger_instance_js_1.default.warn("No APIJudgmentScorers found in the provided scorers list. Skipping async evaluation as backend requires API scorers.");
|
|
635
741
|
return;
|
|
636
742
|
}
|
|
743
|
+
// Process rules (currently just using this.rules directly)
|
|
744
|
+
const loadedRules = this.rules; // TODO: Add ScorerWrapper-like processing if needed in TS
|
|
745
|
+
// Prevent using JudgevalScorer with rules - only APIJudgmentScorer allowed with rules
|
|
746
|
+
if (loadedRules && loadedRules.length > 0 && scorers.some(s => !(s instanceof base_scorer_js_1.APIJudgmentScorer))) {
|
|
747
|
+
throw new Error("Cannot use Judgeval scorers, you can only use API scorers when using rules. Please either remove rules or use only APIJudgmentScorer types.");
|
|
748
|
+
}
|
|
637
749
|
const startTime = Date.now() / 1000; // Record start time in seconds
|
|
638
750
|
// Create example structure matching Python/backend expectations
|
|
639
751
|
const example = {
|
|
@@ -666,8 +778,6 @@ class TraceClient {
|
|
|
666
778
|
const idPart = currentEntry ? currentEntry.span_id.substring(0, 8) : this.traceId.substring(0, 8);
|
|
667
779
|
const evalName = `${this.name.charAt(0).toUpperCase() + this.name.slice(1)}-${idPart}-[${scorerNames}]`;
|
|
668
780
|
// --- End eval name creation ---
|
|
669
|
-
// Process rules (currently just using this.rules directly)
|
|
670
|
-
const loadedRules = this.rules; // TODO: Add ScorerWrapper-like processing if needed in TS
|
|
671
781
|
// Construct the evaluation payload
|
|
672
782
|
const evalRunPayload = {
|
|
673
783
|
organization_id: this.organizationId,
|
|
@@ -682,45 +792,24 @@ class TraceClient {
|
|
|
682
792
|
override: this.overwrite, // Use trace's overwrite setting
|
|
683
793
|
rules: loadedRules // Pass the processed rules
|
|
684
794
|
};
|
|
685
|
-
// Add evaluation entry
|
|
686
|
-
this.
|
|
795
|
+
// Add evaluation entry to the trace
|
|
796
|
+
this.addEntry({
|
|
797
|
+
type: "evaluation",
|
|
798
|
+
function: currentEntry.function,
|
|
799
|
+
span_id: currentEntry.span_id, // May be undefined
|
|
800
|
+
depth: (_a = currentEntry.depth) !== null && _a !== void 0 ? _a : 0,
|
|
801
|
+
created_at: Date.now() / 1000,
|
|
802
|
+
evaluation_runs: [evalRunPayload], // Store the object back in an array to match interface
|
|
803
|
+
duration: Date.now() / 1000 - startTime,
|
|
804
|
+
span_type: currentEntry.span_type
|
|
805
|
+
});
|
|
687
806
|
}
|
|
688
807
|
catch (error) {
|
|
689
|
-
|
|
690
|
-
|
|
808
|
+
logger_instance_js_1.default.error(`Failed during asyncEvaluate execution: ${error instanceof Error ? error.message : String(error)}`);
|
|
809
|
+
throw error; // Re-throw after logging
|
|
691
810
|
}
|
|
692
811
|
});
|
|
693
812
|
}
|
|
694
|
-
/**
|
|
695
|
-
* Private helper to add an evaluation entry to the trace.
|
|
696
|
-
* This mirrors the structure of Python's add_eval_run.
|
|
697
|
-
*
|
|
698
|
-
* @param evalRunPayload The constructed payload for the evaluation.
|
|
699
|
-
* @param startTime The start time (in seconds) of the evaluation process.
|
|
700
|
-
*/
|
|
701
|
-
_addEvalRun(evalRunPayload, startTime) {
|
|
702
|
-
var _a, _b;
|
|
703
|
-
const traceClientContext = getTraceClientContext();
|
|
704
|
-
const currentEntry = traceClientContext.entryStack.at(-1);
|
|
705
|
-
if (!currentEntry) {
|
|
706
|
-
logger_instance_js_1.default.warn(`No current entry to record evaluation to\nStack trace: ${new Error().stack}`);
|
|
707
|
-
return;
|
|
708
|
-
}
|
|
709
|
-
const function_ = (_a = currentEntry.function) !== null && _a !== void 0 ? _a : "unknown_function";
|
|
710
|
-
const depth = (_b = currentEntry.depth) !== null && _b !== void 0 ? _b : 0;
|
|
711
|
-
const duration = Date.now() / 1000 - startTime;
|
|
712
|
-
// Add evaluation entry to the trace
|
|
713
|
-
this.addEntry({
|
|
714
|
-
type: "evaluation",
|
|
715
|
-
function: function_,
|
|
716
|
-
span_id: currentEntry.span_id, // May be undefined
|
|
717
|
-
depth: depth,
|
|
718
|
-
timestamp: Date.now() / 1000,
|
|
719
|
-
evaluation_runs: [evalRunPayload], // Embed the payload
|
|
720
|
-
duration: duration,
|
|
721
|
-
span_type: "evaluation"
|
|
722
|
-
});
|
|
723
|
-
}
|
|
724
813
|
// OPTIONAL: Add a method to get the original name if needed elsewhere
|
|
725
814
|
getOriginalName() {
|
|
726
815
|
return this.originalName;
|
|
@@ -802,11 +891,6 @@ class Tracer {
|
|
|
802
891
|
apiKey: this.apiKey,
|
|
803
892
|
organizationId: this.organizationId,
|
|
804
893
|
});
|
|
805
|
-
if (traceClient.enableMonitoring) {
|
|
806
|
-
traceClient.save(true).catch(err => {
|
|
807
|
-
logger_instance_js_1.default.error(`Failed to save empty trace (${traceClient.traceId}):`, err);
|
|
808
|
-
});
|
|
809
|
-
}
|
|
810
894
|
return traceClient;
|
|
811
895
|
}
|
|
812
896
|
*trace(name, options = {}) {
|