judgeval 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. package/README.md +95 -68
  2. package/dist/cjs/common/logger-instance.js +17 -19
  3. package/dist/cjs/common/logger-instance.js.map +1 -1
  4. package/dist/cjs/common/tracer.js +210 -126
  5. package/dist/cjs/common/tracer.js.map +1 -1
  6. package/dist/cjs/constants.js +3 -2
  7. package/dist/cjs/constants.js.map +1 -1
  8. package/dist/cjs/index.js +1 -3
  9. package/dist/cjs/index.js.map +1 -1
  10. package/dist/cjs/judgment-client.js +20 -114
  11. package/dist/cjs/judgment-client.js.map +1 -1
  12. package/dist/cjs/scorers/api-scorer.js +56 -48
  13. package/dist/cjs/scorers/api-scorer.js.map +1 -1
  14. package/dist/cjs/scorers/base-scorer.js +66 -11
  15. package/dist/cjs/scorers/base-scorer.js.map +1 -1
  16. package/dist/esm/common/logger-instance.js +17 -19
  17. package/dist/esm/common/logger-instance.js.map +1 -1
  18. package/dist/esm/common/tracer.js +211 -127
  19. package/dist/esm/common/tracer.js.map +1 -1
  20. package/dist/esm/constants.js +2 -1
  21. package/dist/esm/constants.js.map +1 -1
  22. package/dist/esm/index.js +0 -1
  23. package/dist/esm/index.js.map +1 -1
  24. package/dist/esm/judgment-client.js +20 -114
  25. package/dist/esm/judgment-client.js.map +1 -1
  26. package/dist/esm/scorers/api-scorer.js +56 -48
  27. package/dist/esm/scorers/api-scorer.js.map +1 -1
  28. package/dist/esm/scorers/base-scorer.js +66 -11
  29. package/dist/esm/scorers/base-scorer.js.map +1 -1
  30. package/dist/types/common/tracer.d.ts +27 -13
  31. package/dist/types/constants.d.ts +2 -1
  32. package/dist/types/index.d.ts +0 -1
  33. package/dist/types/judgment-client.d.ts +0 -22
  34. package/dist/types/scorers/api-scorer.d.ts +15 -15
  35. package/dist/types/scorers/base-scorer.d.ts +53 -10
  36. package/package.json +10 -3
  37. package/dist/cjs/scorers/exact-match-scorer.js +0 -84
  38. package/dist/cjs/scorers/exact-match-scorer.js.map +0 -1
  39. package/dist/esm/scorers/exact-match-scorer.js +0 -80
  40. package/dist/esm/scorers/exact-match-scorer.js.map +0 -1
  41. package/dist/types/scorers/exact-match-scorer.d.ts +0 -10
@@ -74,16 +74,13 @@ class TraceManagerClient {
74
74
  try {
75
75
  // Use isomorphic fetch (available globally in modern Node.js and browsers)
76
76
  const response = yield fetch(url, Object.assign(Object.assign({}, options), { headers: headers }));
77
- if (!response.ok) {
78
- const errorBody = yield response.text();
79
- console.error(`API Error (${response.status}) for ${options.method || 'GET'} ${url}: ${errorBody}`);
80
- throw new Error(`Judgment API request failed: ${response.status} ${response.statusText} - ${errorBody}`);
81
- }
77
+ // We will return the response object even if !response.ok
78
+ // The caller (e.g., saveTrace) is responsible for checking response.ok or response.status
82
79
  // Handle cases where the response might be empty (e.g., 204 No Content on DELETE)
83
80
  if (response.status === 204) {
84
81
  return null; // Indicate success with no content
85
82
  }
86
- return yield response.json();
83
+ return response;
87
84
  }
88
85
  catch (error) {
89
86
  console.error(`Network or fetch error during ${options.method || 'GET'} ${url}:`, error);
@@ -100,21 +97,52 @@ class TraceManagerClient {
100
97
  });
101
98
  });
102
99
  }
103
- saveTrace(traceData, emptySave) {
100
+ saveTrace(traceData) {
104
101
  return __awaiter(this, void 0, void 0, function* () {
102
+ // _fetch now returns the raw response object or throws on network error
105
103
  const response = yield this._fetch(constants_js_1.JUDGMENT_TRACES_SAVE_API_URL, {
106
104
  method: 'POST',
107
- body: JSON.stringify(traceData),
105
+ body: JSON.stringify(traceData), // Stringify directly here again
108
106
  });
109
- // Optionally log the UI URL like the Python version
110
- if (!emptySave && (response === null || response === void 0 ? void 0 : response.ui_results_url)) {
111
- // Use console.info or a dedicated logger for user-facing messages
112
- // Note: We can't replicate Rich library's colored link easily in standard console
107
+ // Check if _fetch threw a network error (caught below) or returned an invalid object
108
+ if (!response) {
109
+ // This case should ideally be caught by _fetch's catch block, but double-check
110
+ throw new Error('Failed to save trace data: No response received from API.');
111
+ }
112
+ // Now, check the status code on the received response object
113
+ if (response.status === 400) {
114
+ // Attempt to get error body for more info
115
+ const errorBody = yield response.text();
116
+ throw new Error(`Failed to save trace data: Check your Trace name for conflicts, set overwrite=True to overwrite existing traces: ${response.status} ${response.statusText || ''} - ${errorBody}`);
117
+ }
118
+ else if (!response.ok) { // Handles other errors (5xx, 4xx except 400)
119
+ const errorBody = yield response.text();
120
+ throw new Error(`Failed to save trace data: Status ${response.status} ${response.statusText || '(No status text)'} - ${errorBody}`);
121
+ }
122
+ // --- Success Path ---
123
+ // Optionally log the UI URL (needs JSON parsing)
124
+ let responseData = null;
125
+ try {
126
+ // Handle 204 No Content specifically
127
+ if (response.status === 204) {
128
+ responseData = null; // Or maybe { success: true }?
129
+ }
130
+ else {
131
+ responseData = yield response.json(); // Parse JSON only on success
132
+ }
133
+ }
134
+ catch (parseError) {
135
+ logger_instance_js_1.default.warn("Failed to parse successful API response JSON.", { error: parseError });
136
+ // Depending on requirements, maybe throw, maybe return a default success object
137
+ throw new Error(`API request succeeded (${response.status}), but failed to parse JSON response.`);
138
+ }
139
+ if (responseData === null || responseData === void 0 ? void 0 : responseData.ui_results_url) {
113
140
  console.info(`
114
- 🔍 View trace: ${response.ui_results_url}
141
+ 🔍 View trace: ${responseData.ui_results_url}
115
142
  `);
116
143
  }
117
- return response;
144
+ // Return the parsed data (or null for 204)
145
+ return responseData;
118
146
  });
119
147
  }
120
148
  deleteTrace(traceId) {
@@ -143,6 +171,35 @@ class TraceManagerClient {
143
171
  });
144
172
  });
145
173
  }
174
+ /**
175
+ * Calculate token costs directly using the API endpoint.
176
+ * This is more accurate than client-side calculation as it uses the most up-to-date pricing.
177
+ *
178
+ * @param model The model name (e.g. 'gpt-4', 'claude-3-opus-20240229')
179
+ * @param promptTokens Number of tokens in the prompt/input
180
+ * @param completionTokens Number of tokens in the completion/output
181
+ * @returns Object containing token counts and calculated costs in USD
182
+ */
183
+ calculateTokenCosts(model, promptTokens, completionTokens) {
184
+ return __awaiter(this, void 0, void 0, function* () {
185
+ try {
186
+ // Use the new calculation endpoint
187
+ const result = yield this._fetch(constants_js_1.JUDGMENT_CALCULATE_TOKEN_COSTS_API_URL, {
188
+ method: 'POST',
189
+ body: JSON.stringify({
190
+ model,
191
+ prompt_tokens: promptTokens,
192
+ completion_tokens: completionTokens
193
+ })
194
+ });
195
+ return result;
196
+ }
197
+ catch (error) {
198
+ logger_instance_js_1.default.warn(`Failed to calculate token costs for model ${model}.`, { error: error instanceof Error ? error.message : String(error) });
199
+ return null;
200
+ }
201
+ });
202
+ }
146
203
  }
147
204
  exports.TraceManagerClient = TraceManagerClient;
148
205
  // --- Helper Functions ---
@@ -160,6 +217,7 @@ class TraceClient {
160
217
  constructor(config) {
161
218
  var _a, _b, _c, _d, _e;
162
219
  this.traceManager = null; // Can be null if monitoring disabled
220
+ this._spanDepths = {}; // Track depth of active spans
163
221
  this.traceId = config.traceId || (0, uuid_1.v4)();
164
222
  this.originalName = config.name || 'default_trace'; // Store original
165
223
  this.name = sanitizeName(this.originalName); // Use sanitized name internally
@@ -198,7 +256,7 @@ class TraceClient {
198
256
  recordInput(inputs) {
199
257
  const traceClientContext = getTraceClientContext();
200
258
  const currentEntry = traceClientContext.entryStack.at(-1);
201
- if (!currentEntry) {
259
+ if (!currentEntry || !currentEntry.span_id) {
202
260
  console.warn(`No current entry to record input to\nStack trace: ${new Error().stack}`);
203
261
  return;
204
262
  }
@@ -207,14 +265,16 @@ class TraceClient {
207
265
  span_id: currentEntry.span_id,
208
266
  inputs,
209
267
  function: currentEntry.function,
210
- depth: currentEntry.depth,
211
- span_type: currentEntry.span_type
268
+ depth: this._spanDepths[currentEntry.span_id],
269
+ created_at: Date.now() / 1000,
270
+ span_type: currentEntry.span_type,
271
+ message: `Inputs to ${currentEntry.function}`
212
272
  });
213
273
  }
214
274
  recordOutput(output) {
215
275
  const traceClientContext = getTraceClientContext();
216
276
  const currentEntry = traceClientContext.entryStack.at(-1);
217
- if (!currentEntry) {
277
+ if (!currentEntry || !currentEntry.span_id) {
218
278
  console.warn(`No current entry to record output to\nStack trace: ${new Error().stack}`);
219
279
  return;
220
280
  }
@@ -223,33 +283,28 @@ class TraceClient {
223
283
  span_id: currentEntry.span_id,
224
284
  output,
225
285
  function: currentEntry.function,
226
- depth: currentEntry.depth,
227
- span_type: currentEntry.span_type
286
+ depth: this._spanDepths[currentEntry.span_id],
287
+ created_at: Date.now() / 1000,
288
+ span_type: currentEntry.span_type,
289
+ message: `Output from ${currentEntry.function}`
228
290
  });
229
291
  }
230
292
  recordError(error) {
231
- var _a;
232
293
  const traceClientContext = getTraceClientContext();
233
294
  const currentEntry = traceClientContext.entryStack.at(-1);
234
- if (!currentEntry) {
295
+ if (!currentEntry || !currentEntry.span_id) {
235
296
  console.warn(`No current entry to record error to\nStack trace: ${new Error().stack}`);
236
297
  return;
237
298
  }
238
- let output = error;
239
- if (error instanceof Error) {
240
- output = {
241
- name: error.name,
242
- message: error.message,
243
- stack: (_a = error.stack) === null || _a === void 0 ? void 0 : _a.substring(0, 1000)
244
- };
245
- }
246
299
  this.addEntry({
247
300
  type: 'error',
248
301
  span_id: currentEntry.span_id,
249
- output,
302
+ output: error,
250
303
  function: currentEntry.function,
251
- depth: currentEntry.depth,
252
- span_type: currentEntry.span_type
304
+ depth: this._spanDepths[currentEntry.span_id],
305
+ created_at: Date.now() / 1000,
306
+ span_type: currentEntry.span_type,
307
+ message: `Error from ${currentEntry.function}`
253
308
  });
254
309
  }
255
310
  startSpan(name, options = {}) {
@@ -260,18 +315,20 @@ class TraceClient {
260
315
  const spanType = (_a = options.spanType) !== null && _a !== void 0 ? _a : 'span';
261
316
  const startTime = Date.now() / 1000;
262
317
  let depth = 0, parentSpanId = undefined;
263
- if (parentEntry) {
264
- depth = parentEntry.depth + 1;
318
+ if (parentEntry && parentEntry.span_id) {
319
+ depth = this._spanDepths[parentEntry.span_id] + 1;
265
320
  parentSpanId = parentEntry.span_id;
266
321
  }
322
+ this._spanDepths[spanId] = depth;
267
323
  const entry = {
268
324
  type: 'enter',
269
325
  function: name,
270
326
  span_id: spanId,
271
327
  depth: depth,
272
- timestamp: startTime,
328
+ created_at: startTime,
273
329
  span_type: spanType,
274
- parent_span_id: parentSpanId
330
+ parent_span_id: parentSpanId,
331
+ message: name
275
332
  };
276
333
  this.addEntry(entry);
277
334
  traceClientContext.entryStack.push(entry);
@@ -279,21 +336,24 @@ class TraceClient {
279
336
  endSpan() {
280
337
  const traceClientContext = getTraceClientContext();
281
338
  const enterEntry = traceClientContext.entryStack.pop();
282
- if (!enterEntry) {
339
+ if (!enterEntry || !enterEntry.span_id) {
283
340
  console.warn("No enter entry to end");
284
341
  return;
285
342
  }
286
343
  const endTime = Date.now() / 1000;
287
- const duration = endTime - enterEntry.timestamp;
344
+ const duration = endTime - enterEntry.created_at;
288
345
  this.addEntry({
289
346
  type: 'exit',
290
347
  function: enterEntry.function,
291
348
  span_id: enterEntry.span_id,
292
- depth: enterEntry.depth,
293
- timestamp: endTime,
349
+ depth: this._spanDepths[enterEntry.span_id],
350
+ created_at: endTime,
294
351
  duration: duration,
295
- span_type: enterEntry.span_type
352
+ span_type: enterEntry.span_type,
353
+ message: `← ${enterEntry.function}`
296
354
  });
355
+ // Clean up depth tracking
356
+ delete this._spanDepths[enterEntry.span_id];
297
357
  }
298
358
  *span(name, options = {}) {
299
359
  if (!this.enableMonitoring) {
@@ -311,6 +371,7 @@ class TraceClient {
311
371
  condenseTrace(rawEntries) {
312
372
  var _a, _b, _c, _d, _e;
313
373
  const spansById = {};
374
+ const allEvaluationRuns = []; // To collect all eval runs
314
375
  for (const entry of rawEntries) {
315
376
  const spanId = entry.span_id;
316
377
  if (!spanId)
@@ -320,7 +381,8 @@ class TraceClient {
320
381
  span_id: spanId,
321
382
  function: entry.function || 'unknown',
322
383
  depth: (_a = entry.depth) !== null && _a !== void 0 ? _a : 0,
323
- timestamp: (_b = entry.timestamp) !== null && _b !== void 0 ? _b : 0,
384
+ created_at: new Date(((_b = entry.created_at) !== null && _b !== void 0 ? _b : 0) * 1000).toISOString(), // Convert number to ISO string
385
+ trace_id: this.traceId, // Add trace_id
324
386
  parent_span_id: entry.parent_span_id,
325
387
  span_type: entry.span_type || 'span',
326
388
  inputs: null,
@@ -335,14 +397,14 @@ class TraceClient {
335
397
  case 'enter':
336
398
  currentSpanData.function = entry.function || currentSpanData.function;
337
399
  currentSpanData.depth = (_c = entry.depth) !== null && _c !== void 0 ? _c : currentSpanData.depth;
338
- currentSpanData.timestamp = (_d = entry.timestamp) !== null && _d !== void 0 ? _d : currentSpanData.timestamp;
400
+ currentSpanData.created_at = new Date(((_d = entry.created_at) !== null && _d !== void 0 ? _d : 0) * 1000).toISOString(); // Ensure created_at is string on update
339
401
  currentSpanData.parent_span_id = entry.parent_span_id;
340
402
  currentSpanData.span_type = entry.span_type || currentSpanData.span_type;
341
- currentSpanData.start_time = entry.timestamp;
403
+ currentSpanData.start_time = entry.created_at; // Keep original number for duration calc
342
404
  break;
343
405
  case 'exit':
344
406
  currentSpanData.duration = (_e = entry.duration) !== null && _e !== void 0 ? _e : currentSpanData.duration;
345
- currentSpanData.end_time = entry.timestamp;
407
+ currentSpanData.end_time = entry.created_at; // Keep original number for duration calc
346
408
  if (currentSpanData.duration === null && currentSpanData.start_time && currentSpanData.end_time) {
347
409
  currentSpanData.duration = currentSpanData.end_time - currentSpanData.start_time;
348
410
  }
@@ -360,8 +422,11 @@ class TraceClient {
360
422
  currentSpanData.output = entry.output;
361
423
  break;
362
424
  case 'evaluation':
363
- if (entry.evaluation_runs) {
364
- currentSpanData.evaluation_runs.push(...entry.evaluation_runs);
425
+ // Check if evaluation_runs is an array and has at least one element
426
+ if (Array.isArray(entry.evaluation_runs) && entry.evaluation_runs.length > 0) {
427
+ const evalPayload = entry.evaluation_runs[0]; // Extract the payload object
428
+ currentSpanData.evaluation_runs.push(evalPayload); // Add the object to the span's list
429
+ allEvaluationRuns.push(evalPayload); // Add the object to the central list
365
430
  }
366
431
  break;
367
432
  }
@@ -392,9 +457,11 @@ class TraceClient {
392
457
  childrenMap[parentId].push(span);
393
458
  }
394
459
  }
395
- roots.sort((a, b) => a.timestamp - b.timestamp);
460
+ // Sort using parsed dates
461
+ roots.sort((a, b) => Date.parse(a.created_at) - Date.parse(b.created_at));
396
462
  for (const parentId in childrenMap) {
397
- childrenMap[parentId].sort((a, b) => a.timestamp - b.timestamp);
463
+ // Sort using parsed dates
464
+ childrenMap[parentId].sort((a, b) => Date.parse(a.created_at) - Date.parse(b.created_at));
398
465
  }
399
466
  function buildFlatListDfs(span) {
400
467
  if (visited.has(span.span_id))
@@ -415,26 +482,36 @@ class TraceClient {
415
482
  buildFlatListDfs(span);
416
483
  }
417
484
  }
418
- return sortedCondensedList;
485
+ return [sortedCondensedList, allEvaluationRuns]; // Return both
419
486
  }
420
487
  save() {
421
488
  return __awaiter(this, arguments, void 0, function* (emptySave = false) {
489
+ var _a, _b, _c, _d, _e, _f, _g, _h;
422
490
  if (!this.enableMonitoring || !this.traceManager) {
423
491
  return null;
424
492
  }
425
493
  const traceClientContext = getTraceClientContext();
426
494
  const totalDuration = this.getDuration();
427
- const condensedEntries = this.condenseTrace(traceClientContext.entries);
495
+ // Use the tuple returned by condenseTrace
496
+ const [condensedEntries, evaluationRuns] = this.condenseTrace(traceClientContext.entries);
428
497
  const tokenCounts = {
429
- prompt_tokens: 0, completion_tokens: 0, total_tokens: 0,
430
- prompt_tokens_cost_usd: 0.0, completion_tokens_cost_usd: 0.0, total_cost_usd: 0.0
498
+ prompt_tokens: 0,
499
+ completion_tokens: 0,
500
+ total_tokens: 0,
501
+ prompt_tokens_cost_usd: 0.0,
502
+ completion_tokens_cost_usd: 0.0,
503
+ total_cost_usd: 0.0
431
504
  };
432
- condensedEntries.forEach(entry => {
433
- var _a, _b;
505
+ // First pass: collect all LLM calls with their token counts
506
+ const llmCalls = [];
507
+ let index = 0;
508
+ for (const entry of condensedEntries) {
434
509
  if (entry.span_type === 'llm' && ((_a = entry.output) === null || _a === void 0 ? void 0 : _a.usage)) {
435
510
  const usage = entry.output.usage;
511
+ const modelName = ((_b = entry.inputs) === null || _b === void 0 ? void 0 : _b.model) || "";
436
512
  let promptTokens = 0;
437
513
  let completionTokens = 0;
514
+ // Handle different token naming conventions
438
515
  if (usage.prompt_tokens !== undefined || usage.completion_tokens !== undefined) {
439
516
  promptTokens = usage.prompt_tokens || 0;
440
517
  completionTokens = usage.completion_tokens || 0;
@@ -442,6 +519,7 @@ class TraceClient {
442
519
  else if (usage.input_tokens !== undefined || usage.output_tokens !== undefined) {
443
520
  promptTokens = usage.input_tokens || 0;
444
521
  completionTokens = usage.output_tokens || 0;
522
+ // Standardize naming
445
523
  usage.prompt_tokens = promptTokens;
446
524
  usage.completion_tokens = completionTokens;
447
525
  delete usage.input_tokens;
@@ -450,33 +528,63 @@ class TraceClient {
450
528
  tokenCounts.prompt_tokens += promptTokens;
451
529
  tokenCounts.completion_tokens += completionTokens;
452
530
  tokenCounts.total_tokens += usage.total_tokens || (promptTokens + completionTokens);
453
- const modelName = ((_b = entry.inputs) === null || _b === void 0 ? void 0 : _b.model) || "";
531
+ // Add to list of calls for cost calculation
454
532
  if (modelName) {
455
- try {
456
- const promptCost = 0.0;
457
- const completionCost = 0.0;
458
- const callTotalCost = promptCost + completionCost;
459
- usage.prompt_tokens_cost_usd = promptCost;
460
- usage.completion_tokens_cost_usd = completionCost;
461
- usage.total_cost_usd = callTotalCost;
462
- tokenCounts.prompt_tokens_cost_usd += promptCost;
463
- tokenCounts.completion_tokens_cost_usd += completionCost;
464
- tokenCounts.total_cost_usd += callTotalCost;
533
+ llmCalls.push({
534
+ modelName,
535
+ promptTokens,
536
+ completionTokens,
537
+ entryIndex: index
538
+ });
539
+ }
540
+ }
541
+ index++;
542
+ }
543
+ // Second pass: calculate costs for each LLM call using the API
544
+ if (this.traceManager && llmCalls.length > 0) {
545
+ // Process each LLM call
546
+ for (const call of llmCalls) {
547
+ try {
548
+ // Get costs from the API
549
+ const costs = yield this.traceManager.calculateTokenCosts(call.modelName, call.promptTokens, call.completionTokens);
550
+ if (costs) {
551
+ // Update the entry with the costs
552
+ const entry = condensedEntries[call.entryIndex];
553
+ if ((_c = entry.output) === null || _c === void 0 ? void 0 : _c.usage) {
554
+ entry.output.usage.prompt_tokens_cost_usd = costs.prompt_tokens_cost_usd;
555
+ entry.output.usage.completion_tokens_cost_usd = costs.completion_tokens_cost_usd;
556
+ entry.output.usage.total_cost_usd = costs.total_cost_usd;
557
+ }
558
+ // Add to the total costs, ensuring values are numbers (default to 0)
559
+ tokenCounts.prompt_tokens_cost_usd += (_d = costs.prompt_tokens_cost_usd) !== null && _d !== void 0 ? _d : 0.0;
560
+ tokenCounts.completion_tokens_cost_usd += (_e = costs.completion_tokens_cost_usd) !== null && _e !== void 0 ? _e : 0.0;
561
+ tokenCounts.total_cost_usd += (_f = costs.total_cost_usd) !== null && _f !== void 0 ? _f : 0.0;
465
562
  }
466
- catch (e) {
467
- console.warn(`Error calculating cost for model '${modelName}':`, e);
468
- usage.prompt_tokens_cost_usd = null;
469
- usage.completion_tokens_cost_usd = null;
470
- usage.total_cost_usd = null;
563
+ else {
564
+ // If calculation failed, set costs to null in the entry (matching Python behavior)
565
+ const entry = condensedEntries[call.entryIndex];
566
+ if ((_g = entry.output) === null || _g === void 0 ? void 0 : _g.usage) {
567
+ entry.output.usage.prompt_tokens_cost_usd = null;
568
+ entry.output.usage.completion_tokens_cost_usd = null;
569
+ entry.output.usage.total_cost_usd = null;
570
+ }
571
+ // Log warning, but totals remain 0 for this call
572
+ logger_instance_js_1.default.warn(`Token cost calculation failed for model '${call.modelName}'. Cost information will not be available.`);
471
573
  }
472
574
  }
473
- else {
474
- usage.prompt_tokens_cost_usd = null;
475
- usage.completion_tokens_cost_usd = null;
476
- usage.total_cost_usd = null;
575
+ catch (e) {
576
+ logger_instance_js_1.default.warn(`Error calculating cost for model '${call.modelName}':`, e);
577
+ // Set costs to null in the entry
578
+ const entry = condensedEntries[call.entryIndex];
579
+ if ((_h = entry.output) === null || _h === void 0 ? void 0 : _h.usage) {
580
+ entry.output.usage.prompt_tokens_cost_usd = null;
581
+ entry.output.usage.completion_tokens_cost_usd = null;
582
+ entry.output.usage.total_cost_usd = null;
583
+ }
584
+ // Totals remain unchanged (effectively adding 0)
477
585
  }
478
586
  }
479
- });
587
+ }
480
588
  // Convert rules array to a dictionary (Record<string, Rule>)
481
589
  const rulesDict = {};
482
590
  this.rules.forEach(rule => {
@@ -493,16 +601,15 @@ class TraceClient {
493
601
  duration: totalDuration,
494
602
  token_counts: tokenCounts,
495
603
  entries: condensedEntries,
496
- rules: rulesDict,
497
- empty_save: emptySave,
604
+ evaluation_runs: evaluationRuns,
498
605
  overwrite: this.overwrite,
499
606
  parent_trace_id: this.parentTraceId,
500
607
  parent_name: this.parentName
501
608
  };
502
609
  try {
503
- yield this.traceManager.saveTrace(traceData, emptySave);
610
+ yield this.traceManager.saveTrace(traceData);
504
611
  logger_instance_js_1.default.info(`Trace ${this.traceId} saved successfully.`);
505
- if (!emptySave && this.enableEvaluations) {
612
+ if (this.enableEvaluations) {
506
613
  try {
507
614
  yield this.traceManager.addTraceToEvalQueue(traceData);
508
615
  logger_instance_js_1.default.info(`Trace ${this.traceId} added to evaluation queue.`);
@@ -536,7 +643,7 @@ class TraceClient {
536
643
  traceClientContext.entries.forEach(entry => {
537
644
  var _a;
538
645
  const indent = " ".repeat((_a = entry.depth) !== null && _a !== void 0 ? _a : 0);
539
- const timeStr = entry.timestamp ? `@ ${new Date(entry.timestamp * 1000).toISOString()}` : '';
646
+ const timeStr = entry.created_at ? `@ ${new Date(entry.created_at * 1000).toISOString()}` : '';
540
647
  const shortSpanId = entry.span_id ? `(id: ${entry.span_id.substring(0, 8)}...)` : '';
541
648
  const shortParentId = entry.parent_span_id ? `(parent: ${entry.parent_span_id.substring(0, 8)}...)` : '';
542
649
  try {
@@ -617,9 +724,8 @@ class TraceClient {
617
724
  * @returns Promise that resolves when the evaluation entry has been added to the trace
618
725
  */
619
726
  asyncEvaluate(scorers_1) {
620
- return __awaiter(this, arguments, void 0, function* (
621
- // Accept general Scorer type, but filter/check for API scorers internally
622
- scorers, options = {}) {
727
+ return __awaiter(this, arguments, void 0, function* (scorers, options = {}) {
728
+ var _a;
623
729
  if (!this.enableEvaluations) {
624
730
  logger_instance_js_1.default.warn("Evaluations are disabled. Skipping async evaluation.");
625
731
  return;
@@ -634,6 +740,12 @@ class TraceClient {
634
740
  logger_instance_js_1.default.warn("No APIJudgmentScorers found in the provided scorers list. Skipping async evaluation as backend requires API scorers.");
635
741
  return;
636
742
  }
743
+ // Process rules (currently just using this.rules directly)
744
+ const loadedRules = this.rules; // TODO: Add ScorerWrapper-like processing if needed in TS
745
+ // Prevent using JudgevalScorer with rules - only APIJudgmentScorer allowed with rules
746
+ if (loadedRules && loadedRules.length > 0 && scorers.some(s => !(s instanceof base_scorer_js_1.APIJudgmentScorer))) {
747
+ throw new Error("Cannot use Judgeval scorers, you can only use API scorers when using rules. Please either remove rules or use only APIJudgmentScorer types.");
748
+ }
637
749
  const startTime = Date.now() / 1000; // Record start time in seconds
638
750
  // Create example structure matching Python/backend expectations
639
751
  const example = {
@@ -666,8 +778,6 @@ class TraceClient {
666
778
  const idPart = currentEntry ? currentEntry.span_id.substring(0, 8) : this.traceId.substring(0, 8);
667
779
  const evalName = `${this.name.charAt(0).toUpperCase() + this.name.slice(1)}-${idPart}-[${scorerNames}]`;
668
780
  // --- End eval name creation ---
669
- // Process rules (currently just using this.rules directly)
670
- const loadedRules = this.rules; // TODO: Add ScorerWrapper-like processing if needed in TS
671
781
  // Construct the evaluation payload
672
782
  const evalRunPayload = {
673
783
  organization_id: this.organizationId,
@@ -682,45 +792,24 @@ class TraceClient {
682
792
  override: this.overwrite, // Use trace's overwrite setting
683
793
  rules: loadedRules // Pass the processed rules
684
794
  };
685
- // Add evaluation entry using the helper method
686
- this._addEvalRun(evalRunPayload, startTime);
795
+ // Add evaluation entry to the trace
796
+ this.addEntry({
797
+ type: "evaluation",
798
+ function: currentEntry.function,
799
+ span_id: currentEntry.span_id, // May be undefined
800
+ depth: (_a = currentEntry.depth) !== null && _a !== void 0 ? _a : 0,
801
+ created_at: Date.now() / 1000,
802
+ evaluation_runs: [evalRunPayload], // Store the object back in an array to match interface
803
+ duration: Date.now() / 1000 - startTime,
804
+ span_type: currentEntry.span_type
805
+ });
687
806
  }
688
807
  catch (error) {
689
- console.error(`Failed during asyncEvaluate execution: ${error instanceof Error ? error.message : String(error)}`);
690
- // Decide if we should re-throw or just log
808
+ logger_instance_js_1.default.error(`Failed during asyncEvaluate execution: ${error instanceof Error ? error.message : String(error)}`);
809
+ throw error; // Re-throw after logging
691
810
  }
692
811
  });
693
812
  }
694
- /**
695
- * Private helper to add an evaluation entry to the trace.
696
- * This mirrors the structure of Python's add_eval_run.
697
- *
698
- * @param evalRunPayload The constructed payload for the evaluation.
699
- * @param startTime The start time (in seconds) of the evaluation process.
700
- */
701
- _addEvalRun(evalRunPayload, startTime) {
702
- var _a, _b;
703
- const traceClientContext = getTraceClientContext();
704
- const currentEntry = traceClientContext.entryStack.at(-1);
705
- if (!currentEntry) {
706
- logger_instance_js_1.default.warn(`No current entry to record evaluation to\nStack trace: ${new Error().stack}`);
707
- return;
708
- }
709
- const function_ = (_a = currentEntry.function) !== null && _a !== void 0 ? _a : "unknown_function";
710
- const depth = (_b = currentEntry.depth) !== null && _b !== void 0 ? _b : 0;
711
- const duration = Date.now() / 1000 - startTime;
712
- // Add evaluation entry to the trace
713
- this.addEntry({
714
- type: "evaluation",
715
- function: function_,
716
- span_id: currentEntry.span_id, // May be undefined
717
- depth: depth,
718
- timestamp: Date.now() / 1000,
719
- evaluation_runs: [evalRunPayload], // Embed the payload
720
- duration: duration,
721
- span_type: "evaluation"
722
- });
723
- }
724
813
  // OPTIONAL: Add a method to get the original name if needed elsewhere
725
814
  getOriginalName() {
726
815
  return this.originalName;
@@ -802,11 +891,6 @@ class Tracer {
802
891
  apiKey: this.apiKey,
803
892
  organizationId: this.organizationId,
804
893
  });
805
- if (traceClient.enableMonitoring) {
806
- traceClient.save(true).catch(err => {
807
- logger_instance_js_1.default.error(`Failed to save empty trace (${traceClient.traceId}):`, err);
808
- });
809
- }
810
894
  return traceClient;
811
895
  }
812
896
  *trace(name, options = {}) {