judgeval 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. package/README.md +95 -68
  2. package/dist/cjs/common/logger-instance.js +17 -19
  3. package/dist/cjs/common/logger-instance.js.map +1 -1
  4. package/dist/cjs/common/tracer.js +210 -126
  5. package/dist/cjs/common/tracer.js.map +1 -1
  6. package/dist/cjs/constants.js +3 -2
  7. package/dist/cjs/constants.js.map +1 -1
  8. package/dist/cjs/index.js +1 -3
  9. package/dist/cjs/index.js.map +1 -1
  10. package/dist/cjs/judgment-client.js +20 -114
  11. package/dist/cjs/judgment-client.js.map +1 -1
  12. package/dist/cjs/scorers/api-scorer.js +56 -48
  13. package/dist/cjs/scorers/api-scorer.js.map +1 -1
  14. package/dist/cjs/scorers/base-scorer.js +66 -11
  15. package/dist/cjs/scorers/base-scorer.js.map +1 -1
  16. package/dist/esm/common/logger-instance.js +17 -19
  17. package/dist/esm/common/logger-instance.js.map +1 -1
  18. package/dist/esm/common/tracer.js +211 -127
  19. package/dist/esm/common/tracer.js.map +1 -1
  20. package/dist/esm/constants.js +2 -1
  21. package/dist/esm/constants.js.map +1 -1
  22. package/dist/esm/index.js +0 -1
  23. package/dist/esm/index.js.map +1 -1
  24. package/dist/esm/judgment-client.js +20 -114
  25. package/dist/esm/judgment-client.js.map +1 -1
  26. package/dist/esm/scorers/api-scorer.js +56 -48
  27. package/dist/esm/scorers/api-scorer.js.map +1 -1
  28. package/dist/esm/scorers/base-scorer.js +66 -11
  29. package/dist/esm/scorers/base-scorer.js.map +1 -1
  30. package/dist/types/common/tracer.d.ts +27 -13
  31. package/dist/types/constants.d.ts +2 -1
  32. package/dist/types/index.d.ts +0 -1
  33. package/dist/types/judgment-client.d.ts +0 -22
  34. package/dist/types/scorers/api-scorer.d.ts +15 -15
  35. package/dist/types/scorers/base-scorer.d.ts +53 -10
  36. package/package.json +10 -3
  37. package/dist/cjs/scorers/exact-match-scorer.js +0 -84
  38. package/dist/cjs/scorers/exact-match-scorer.js.map +0 -1
  39. package/dist/esm/scorers/exact-match-scorer.js +0 -80
  40. package/dist/esm/scorers/exact-match-scorer.js.map +0 -1
  41. package/dist/types/scorers/exact-match-scorer.d.ts +0 -10
@@ -14,7 +14,7 @@ import { AsyncLocalStorage } from 'async_hooks';
14
14
  import OpenAI from 'openai';
15
15
  import Anthropic from '@anthropic-ai/sdk';
16
16
  // Local Imports
17
- import { JUDGMENT_TRACES_SAVE_API_URL, JUDGMENT_TRACES_FETCH_API_URL, JUDGMENT_TRACES_DELETE_API_URL, JUDGMENT_TRACES_ADD_TO_EVAL_QUEUE_API_URL,
17
+ import { JUDGMENT_TRACES_SAVE_API_URL, JUDGMENT_TRACES_FETCH_API_URL, JUDGMENT_TRACES_DELETE_API_URL, JUDGMENT_TRACES_ADD_TO_EVAL_QUEUE_API_URL, JUDGMENT_CALCULATE_TOKEN_COSTS_API_URL,
18
18
  // Add other necessary constants if needed
19
19
  } from '../constants.js';
20
20
  import { APIJudgmentScorer } from '../scorers/base-scorer.js';
@@ -70,16 +70,13 @@ class TraceManagerClient {
70
70
  try {
71
71
  // Use isomorphic fetch (available globally in modern Node.js and browsers)
72
72
  const response = yield fetch(url, Object.assign(Object.assign({}, options), { headers: headers }));
73
- if (!response.ok) {
74
- const errorBody = yield response.text();
75
- console.error(`API Error (${response.status}) for ${options.method || 'GET'} ${url}: ${errorBody}`);
76
- throw new Error(`Judgment API request failed: ${response.status} ${response.statusText} - ${errorBody}`);
77
- }
73
+ // We will return the response object even if !response.ok
74
+ // The caller (e.g., saveTrace) is responsible for checking response.ok or response.status
78
75
  // Handle cases where the response might be empty (e.g., 204 No Content on DELETE)
79
76
  if (response.status === 204) {
80
77
  return null; // Indicate success with no content
81
78
  }
82
- return yield response.json();
79
+ return response;
83
80
  }
84
81
  catch (error) {
85
82
  console.error(`Network or fetch error during ${options.method || 'GET'} ${url}:`, error);
@@ -96,21 +93,52 @@ class TraceManagerClient {
96
93
  });
97
94
  });
98
95
  }
99
- saveTrace(traceData, emptySave) {
96
+ saveTrace(traceData) {
100
97
  return __awaiter(this, void 0, void 0, function* () {
98
+ // _fetch now returns the raw response object or throws on network error
101
99
  const response = yield this._fetch(JUDGMENT_TRACES_SAVE_API_URL, {
102
100
  method: 'POST',
103
- body: JSON.stringify(traceData),
101
+ body: JSON.stringify(traceData), // Stringify directly here again
104
102
  });
105
- // Optionally log the UI URL like the Python version
106
- if (!emptySave && (response === null || response === void 0 ? void 0 : response.ui_results_url)) {
107
- // Use console.info or a dedicated logger for user-facing messages
108
- // Note: We can't replicate Rich library's colored link easily in standard console
103
+ // Check if _fetch threw a network error (caught below) or returned an invalid object
104
+ if (!response) {
105
+ // This case should ideally be caught by _fetch's catch block, but double-check
106
+ throw new Error('Failed to save trace data: No response received from API.');
107
+ }
108
+ // Now, check the status code on the received response object
109
+ if (response.status === 400) {
110
+ // Attempt to get error body for more info
111
+ const errorBody = yield response.text();
112
+ throw new Error(`Failed to save trace data: Check your Trace name for conflicts, set overwrite=True to overwrite existing traces: ${response.status} ${response.statusText || ''} - ${errorBody}`);
113
+ }
114
+ else if (!response.ok) { // Handles other errors (5xx, 4xx except 400)
115
+ const errorBody = yield response.text();
116
+ throw new Error(`Failed to save trace data: Status ${response.status} ${response.statusText || '(No status text)'} - ${errorBody}`);
117
+ }
118
+ // --- Success Path ---
119
+ // Optionally log the UI URL (needs JSON parsing)
120
+ let responseData = null;
121
+ try {
122
+ // Handle 204 No Content specifically
123
+ if (response.status === 204) {
124
+ responseData = null; // Or maybe { success: true }?
125
+ }
126
+ else {
127
+ responseData = yield response.json(); // Parse JSON only on success
128
+ }
129
+ }
130
+ catch (parseError) {
131
+ logger.warn("Failed to parse successful API response JSON.", { error: parseError });
132
+ // Depending on requirements, maybe throw, maybe return a default success object
133
+ throw new Error(`API request succeeded (${response.status}), but failed to parse JSON response.`);
134
+ }
135
+ if (responseData === null || responseData === void 0 ? void 0 : responseData.ui_results_url) {
109
136
  console.info(`
110
- 🔍 View trace: ${response.ui_results_url}
137
+ 🔍 View trace: ${responseData.ui_results_url}
111
138
  `);
112
139
  }
113
- return response;
140
+ // Return the parsed data (or null for 204)
141
+ return responseData;
114
142
  });
115
143
  }
116
144
  deleteTrace(traceId) {
@@ -139,6 +167,35 @@ class TraceManagerClient {
139
167
  });
140
168
  });
141
169
  }
170
+ /**
171
+ * Calculate token costs directly using the API endpoint.
172
+ * This is more accurate than client-side calculation as it uses the most up-to-date pricing.
173
+ *
174
+ * @param model The model name (e.g. 'gpt-4', 'claude-3-opus-20240229')
175
+ * @param promptTokens Number of tokens in the prompt/input
176
+ * @param completionTokens Number of tokens in the completion/output
177
+ * @returns Object containing token counts and calculated costs in USD
178
+ */
179
+ calculateTokenCosts(model, promptTokens, completionTokens) {
180
+ return __awaiter(this, void 0, void 0, function* () {
181
+ try {
182
+ // Use the new calculation endpoint
183
+ const result = yield this._fetch(JUDGMENT_CALCULATE_TOKEN_COSTS_API_URL, {
184
+ method: 'POST',
185
+ body: JSON.stringify({
186
+ model,
187
+ prompt_tokens: promptTokens,
188
+ completion_tokens: completionTokens
189
+ })
190
+ });
191
+ return result;
192
+ }
193
+ catch (error) {
194
+ logger.warn(`Failed to calculate token costs for model ${model}.`, { error: error instanceof Error ? error.message : String(error) });
195
+ return null;
196
+ }
197
+ });
198
+ }
142
199
  }
143
200
  // --- Helper Functions ---
144
201
  // Helper function to sanitize names (e.g., replace spaces with underscores)
@@ -155,6 +212,7 @@ class TraceClient {
155
212
  constructor(config) {
156
213
  var _a, _b, _c, _d, _e;
157
214
  this.traceManager = null; // Can be null if monitoring disabled
215
+ this._spanDepths = {}; // Track depth of active spans
158
216
  this.traceId = config.traceId || uuidv4();
159
217
  this.originalName = config.name || 'default_trace'; // Store original
160
218
  this.name = sanitizeName(this.originalName); // Use sanitized name internally
@@ -193,7 +251,7 @@ class TraceClient {
193
251
  recordInput(inputs) {
194
252
  const traceClientContext = getTraceClientContext();
195
253
  const currentEntry = traceClientContext.entryStack.at(-1);
196
- if (!currentEntry) {
254
+ if (!currentEntry || !currentEntry.span_id) {
197
255
  console.warn(`No current entry to record input to\nStack trace: ${new Error().stack}`);
198
256
  return;
199
257
  }
@@ -202,14 +260,16 @@ class TraceClient {
202
260
  span_id: currentEntry.span_id,
203
261
  inputs,
204
262
  function: currentEntry.function,
205
- depth: currentEntry.depth,
206
- span_type: currentEntry.span_type
263
+ depth: this._spanDepths[currentEntry.span_id],
264
+ created_at: Date.now() / 1000,
265
+ span_type: currentEntry.span_type,
266
+ message: `Inputs to ${currentEntry.function}`
207
267
  });
208
268
  }
209
269
  recordOutput(output) {
210
270
  const traceClientContext = getTraceClientContext();
211
271
  const currentEntry = traceClientContext.entryStack.at(-1);
212
- if (!currentEntry) {
272
+ if (!currentEntry || !currentEntry.span_id) {
213
273
  console.warn(`No current entry to record output to\nStack trace: ${new Error().stack}`);
214
274
  return;
215
275
  }
@@ -218,33 +278,28 @@ class TraceClient {
218
278
  span_id: currentEntry.span_id,
219
279
  output,
220
280
  function: currentEntry.function,
221
- depth: currentEntry.depth,
222
- span_type: currentEntry.span_type
281
+ depth: this._spanDepths[currentEntry.span_id],
282
+ created_at: Date.now() / 1000,
283
+ span_type: currentEntry.span_type,
284
+ message: `Output from ${currentEntry.function}`
223
285
  });
224
286
  }
225
287
  recordError(error) {
226
- var _a;
227
288
  const traceClientContext = getTraceClientContext();
228
289
  const currentEntry = traceClientContext.entryStack.at(-1);
229
- if (!currentEntry) {
290
+ if (!currentEntry || !currentEntry.span_id) {
230
291
  console.warn(`No current entry to record error to\nStack trace: ${new Error().stack}`);
231
292
  return;
232
293
  }
233
- let output = error;
234
- if (error instanceof Error) {
235
- output = {
236
- name: error.name,
237
- message: error.message,
238
- stack: (_a = error.stack) === null || _a === void 0 ? void 0 : _a.substring(0, 1000)
239
- };
240
- }
241
294
  this.addEntry({
242
295
  type: 'error',
243
296
  span_id: currentEntry.span_id,
244
- output,
297
+ output: error,
245
298
  function: currentEntry.function,
246
- depth: currentEntry.depth,
247
- span_type: currentEntry.span_type
299
+ depth: this._spanDepths[currentEntry.span_id],
300
+ created_at: Date.now() / 1000,
301
+ span_type: currentEntry.span_type,
302
+ message: `Error from ${currentEntry.function}`
248
303
  });
249
304
  }
250
305
  startSpan(name, options = {}) {
@@ -255,18 +310,20 @@ class TraceClient {
255
310
  const spanType = (_a = options.spanType) !== null && _a !== void 0 ? _a : 'span';
256
311
  const startTime = Date.now() / 1000;
257
312
  let depth = 0, parentSpanId = undefined;
258
- if (parentEntry) {
259
- depth = parentEntry.depth + 1;
313
+ if (parentEntry && parentEntry.span_id) {
314
+ depth = this._spanDepths[parentEntry.span_id] + 1;
260
315
  parentSpanId = parentEntry.span_id;
261
316
  }
317
+ this._spanDepths[spanId] = depth;
262
318
  const entry = {
263
319
  type: 'enter',
264
320
  function: name,
265
321
  span_id: spanId,
266
322
  depth: depth,
267
- timestamp: startTime,
323
+ created_at: startTime,
268
324
  span_type: spanType,
269
- parent_span_id: parentSpanId
325
+ parent_span_id: parentSpanId,
326
+ message: name
270
327
  };
271
328
  this.addEntry(entry);
272
329
  traceClientContext.entryStack.push(entry);
@@ -274,21 +331,24 @@ class TraceClient {
274
331
  endSpan() {
275
332
  const traceClientContext = getTraceClientContext();
276
333
  const enterEntry = traceClientContext.entryStack.pop();
277
- if (!enterEntry) {
334
+ if (!enterEntry || !enterEntry.span_id) {
278
335
  console.warn("No enter entry to end");
279
336
  return;
280
337
  }
281
338
  const endTime = Date.now() / 1000;
282
- const duration = endTime - enterEntry.timestamp;
339
+ const duration = endTime - enterEntry.created_at;
283
340
  this.addEntry({
284
341
  type: 'exit',
285
342
  function: enterEntry.function,
286
343
  span_id: enterEntry.span_id,
287
- depth: enterEntry.depth,
288
- timestamp: endTime,
344
+ depth: this._spanDepths[enterEntry.span_id],
345
+ created_at: endTime,
289
346
  duration: duration,
290
- span_type: enterEntry.span_type
347
+ span_type: enterEntry.span_type,
348
+ message: `← ${enterEntry.function}`
291
349
  });
350
+ // Clean up depth tracking
351
+ delete this._spanDepths[enterEntry.span_id];
292
352
  }
293
353
  *span(name, options = {}) {
294
354
  if (!this.enableMonitoring) {
@@ -306,6 +366,7 @@ class TraceClient {
306
366
  condenseTrace(rawEntries) {
307
367
  var _a, _b, _c, _d, _e;
308
368
  const spansById = {};
369
+ const allEvaluationRuns = []; // To collect all eval runs
309
370
  for (const entry of rawEntries) {
310
371
  const spanId = entry.span_id;
311
372
  if (!spanId)
@@ -315,7 +376,8 @@ class TraceClient {
315
376
  span_id: spanId,
316
377
  function: entry.function || 'unknown',
317
378
  depth: (_a = entry.depth) !== null && _a !== void 0 ? _a : 0,
318
- timestamp: (_b = entry.timestamp) !== null && _b !== void 0 ? _b : 0,
379
+ created_at: new Date(((_b = entry.created_at) !== null && _b !== void 0 ? _b : 0) * 1000).toISOString(), // Convert number to ISO string
380
+ trace_id: this.traceId, // Add trace_id
319
381
  parent_span_id: entry.parent_span_id,
320
382
  span_type: entry.span_type || 'span',
321
383
  inputs: null,
@@ -330,14 +392,14 @@ class TraceClient {
330
392
  case 'enter':
331
393
  currentSpanData.function = entry.function || currentSpanData.function;
332
394
  currentSpanData.depth = (_c = entry.depth) !== null && _c !== void 0 ? _c : currentSpanData.depth;
333
- currentSpanData.timestamp = (_d = entry.timestamp) !== null && _d !== void 0 ? _d : currentSpanData.timestamp;
395
+ currentSpanData.created_at = new Date(((_d = entry.created_at) !== null && _d !== void 0 ? _d : 0) * 1000).toISOString(); // Ensure created_at is string on update
334
396
  currentSpanData.parent_span_id = entry.parent_span_id;
335
397
  currentSpanData.span_type = entry.span_type || currentSpanData.span_type;
336
- currentSpanData.start_time = entry.timestamp;
398
+ currentSpanData.start_time = entry.created_at; // Keep original number for duration calc
337
399
  break;
338
400
  case 'exit':
339
401
  currentSpanData.duration = (_e = entry.duration) !== null && _e !== void 0 ? _e : currentSpanData.duration;
340
- currentSpanData.end_time = entry.timestamp;
402
+ currentSpanData.end_time = entry.created_at; // Keep original number for duration calc
341
403
  if (currentSpanData.duration === null && currentSpanData.start_time && currentSpanData.end_time) {
342
404
  currentSpanData.duration = currentSpanData.end_time - currentSpanData.start_time;
343
405
  }
@@ -355,8 +417,11 @@ class TraceClient {
355
417
  currentSpanData.output = entry.output;
356
418
  break;
357
419
  case 'evaluation':
358
- if (entry.evaluation_runs) {
359
- currentSpanData.evaluation_runs.push(...entry.evaluation_runs);
420
+ // Check if evaluation_runs is an array and has at least one element
421
+ if (Array.isArray(entry.evaluation_runs) && entry.evaluation_runs.length > 0) {
422
+ const evalPayload = entry.evaluation_runs[0]; // Extract the payload object
423
+ currentSpanData.evaluation_runs.push(evalPayload); // Add the object to the span's list
424
+ allEvaluationRuns.push(evalPayload); // Add the object to the central list
360
425
  }
361
426
  break;
362
427
  }
@@ -387,9 +452,11 @@ class TraceClient {
387
452
  childrenMap[parentId].push(span);
388
453
  }
389
454
  }
390
- roots.sort((a, b) => a.timestamp - b.timestamp);
455
+ // Sort using parsed dates
456
+ roots.sort((a, b) => Date.parse(a.created_at) - Date.parse(b.created_at));
391
457
  for (const parentId in childrenMap) {
392
- childrenMap[parentId].sort((a, b) => a.timestamp - b.timestamp);
458
+ // Sort using parsed dates
459
+ childrenMap[parentId].sort((a, b) => Date.parse(a.created_at) - Date.parse(b.created_at));
393
460
  }
394
461
  function buildFlatListDfs(span) {
395
462
  if (visited.has(span.span_id))
@@ -410,26 +477,36 @@ class TraceClient {
410
477
  buildFlatListDfs(span);
411
478
  }
412
479
  }
413
- return sortedCondensedList;
480
+ return [sortedCondensedList, allEvaluationRuns]; // Return both
414
481
  }
415
482
  save() {
416
483
  return __awaiter(this, arguments, void 0, function* (emptySave = false) {
484
+ var _a, _b, _c, _d, _e, _f, _g, _h;
417
485
  if (!this.enableMonitoring || !this.traceManager) {
418
486
  return null;
419
487
  }
420
488
  const traceClientContext = getTraceClientContext();
421
489
  const totalDuration = this.getDuration();
422
- const condensedEntries = this.condenseTrace(traceClientContext.entries);
490
+ // Use the tuple returned by condenseTrace
491
+ const [condensedEntries, evaluationRuns] = this.condenseTrace(traceClientContext.entries);
423
492
  const tokenCounts = {
424
- prompt_tokens: 0, completion_tokens: 0, total_tokens: 0,
425
- prompt_tokens_cost_usd: 0.0, completion_tokens_cost_usd: 0.0, total_cost_usd: 0.0
493
+ prompt_tokens: 0,
494
+ completion_tokens: 0,
495
+ total_tokens: 0,
496
+ prompt_tokens_cost_usd: 0.0,
497
+ completion_tokens_cost_usd: 0.0,
498
+ total_cost_usd: 0.0
426
499
  };
427
- condensedEntries.forEach(entry => {
428
- var _a, _b;
500
+ // First pass: collect all LLM calls with their token counts
501
+ const llmCalls = [];
502
+ let index = 0;
503
+ for (const entry of condensedEntries) {
429
504
  if (entry.span_type === 'llm' && ((_a = entry.output) === null || _a === void 0 ? void 0 : _a.usage)) {
430
505
  const usage = entry.output.usage;
506
+ const modelName = ((_b = entry.inputs) === null || _b === void 0 ? void 0 : _b.model) || "";
431
507
  let promptTokens = 0;
432
508
  let completionTokens = 0;
509
+ // Handle different token naming conventions
433
510
  if (usage.prompt_tokens !== undefined || usage.completion_tokens !== undefined) {
434
511
  promptTokens = usage.prompt_tokens || 0;
435
512
  completionTokens = usage.completion_tokens || 0;
@@ -437,6 +514,7 @@ class TraceClient {
437
514
  else if (usage.input_tokens !== undefined || usage.output_tokens !== undefined) {
438
515
  promptTokens = usage.input_tokens || 0;
439
516
  completionTokens = usage.output_tokens || 0;
517
+ // Standardize naming
440
518
  usage.prompt_tokens = promptTokens;
441
519
  usage.completion_tokens = completionTokens;
442
520
  delete usage.input_tokens;
@@ -445,33 +523,63 @@ class TraceClient {
445
523
  tokenCounts.prompt_tokens += promptTokens;
446
524
  tokenCounts.completion_tokens += completionTokens;
447
525
  tokenCounts.total_tokens += usage.total_tokens || (promptTokens + completionTokens);
448
- const modelName = ((_b = entry.inputs) === null || _b === void 0 ? void 0 : _b.model) || "";
526
+ // Add to list of calls for cost calculation
449
527
  if (modelName) {
450
- try {
451
- const promptCost = 0.0;
452
- const completionCost = 0.0;
453
- const callTotalCost = promptCost + completionCost;
454
- usage.prompt_tokens_cost_usd = promptCost;
455
- usage.completion_tokens_cost_usd = completionCost;
456
- usage.total_cost_usd = callTotalCost;
457
- tokenCounts.prompt_tokens_cost_usd += promptCost;
458
- tokenCounts.completion_tokens_cost_usd += completionCost;
459
- tokenCounts.total_cost_usd += callTotalCost;
528
+ llmCalls.push({
529
+ modelName,
530
+ promptTokens,
531
+ completionTokens,
532
+ entryIndex: index
533
+ });
534
+ }
535
+ }
536
+ index++;
537
+ }
538
+ // Second pass: calculate costs for each LLM call using the API
539
+ if (this.traceManager && llmCalls.length > 0) {
540
+ // Process each LLM call
541
+ for (const call of llmCalls) {
542
+ try {
543
+ // Get costs from the API
544
+ const costs = yield this.traceManager.calculateTokenCosts(call.modelName, call.promptTokens, call.completionTokens);
545
+ if (costs) {
546
+ // Update the entry with the costs
547
+ const entry = condensedEntries[call.entryIndex];
548
+ if ((_c = entry.output) === null || _c === void 0 ? void 0 : _c.usage) {
549
+ entry.output.usage.prompt_tokens_cost_usd = costs.prompt_tokens_cost_usd;
550
+ entry.output.usage.completion_tokens_cost_usd = costs.completion_tokens_cost_usd;
551
+ entry.output.usage.total_cost_usd = costs.total_cost_usd;
552
+ }
553
+ // Add to the total costs, ensuring values are numbers (default to 0)
554
+ tokenCounts.prompt_tokens_cost_usd += (_d = costs.prompt_tokens_cost_usd) !== null && _d !== void 0 ? _d : 0.0;
555
+ tokenCounts.completion_tokens_cost_usd += (_e = costs.completion_tokens_cost_usd) !== null && _e !== void 0 ? _e : 0.0;
556
+ tokenCounts.total_cost_usd += (_f = costs.total_cost_usd) !== null && _f !== void 0 ? _f : 0.0;
460
557
  }
461
- catch (e) {
462
- console.warn(`Error calculating cost for model '${modelName}':`, e);
463
- usage.prompt_tokens_cost_usd = null;
464
- usage.completion_tokens_cost_usd = null;
465
- usage.total_cost_usd = null;
558
+ else {
559
+ // If calculation failed, set costs to null in the entry (matching Python behavior)
560
+ const entry = condensedEntries[call.entryIndex];
561
+ if ((_g = entry.output) === null || _g === void 0 ? void 0 : _g.usage) {
562
+ entry.output.usage.prompt_tokens_cost_usd = null;
563
+ entry.output.usage.completion_tokens_cost_usd = null;
564
+ entry.output.usage.total_cost_usd = null;
565
+ }
566
+ // Log warning, but totals remain 0 for this call
567
+ logger.warn(`Token cost calculation failed for model '${call.modelName}'. Cost information will not be available.`);
466
568
  }
467
569
  }
468
- else {
469
- usage.prompt_tokens_cost_usd = null;
470
- usage.completion_tokens_cost_usd = null;
471
- usage.total_cost_usd = null;
570
+ catch (e) {
571
+ logger.warn(`Error calculating cost for model '${call.modelName}':`, e);
572
+ // Set costs to null in the entry
573
+ const entry = condensedEntries[call.entryIndex];
574
+ if ((_h = entry.output) === null || _h === void 0 ? void 0 : _h.usage) {
575
+ entry.output.usage.prompt_tokens_cost_usd = null;
576
+ entry.output.usage.completion_tokens_cost_usd = null;
577
+ entry.output.usage.total_cost_usd = null;
578
+ }
579
+ // Totals remain unchanged (effectively adding 0)
472
580
  }
473
581
  }
474
- });
582
+ }
475
583
  // Convert rules array to a dictionary (Record<string, Rule>)
476
584
  const rulesDict = {};
477
585
  this.rules.forEach(rule => {
@@ -488,16 +596,15 @@ class TraceClient {
488
596
  duration: totalDuration,
489
597
  token_counts: tokenCounts,
490
598
  entries: condensedEntries,
491
- rules: rulesDict,
492
- empty_save: emptySave,
599
+ evaluation_runs: evaluationRuns,
493
600
  overwrite: this.overwrite,
494
601
  parent_trace_id: this.parentTraceId,
495
602
  parent_name: this.parentName
496
603
  };
497
604
  try {
498
- yield this.traceManager.saveTrace(traceData, emptySave);
605
+ yield this.traceManager.saveTrace(traceData);
499
606
  logger.info(`Trace ${this.traceId} saved successfully.`);
500
- if (!emptySave && this.enableEvaluations) {
607
+ if (this.enableEvaluations) {
501
608
  try {
502
609
  yield this.traceManager.addTraceToEvalQueue(traceData);
503
610
  logger.info(`Trace ${this.traceId} added to evaluation queue.`);
@@ -531,7 +638,7 @@ class TraceClient {
531
638
  traceClientContext.entries.forEach(entry => {
532
639
  var _a;
533
640
  const indent = " ".repeat((_a = entry.depth) !== null && _a !== void 0 ? _a : 0);
534
- const timeStr = entry.timestamp ? `@ ${new Date(entry.timestamp * 1000).toISOString()}` : '';
641
+ const timeStr = entry.created_at ? `@ ${new Date(entry.created_at * 1000).toISOString()}` : '';
535
642
  const shortSpanId = entry.span_id ? `(id: ${entry.span_id.substring(0, 8)}...)` : '';
536
643
  const shortParentId = entry.parent_span_id ? `(parent: ${entry.parent_span_id.substring(0, 8)}...)` : '';
537
644
  try {
@@ -612,9 +719,8 @@ class TraceClient {
612
719
  * @returns Promise that resolves when the evaluation entry has been added to the trace
613
720
  */
614
721
  asyncEvaluate(scorers_1) {
615
- return __awaiter(this, arguments, void 0, function* (
616
- // Accept general Scorer type, but filter/check for API scorers internally
617
- scorers, options = {}) {
722
+ return __awaiter(this, arguments, void 0, function* (scorers, options = {}) {
723
+ var _a;
618
724
  if (!this.enableEvaluations) {
619
725
  logger.warn("Evaluations are disabled. Skipping async evaluation.");
620
726
  return;
@@ -629,6 +735,12 @@ class TraceClient {
629
735
  logger.warn("No APIJudgmentScorers found in the provided scorers list. Skipping async evaluation as backend requires API scorers.");
630
736
  return;
631
737
  }
738
+ // Process rules (currently just using this.rules directly)
739
+ const loadedRules = this.rules; // TODO: Add ScorerWrapper-like processing if needed in TS
740
+ // Prevent using JudgevalScorer with rules - only APIJudgmentScorer allowed with rules
741
+ if (loadedRules && loadedRules.length > 0 && scorers.some(s => !(s instanceof APIJudgmentScorer))) {
742
+ throw new Error("Cannot use Judgeval scorers, you can only use API scorers when using rules. Please either remove rules or use only APIJudgmentScorer types.");
743
+ }
632
744
  const startTime = Date.now() / 1000; // Record start time in seconds
633
745
  // Create example structure matching Python/backend expectations
634
746
  const example = {
@@ -661,8 +773,6 @@ class TraceClient {
661
773
  const idPart = currentEntry ? currentEntry.span_id.substring(0, 8) : this.traceId.substring(0, 8);
662
774
  const evalName = `${this.name.charAt(0).toUpperCase() + this.name.slice(1)}-${idPart}-[${scorerNames}]`;
663
775
  // --- End eval name creation ---
664
- // Process rules (currently just using this.rules directly)
665
- const loadedRules = this.rules; // TODO: Add ScorerWrapper-like processing if needed in TS
666
776
  // Construct the evaluation payload
667
777
  const evalRunPayload = {
668
778
  organization_id: this.organizationId,
@@ -677,45 +787,24 @@ class TraceClient {
677
787
  override: this.overwrite, // Use trace's overwrite setting
678
788
  rules: loadedRules // Pass the processed rules
679
789
  };
680
- // Add evaluation entry using the helper method
681
- this._addEvalRun(evalRunPayload, startTime);
790
+ // Add evaluation entry to the trace
791
+ this.addEntry({
792
+ type: "evaluation",
793
+ function: currentEntry.function,
794
+ span_id: currentEntry.span_id, // May be undefined
795
+ depth: (_a = currentEntry.depth) !== null && _a !== void 0 ? _a : 0,
796
+ created_at: Date.now() / 1000,
797
+ evaluation_runs: [evalRunPayload], // Store the object back in an array to match interface
798
+ duration: Date.now() / 1000 - startTime,
799
+ span_type: currentEntry.span_type
800
+ });
682
801
  }
683
802
  catch (error) {
684
- console.error(`Failed during asyncEvaluate execution: ${error instanceof Error ? error.message : String(error)}`);
685
- // Decide if we should re-throw or just log
803
+ logger.error(`Failed during asyncEvaluate execution: ${error instanceof Error ? error.message : String(error)}`);
804
+ throw error; // Re-throw after logging
686
805
  }
687
806
  });
688
807
  }
689
- /**
690
- * Private helper to add an evaluation entry to the trace.
691
- * This mirrors the structure of Python's add_eval_run.
692
- *
693
- * @param evalRunPayload The constructed payload for the evaluation.
694
- * @param startTime The start time (in seconds) of the evaluation process.
695
- */
696
- _addEvalRun(evalRunPayload, startTime) {
697
- var _a, _b;
698
- const traceClientContext = getTraceClientContext();
699
- const currentEntry = traceClientContext.entryStack.at(-1);
700
- if (!currentEntry) {
701
- logger.warn(`No current entry to record evaluation to\nStack trace: ${new Error().stack}`);
702
- return;
703
- }
704
- const function_ = (_a = currentEntry.function) !== null && _a !== void 0 ? _a : "unknown_function";
705
- const depth = (_b = currentEntry.depth) !== null && _b !== void 0 ? _b : 0;
706
- const duration = Date.now() / 1000 - startTime;
707
- // Add evaluation entry to the trace
708
- this.addEntry({
709
- type: "evaluation",
710
- function: function_,
711
- span_id: currentEntry.span_id, // May be undefined
712
- depth: depth,
713
- timestamp: Date.now() / 1000,
714
- evaluation_runs: [evalRunPayload], // Embed the payload
715
- duration: duration,
716
- span_type: "evaluation"
717
- });
718
- }
719
808
  // OPTIONAL: Add a method to get the original name if needed elsewhere
720
809
  getOriginalName() {
721
810
  return this.originalName;
@@ -796,11 +885,6 @@ class Tracer {
796
885
  apiKey: this.apiKey,
797
886
  organizationId: this.organizationId,
798
887
  });
799
- if (traceClient.enableMonitoring) {
800
- traceClient.save(true).catch(err => {
801
- logger.error(`Failed to save empty trace (${traceClient.traceId}):`, err);
802
- });
803
- }
804
888
  return traceClient;
805
889
  }
806
890
  *trace(name, options = {}) {