npm - judgeval - Versions diffs - 0.2.0 → 0.2.1 - Mend

judgeval 0.2.0 → 0.2.1

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (41)

package/README.md +95 -68
package/dist/cjs/common/logger-instance.js +17 -19
package/dist/cjs/common/logger-instance.js.map +1 -1
package/dist/cjs/common/tracer.js +210 -126
package/dist/cjs/common/tracer.js.map +1 -1
package/dist/cjs/constants.js +3 -2
package/dist/cjs/constants.js.map +1 -1
package/dist/cjs/index.js +1 -3
package/dist/cjs/index.js.map +1 -1
package/dist/cjs/judgment-client.js +20 -114
package/dist/cjs/judgment-client.js.map +1 -1
package/dist/cjs/scorers/api-scorer.js +56 -48
package/dist/cjs/scorers/api-scorer.js.map +1 -1
package/dist/cjs/scorers/base-scorer.js +66 -11
package/dist/cjs/scorers/base-scorer.js.map +1 -1
package/dist/esm/common/logger-instance.js +17 -19
package/dist/esm/common/logger-instance.js.map +1 -1
package/dist/esm/common/tracer.js +211 -127
package/dist/esm/common/tracer.js.map +1 -1
package/dist/esm/constants.js +2 -1
package/dist/esm/constants.js.map +1 -1
package/dist/esm/index.js +0 -1
package/dist/esm/index.js.map +1 -1
package/dist/esm/judgment-client.js +20 -114
package/dist/esm/judgment-client.js.map +1 -1
package/dist/esm/scorers/api-scorer.js +56 -48
package/dist/esm/scorers/api-scorer.js.map +1 -1
package/dist/esm/scorers/base-scorer.js +66 -11
package/dist/esm/scorers/base-scorer.js.map +1 -1
package/dist/types/common/tracer.d.ts +27 -13
package/dist/types/constants.d.ts +2 -1
package/dist/types/index.d.ts +0 -1
package/dist/types/judgment-client.d.ts +0 -22
package/dist/types/scorers/api-scorer.d.ts +15 -15
package/dist/types/scorers/base-scorer.d.ts +53 -10
package/package.json +10 -3
package/dist/cjs/scorers/exact-match-scorer.js +0 -84
package/dist/cjs/scorers/exact-match-scorer.js.map +0 -1
package/dist/esm/scorers/exact-match-scorer.js +0 -80
package/dist/esm/scorers/exact-match-scorer.js.map +0 -1
package/dist/types/scorers/exact-match-scorer.d.ts +0 -10

package/dist/esm/common/tracer.js (changed)

@@ -14,7 +14,7 @@ import { AsyncLocalStorage } from 'async_hooks';
 import OpenAI from 'openai';
 import Anthropic from '@anthropic-ai/sdk';
 // Local Imports
-import { JUDGMENT_TRACES_SAVE_API_URL, JUDGMENT_TRACES_FETCH_API_URL, JUDGMENT_TRACES_DELETE_API_URL, JUDGMENT_TRACES_ADD_TO_EVAL_QUEUE_API_URL,
+import { JUDGMENT_TRACES_SAVE_API_URL, JUDGMENT_TRACES_FETCH_API_URL, JUDGMENT_TRACES_DELETE_API_URL, JUDGMENT_TRACES_ADD_TO_EVAL_QUEUE_API_URL, JUDGMENT_CALCULATE_TOKEN_COSTS_API_URL,
 // Add other necessary constants if needed
  } from '../constants.js';
 import { APIJudgmentScorer } from '../scorers/base-scorer.js';
@@ -70,16 +70,13 @@ class TraceManagerClient {
             try {
                 // Use isomorphic fetch (available globally in modern Node.js and browsers)
                 const response = yield fetch(url, Object.assign(Object.assign({}, options), { headers: headers }));
-                if (!response.ok) {
-                    const errorBody = yield response.text();
-                    console.error(`API Error (${response.status}) for ${options.method || 'GET'} ${url}: ${errorBody}`);
-                    throw new Error(`Judgment API request failed: ${response.status} ${response.statusText} - ${errorBody}`);
-                }
+                // We will return the response object even if !response.ok
+                // The caller (e.g., saveTrace) is responsible for checking response.ok or response.status
                 // Handle cases where the response might be empty (e.g., 204 No Content on DELETE)
                 if (response.status === 204) {
                     return null; // Indicate success with no content
                 }
-                return yield response.json();
+                return response;
             }
             catch (error) {
                 console.error(`Network or fetch error during ${options.method || 'GET'} ${url}:`, error);
@@ -96,21 +93,52 @@ class TraceManagerClient {
             });
         });
     }
-    saveTrace(traceData, emptySave) {
+    saveTrace(traceData) {
         return __awaiter(this, void 0, void 0, function* () {
+            // _fetch now returns the raw response object or throws on network error
             const response = yield this._fetch(JUDGMENT_TRACES_SAVE_API_URL, {
                 method: 'POST',
-                body: JSON.stringify(traceData),
+                body: JSON.stringify(traceData), // Stringify directly here again
             });
-            // Optionally log the UI URL like the Python version
-            if (!emptySave && (response === null || response === void 0 ? void 0 : response.ui_results_url)) {
-                // Use console.info or a dedicated logger for user-facing messages
-                // Note: We can't replicate Rich library's colored link easily in standard console
+            // Check if _fetch threw a network error (caught below) or returned an invalid object
+            if (!response) {
+                // This case should ideally be caught by _fetch's catch block, but double-check
+                throw new Error('Failed to save trace data: No response received from API.');
+            }
+            // Now, check the status code on the received response object
+            if (response.status === 400) {
+                // Attempt to get error body for more info
+                const errorBody = yield response.text();
+                throw new Error(`Failed to save trace data: Check your Trace name for conflicts, set overwrite=True to overwrite existing traces: ${response.status} ${response.statusText || ''} - ${errorBody}`);
+            }
+            else if (!response.ok) { // Handles other errors (5xx, 4xx except 400)
+                const errorBody = yield response.text();
+                throw new Error(`Failed to save trace data: Status ${response.status} ${response.statusText || '(No status text)'} - ${errorBody}`);
+            }
+            // --- Success Path ---
+            // Optionally log the UI URL (needs JSON parsing)
+            let responseData = null;
+            try {
+                // Handle 204 No Content specifically
+                if (response.status === 204) {
+                    responseData = null; // Or maybe { success: true }?
+                }
+                else {
+                    responseData = yield response.json(); // Parse JSON only on success
+                }
+            }
+            catch (parseError) {
+                logger.warn("Failed to parse successful API response JSON.", { error: parseError });
+                // Depending on requirements, maybe throw, maybe return a default success object
+                throw new Error(`API request succeeded (${response.status}), but failed to parse JSON response.`);
+            }
+            if (responseData === null || responseData === void 0 ? void 0 : responseData.ui_results_url) {
                 console.info(`
-🔍 View trace: ${response.ui_results_url}
+🔍 View trace: ${responseData.ui_results_url}
 `);
             }
-            return response;
+            // Return the parsed data (or null for 204)
+            return responseData;
         });
     }
     deleteTrace(traceId) {
@@ -139,6 +167,35 @@ class TraceManagerClient {
             });
         });
     }
+    /**
+     * Calculate token costs directly using the API endpoint.
+     * This is more accurate than client-side calculation as it uses the most up-to-date pricing.
+     *
+     * @param model The model name (e.g. 'gpt-4', 'claude-3-opus-20240229')
+     * @param promptTokens Number of tokens in the prompt/input
+     * @param completionTokens Number of tokens in the completion/output
+     * @returns Object containing token counts and calculated costs in USD
+     */
+    calculateTokenCosts(model, promptTokens, completionTokens) {
+        return __awaiter(this, void 0, void 0, function* () {
+            try {
+                // Use the new calculation endpoint
+                const result = yield this._fetch(JUDGMENT_CALCULATE_TOKEN_COSTS_API_URL, {
+                    method: 'POST',
+                    body: JSON.stringify({
+                        model,
+                        prompt_tokens: promptTokens,
+                        completion_tokens: completionTokens
+                    })
+                });
+                return result;
+            }
+            catch (error) {
+                logger.warn(`Failed to calculate token costs for model ${model}.`, { error: error instanceof Error ? error.message : String(error) });
+                return null;
+            }
+        });
+    }
 }
 // --- Helper Functions ---
 // Helper function to sanitize names (e.g., replace spaces with underscores)
@@ -155,6 +212,7 @@ class TraceClient {
     constructor(config) {
         var _a, _b, _c, _d, _e;
         this.traceManager = null; // Can be null if monitoring disabled
+        this._spanDepths = {}; // Track depth of active spans
         this.traceId = config.traceId || uuidv4();
         this.originalName = config.name || 'default_trace'; // Store original
         this.name = sanitizeName(this.originalName); // Use sanitized name internally
@@ -193,7 +251,7 @@ class TraceClient {
     recordInput(inputs) {
         const traceClientContext = getTraceClientContext();
         const currentEntry = traceClientContext.entryStack.at(-1);
-        if (!currentEntry) {
+        if (!currentEntry || !currentEntry.span_id) {
             console.warn(`No current entry to record input to\nStack trace: ${new Error().stack}`);
             return;
         }
@@ -202,14 +260,16 @@ class TraceClient {
             span_id: currentEntry.span_id,
             inputs,
             function: currentEntry.function,
-            depth: currentEntry.depth,
-            span_type: currentEntry.span_type
+            depth: this._spanDepths[currentEntry.span_id],
+            created_at: Date.now() / 1000,
+            span_type: currentEntry.span_type,
+            message: `Inputs to ${currentEntry.function}`
         });
     }
     recordOutput(output) {
         const traceClientContext = getTraceClientContext();
         const currentEntry = traceClientContext.entryStack.at(-1);
-        if (!currentEntry) {
+        if (!currentEntry || !currentEntry.span_id) {
             console.warn(`No current entry to record output to\nStack trace: ${new Error().stack}`);
             return;
         }
@@ -218,33 +278,28 @@ class TraceClient {
             span_id: currentEntry.span_id,
             output,
             function: currentEntry.function,
-            depth: currentEntry.depth,
-            span_type: currentEntry.span_type
+            depth: this._spanDepths[currentEntry.span_id],
+            created_at: Date.now() / 1000,
+            span_type: currentEntry.span_type,
+            message: `Output from ${currentEntry.function}`
         });
     }
     recordError(error) {
-        var _a;
         const traceClientContext = getTraceClientContext();
         const currentEntry = traceClientContext.entryStack.at(-1);
-        if (!currentEntry) {
+        if (!currentEntry || !currentEntry.span_id) {
             console.warn(`No current entry to record error to\nStack trace: ${new Error().stack}`);
             return;
         }
-        let output = error;
-        if (error instanceof Error) {
-            output = {
-                name: error.name,
-                message: error.message,
-                stack: (_a = error.stack) === null || _a === void 0 ? void 0 : _a.substring(0, 1000)
-            };
-        }
         this.addEntry({
             type: 'error',
             span_id: currentEntry.span_id,
-            output,
+            output: error,
             function: currentEntry.function,
-            depth: currentEntry.depth,
-            span_type: currentEntry.span_type
+            depth: this._spanDepths[currentEntry.span_id],
+            created_at: Date.now() / 1000,
+            span_type: currentEntry.span_type,
+            message: `Error from ${currentEntry.function}`
         });
     }
     startSpan(name, options = {}) {
@@ -255,18 +310,20 @@ class TraceClient {
         const spanType = (_a = options.spanType) !== null && _a !== void 0 ? _a : 'span';
         const startTime = Date.now() / 1000;
         let depth = 0, parentSpanId = undefined;
-        if (parentEntry) {
-            depth = parentEntry.depth + 1;
+        if (parentEntry && parentEntry.span_id) {
+            depth = this._spanDepths[parentEntry.span_id] + 1;
             parentSpanId = parentEntry.span_id;
         }
+        this._spanDepths[spanId] = depth;
         const entry = {
             type: 'enter',
             function: name,
             span_id: spanId,
             depth: depth,
-            timestamp: startTime,
+            created_at: startTime,
             span_type: spanType,
-            parent_span_id: parentSpanId
+            parent_span_id: parentSpanId,
+            message: name
         };
         this.addEntry(entry);
         traceClientContext.entryStack.push(entry);
@@ -274,21 +331,24 @@ class TraceClient {
     endSpan() {
         const traceClientContext = getTraceClientContext();
         const enterEntry = traceClientContext.entryStack.pop();
-        if (!enterEntry) {
+        if (!enterEntry || !enterEntry.span_id) {
             console.warn("No enter entry to end");
             return;
         }
         const endTime = Date.now() / 1000;
-        const duration = endTime - enterEntry.timestamp;
+        const duration = endTime - enterEntry.created_at;
         this.addEntry({
             type: 'exit',
             function: enterEntry.function,
             span_id: enterEntry.span_id,
-            depth: enterEntry.depth,
-            timestamp: endTime,
+            depth: this._spanDepths[enterEntry.span_id],
+            created_at: endTime,
             duration: duration,
-            span_type: enterEntry.span_type
+            span_type: enterEntry.span_type,
+            message: `← ${enterEntry.function}`
         });
+        // Clean up depth tracking
+        delete this._spanDepths[enterEntry.span_id];
     }
     *span(name, options = {}) {
         if (!this.enableMonitoring) {
@@ -306,6 +366,7 @@ class TraceClient {
     condenseTrace(rawEntries) {
         var _a, _b, _c, _d, _e;
         const spansById = {};
+        const allEvaluationRuns = []; // To collect all eval runs
         for (const entry of rawEntries) {
             const spanId = entry.span_id;
             if (!spanId)
@@ -315,7 +376,8 @@ class TraceClient {
                     span_id: spanId,
                     function: entry.function || 'unknown',
                     depth: (_a = entry.depth) !== null && _a !== void 0 ? _a : 0,
-                    timestamp: (_b = entry.timestamp) !== null && _b !== void 0 ? _b : 0,
+                    created_at: new Date(((_b = entry.created_at) !== null && _b !== void 0 ? _b : 0) * 1000).toISOString(), // Convert number to ISO string
+                    trace_id: this.traceId, // Add trace_id
                     parent_span_id: entry.parent_span_id,
                     span_type: entry.span_type || 'span',
                     inputs: null,
@@ -330,14 +392,14 @@ class TraceClient {
                 case 'enter':
                     currentSpanData.function = entry.function || currentSpanData.function;
                     currentSpanData.depth = (_c = entry.depth) !== null && _c !== void 0 ? _c : currentSpanData.depth;
-                    currentSpanData.timestamp = (_d = entry.timestamp) !== null && _d !== void 0 ? _d : currentSpanData.timestamp;
+                    currentSpanData.created_at = new Date(((_d = entry.created_at) !== null && _d !== void 0 ? _d : 0) * 1000).toISOString(); // Ensure created_at is string on update
                     currentSpanData.parent_span_id = entry.parent_span_id;
                     currentSpanData.span_type = entry.span_type || currentSpanData.span_type;
-                    currentSpanData.start_time = entry.timestamp;
+                    currentSpanData.start_time = entry.created_at; // Keep original number for duration calc
                     break;
                 case 'exit':
                     currentSpanData.duration = (_e = entry.duration) !== null && _e !== void 0 ? _e : currentSpanData.duration;
-                    currentSpanData.end_time = entry.timestamp;
+                    currentSpanData.end_time = entry.created_at; // Keep original number for duration calc
                     if (currentSpanData.duration === null && currentSpanData.start_time && currentSpanData.end_time) {
                         currentSpanData.duration = currentSpanData.end_time - currentSpanData.start_time;
                     }
@@ -355,8 +417,11 @@ class TraceClient {
                     currentSpanData.output = entry.output;
                     break;
                 case 'evaluation':
-                    if (entry.evaluation_runs) {
-                        currentSpanData.evaluation_runs.push(...entry.evaluation_runs);
+                    // Check if evaluation_runs is an array and has at least one element
+                    if (Array.isArray(entry.evaluation_runs) && entry.evaluation_runs.length > 0) {
+                        const evalPayload = entry.evaluation_runs[0]; // Extract the payload object
+                        currentSpanData.evaluation_runs.push(evalPayload); // Add the object to the span's list
+                        allEvaluationRuns.push(evalPayload); // Add the object to the central list
                     }
                     break;
             }
@@ -387,9 +452,11 @@ class TraceClient {
                 childrenMap[parentId].push(span);
             }
         }
-        roots.sort((a, b) => a.timestamp - b.timestamp);
+        // Sort using parsed dates
+        roots.sort((a, b) => Date.parse(a.created_at) - Date.parse(b.created_at));
         for (const parentId in childrenMap) {
-            childrenMap[parentId].sort((a, b) => a.timestamp - b.timestamp);
+            // Sort using parsed dates
+            childrenMap[parentId].sort((a, b) => Date.parse(a.created_at) - Date.parse(b.created_at));
         }
         function buildFlatListDfs(span) {
             if (visited.has(span.span_id))
@@ -410,26 +477,36 @@ class TraceClient {
                 buildFlatListDfs(span);
             }
         }
-        return sortedCondensedList;
+        return [sortedCondensedList, allEvaluationRuns]; // Return both
     }
     save() {
         return __awaiter(this, arguments, void 0, function* (emptySave = false) {
+            var _a, _b, _c, _d, _e, _f, _g, _h;
             if (!this.enableMonitoring || !this.traceManager) {
                 return null;
             }
             const traceClientContext = getTraceClientContext();
             const totalDuration = this.getDuration();
-            const condensedEntries = this.condenseTrace(traceClientContext.entries);
+            // Use the tuple returned by condenseTrace
+            const [condensedEntries, evaluationRuns] = this.condenseTrace(traceClientContext.entries);
             const tokenCounts = {
-                prompt_tokens: 0, completion_tokens: 0, total_tokens: 0,
-                prompt_tokens_cost_usd: 0.0, completion_tokens_cost_usd: 0.0, total_cost_usd: 0.0
+                prompt_tokens: 0,
+                completion_tokens: 0,
+                total_tokens: 0,
+                prompt_tokens_cost_usd: 0.0,
+                completion_tokens_cost_usd: 0.0,
+                total_cost_usd: 0.0
             };
-            condensedEntries.forEach(entry => {
-                var _a, _b;
+            // First pass: collect all LLM calls with their token counts
+            const llmCalls = [];
+            let index = 0;
+            for (const entry of condensedEntries) {
                 if (entry.span_type === 'llm' && ((_a = entry.output) === null || _a === void 0 ? void 0 : _a.usage)) {
                     const usage = entry.output.usage;
+                    const modelName = ((_b = entry.inputs) === null || _b === void 0 ? void 0 : _b.model) || "";
                     let promptTokens = 0;
                     let completionTokens = 0;
+                    // Handle different token naming conventions
                     if (usage.prompt_tokens !== undefined || usage.completion_tokens !== undefined) {
                         promptTokens = usage.prompt_tokens || 0;
                         completionTokens = usage.completion_tokens || 0;
@@ -437,6 +514,7 @@ class TraceClient {
                     else if (usage.input_tokens !== undefined || usage.output_tokens !== undefined) {
                         promptTokens = usage.input_tokens || 0;
                         completionTokens = usage.output_tokens || 0;
+                        // Standardize naming
                         usage.prompt_tokens = promptTokens;
                         usage.completion_tokens = completionTokens;
                         delete usage.input_tokens;
@@ -445,33 +523,63 @@ class TraceClient {
                     tokenCounts.prompt_tokens += promptTokens;
                     tokenCounts.completion_tokens += completionTokens;
                     tokenCounts.total_tokens += usage.total_tokens || (promptTokens + completionTokens);
-                    const modelName = ((_b = entry.inputs) === null || _b === void 0 ? void 0 : _b.model) || "";
+                    // Add to list of calls for cost calculation
                     if (modelName) {
-                        try {
-                            const promptCost = 0.0;
-                            const completionCost = 0.0;
-                            const callTotalCost = promptCost + completionCost;
-                            usage.prompt_tokens_cost_usd = promptCost;
-                            usage.completion_tokens_cost_usd = completionCost;
-                            usage.total_cost_usd = callTotalCost;
-                            tokenCounts.prompt_tokens_cost_usd += promptCost;
-                            tokenCounts.completion_tokens_cost_usd += completionCost;
-                            tokenCounts.total_cost_usd += callTotalCost;
+                        llmCalls.push({
+                            modelName,
+                            promptTokens,
+                            completionTokens,
+                            entryIndex: index
+                        });
+                    }
+                }
+                index++;
+            }
+            // Second pass: calculate costs for each LLM call using the API
+            if (this.traceManager && llmCalls.length > 0) {
+                // Process each LLM call
+                for (const call of llmCalls) {
+                    try {
+                        // Get costs from the API
+                        const costs = yield this.traceManager.calculateTokenCosts(call.modelName, call.promptTokens, call.completionTokens);
+                        if (costs) {
+                            // Update the entry with the costs
+                            const entry = condensedEntries[call.entryIndex];
+                            if ((_c = entry.output) === null || _c === void 0 ? void 0 : _c.usage) {
+                                entry.output.usage.prompt_tokens_cost_usd = costs.prompt_tokens_cost_usd;
+                                entry.output.usage.completion_tokens_cost_usd = costs.completion_tokens_cost_usd;
+                                entry.output.usage.total_cost_usd = costs.total_cost_usd;
+                            }
+                            // Add to the total costs, ensuring values are numbers (default to 0)
+                            tokenCounts.prompt_tokens_cost_usd += (_d = costs.prompt_tokens_cost_usd) !== null && _d !== void 0 ? _d : 0.0;
+                            tokenCounts.completion_tokens_cost_usd += (_e = costs.completion_tokens_cost_usd) !== null && _e !== void 0 ? _e : 0.0;
+                            tokenCounts.total_cost_usd += (_f = costs.total_cost_usd) !== null && _f !== void 0 ? _f : 0.0;
                         }
-                        catch (e) {
-                            console.warn(`Error calculating cost for model '${modelName}':`, e);
-                            usage.prompt_tokens_cost_usd = null;
-                            usage.completion_tokens_cost_usd = null;
-                            usage.total_cost_usd = null;
+                        else {
+                            // If calculation failed, set costs to null in the entry (matching Python behavior)
+                            const entry = condensedEntries[call.entryIndex];
+                            if ((_g = entry.output) === null || _g === void 0 ? void 0 : _g.usage) {
+                                entry.output.usage.prompt_tokens_cost_usd = null;
+                                entry.output.usage.completion_tokens_cost_usd = null;
+                                entry.output.usage.total_cost_usd = null;
+                            }
+                            // Log warning, but totals remain 0 for this call
+                            logger.warn(`Token cost calculation failed for model '${call.modelName}'. Cost information will not be available.`);
                         }
                     }
-                    else {
-                        usage.prompt_tokens_cost_usd = null;
-                        usage.completion_tokens_cost_usd = null;
-                        usage.total_cost_usd = null;
+                    catch (e) {
+                        logger.warn(`Error calculating cost for model '${call.modelName}':`, e);
+                        // Set costs to null in the entry
+                        const entry = condensedEntries[call.entryIndex];
+                        if ((_h = entry.output) === null || _h === void 0 ? void 0 : _h.usage) {
+                            entry.output.usage.prompt_tokens_cost_usd = null;
+                            entry.output.usage.completion_tokens_cost_usd = null;
+                            entry.output.usage.total_cost_usd = null;
+                        }
+                        // Totals remain unchanged (effectively adding 0)
                     }
                 }
-            });
+            }
             // Convert rules array to a dictionary (Record<string, Rule>)
             const rulesDict = {};
             this.rules.forEach(rule => {
@@ -488,16 +596,15 @@ class TraceClient {
                 duration: totalDuration,
                 token_counts: tokenCounts,
                 entries: condensedEntries,
-                rules: rulesDict,
-                empty_save: emptySave,
+                evaluation_runs: evaluationRuns,
                 overwrite: this.overwrite,
                 parent_trace_id: this.parentTraceId,
                 parent_name: this.parentName
             };
             try {
-                yield this.traceManager.saveTrace(traceData, emptySave);
+                yield this.traceManager.saveTrace(traceData);
                 logger.info(`Trace ${this.traceId} saved successfully.`);
-                if (!emptySave && this.enableEvaluations) {
+                if (this.enableEvaluations) {
                     try {
                         yield this.traceManager.addTraceToEvalQueue(traceData);
                         logger.info(`Trace ${this.traceId} added to evaluation queue.`);
@@ -531,7 +638,7 @@ class TraceClient {
         traceClientContext.entries.forEach(entry => {
             var _a;
             const indent = "  ".repeat((_a = entry.depth) !== null && _a !== void 0 ? _a : 0);
-            const timeStr = entry.timestamp ? `@ ${new Date(entry.timestamp * 1000).toISOString()}` : '';
+            const timeStr = entry.created_at ? `@ ${new Date(entry.created_at * 1000).toISOString()}` : '';
             const shortSpanId = entry.span_id ? `(id: ${entry.span_id.substring(0, 8)}...)` : '';
             const shortParentId = entry.parent_span_id ? `(parent: ${entry.parent_span_id.substring(0, 8)}...)` : '';
             try {
@@ -612,9 +719,8 @@ class TraceClient {
      * @returns Promise that resolves when the evaluation entry has been added to the trace
      */
     asyncEvaluate(scorers_1) {
-        return __awaiter(this, arguments, void 0, function* (
-        // Accept general Scorer type, but filter/check for API scorers internally
-        scorers, options = {}) {
+        return __awaiter(this, arguments, void 0, function* (scorers, options = {}) {
+            var _a;
             if (!this.enableEvaluations) {
                 logger.warn("Evaluations are disabled. Skipping async evaluation.");
                 return;
@@ -629,6 +735,12 @@ class TraceClient {
                 logger.warn("No APIJudgmentScorers found in the provided scorers list. Skipping async evaluation as backend requires API scorers.");
                 return;
             }
+            // Process rules (currently just using this.rules directly)
+            const loadedRules = this.rules; // TODO: Add ScorerWrapper-like processing if needed in TS
+            // Prevent using JudgevalScorer with rules - only APIJudgmentScorer allowed with rules
+            if (loadedRules && loadedRules.length > 0 && scorers.some(s => !(s instanceof APIJudgmentScorer))) {
+                throw new Error("Cannot use Judgeval scorers, you can only use API scorers when using rules. Please either remove rules or use only APIJudgmentScorer types.");
+            }
             const startTime = Date.now() / 1000; // Record start time in seconds
             // Create example structure matching Python/backend expectations
             const example = {
@@ -661,8 +773,6 @@ class TraceClient {
                 const idPart = currentEntry ? currentEntry.span_id.substring(0, 8) : this.traceId.substring(0, 8);
                 const evalName = `${this.name.charAt(0).toUpperCase() + this.name.slice(1)}-${idPart}-[${scorerNames}]`;
                 // --- End eval name creation ---
-                // Process rules (currently just using this.rules directly)
-                const loadedRules = this.rules; // TODO: Add ScorerWrapper-like processing if needed in TS
                 // Construct the evaluation payload
                 const evalRunPayload = {
                     organization_id: this.organizationId,
@@ -677,45 +787,24 @@ class TraceClient {
                     override: this.overwrite, // Use trace's overwrite setting
                     rules: loadedRules // Pass the processed rules
                 };
-                // Add evaluation entry using the helper method
-                this._addEvalRun(evalRunPayload, startTime);
+                // Add evaluation entry to the trace
+                this.addEntry({
+                    type: "evaluation",
+                    function: currentEntry.function,
+                    span_id: currentEntry.span_id, // May be undefined
+                    depth: (_a = currentEntry.depth) !== null && _a !== void 0 ? _a : 0,
+                    created_at: Date.now() / 1000,
+                    evaluation_runs: [evalRunPayload], // Store the object back in an array to match interface
+                    duration: Date.now() / 1000 - startTime,
+                    span_type: currentEntry.span_type
+                });
             }
             catch (error) {
-                console.error(`Failed during asyncEvaluate execution: ${error instanceof Error ? error.message : String(error)}`);
-                // Decide if we should re-throw or just log
+                logger.error(`Failed during asyncEvaluate execution: ${error instanceof Error ? error.message : String(error)}`);
+                throw error; // Re-throw after logging
             }
         });
     }
-    /**
-     * Private helper to add an evaluation entry to the trace.
-     * This mirrors the structure of Python's add_eval_run.
-     *
-     * @param evalRunPayload The constructed payload for the evaluation.
-     * @param startTime The start time (in seconds) of the evaluation process.
-     */
-    _addEvalRun(evalRunPayload, startTime) {
-        var _a, _b;
-        const traceClientContext = getTraceClientContext();
-        const currentEntry = traceClientContext.entryStack.at(-1);
-        if (!currentEntry) {
-            logger.warn(`No current entry to record evaluation to\nStack trace: ${new Error().stack}`);
-            return;
-        }
-        const function_ = (_a = currentEntry.function) !== null && _a !== void 0 ? _a : "unknown_function";
-        const depth = (_b = currentEntry.depth) !== null && _b !== void 0 ? _b : 0;
-        const duration = Date.now() / 1000 - startTime;
-        // Add evaluation entry to the trace
-        this.addEntry({
-            type: "evaluation",
-            function: function_,
-            span_id: currentEntry.span_id, // May be undefined
-            depth: depth,
-            timestamp: Date.now() / 1000,
-            evaluation_runs: [evalRunPayload], // Embed the payload
-            duration: duration,
-            span_type: "evaluation"
-        });
-    }
     // OPTIONAL: Add a method to get the original name if needed elsewhere
     getOriginalName() {
         return this.originalName;
@@ -796,11 +885,6 @@ class Tracer {
             apiKey: this.apiKey,
             organizationId: this.organizationId,
         });
-        if (traceClient.enableMonitoring) {
-            traceClient.save(true).catch(err => {
-                logger.error(`Failed to save empty trace (${traceClient.traceId}):`, err);
-            });
-        }
         return traceClient;
     }
     *trace(name, options = {}) {