npm - judgeval - Versions diffs - 0.1.41 → 0.2.0 - Mend

judgeval 0.1.41 → 0.2.0

This diff shows the changes between two publicly released versions of this package, as they appear in the supported public registry where they were published. It is provided for informational purposes only.

Files changed (60) hide show

package/README.md +10 -5
package/dist/cjs/clients.js.map +1 -1
package/dist/cjs/common/integrations/langgraph.js +141 -367
package/dist/cjs/common/integrations/langgraph.js.map +1 -1
package/dist/cjs/common/logger.js +6 -6
package/dist/cjs/common/logger.js.map +1 -1
package/dist/cjs/common/tracer.js +300 -317
package/dist/cjs/common/tracer.js.map +1 -1
package/dist/cjs/evaluation-run.js +9 -9
package/dist/cjs/evaluation-run.js.map +1 -1
package/dist/cjs/index.js +54 -54
package/dist/cjs/index.js.map +1 -1
package/dist/cjs/judgment-client.js +73 -56
package/dist/cjs/judgment-client.js.map +1 -1
package/dist/cjs/rules.js +8 -8
package/dist/cjs/rules.js.map +1 -1
package/dist/cjs/run-evaluation.js +60 -60
package/dist/cjs/run-evaluation.js.map +1 -1
package/dist/cjs/scorers/api-scorer.js +15 -15
package/dist/cjs/scorers/api-scorer.js.map +1 -1
package/dist/cjs/scorers/base-scorer.js +4 -4
package/dist/cjs/scorers/base-scorer.js.map +1 -1
package/dist/cjs/scorers/exact-match-scorer.js +2 -2
package/dist/cjs/scorers/exact-match-scorer.js.map +1 -1
package/dist/esm/clients.js.map +1 -1
package/dist/esm/common/integrations/langgraph.js +142 -371
package/dist/esm/common/integrations/langgraph.js.map +1 -1
package/dist/esm/common/logger.js +1 -1
package/dist/esm/common/logger.js.map +1 -1
package/dist/esm/common/tracer.js +283 -298
package/dist/esm/common/tracer.js.map +1 -1
package/dist/esm/evaluation-run.js +3 -3
package/dist/esm/evaluation-run.js.map +1 -1
package/dist/esm/index.js +12 -12
package/dist/esm/index.js.map +1 -1
package/dist/esm/judgment-client.js +33 -16
package/dist/esm/judgment-client.js.map +1 -1
package/dist/esm/rules.js +7 -7
package/dist/esm/rules.js.map +1 -1
package/dist/esm/run-evaluation.js +4 -4
package/dist/esm/run-evaluation.js.map +1 -1
package/dist/esm/scorers/api-scorer.js +1 -1
package/dist/esm/scorers/api-scorer.js.map +1 -1
package/dist/esm/scorers/base-scorer.js +1 -1
package/dist/esm/scorers/base-scorer.js.map +1 -1
package/dist/esm/scorers/exact-match-scorer.js +1 -1
package/dist/esm/scorers/exact-match-scorer.js.map +1 -1
package/dist/types/clients.d.ts +1 -2
package/dist/types/common/integrations/langgraph.d.ts +22 -30
package/dist/types/common/tracer.d.ts +23 -21
package/dist/types/data/result.d.ts +1 -1
package/dist/types/evaluation-run.d.ts +3 -3
package/dist/types/index.d.ts +12 -12
package/dist/types/judgment-client.d.ts +4 -4
package/dist/types/rules.d.ts +3 -3
package/dist/types/run-evaluation.d.ts +4 -4
package/dist/types/scorers/api-scorer.d.ts +3 -3
package/dist/types/scorers/base-scorer.d.ts +2 -2
package/dist/types/scorers/exact-match-scorer.d.ts +3 -3
package/package.json +6 -3

package/README.md CHANGED Viewed

@@ -131,12 +131,17 @@ const tracer = Tracer.getInstance({
   enableEvaluations: true
 });
-await tracer.runInTrace({ name: "my-trace" }, async (trace) => {
-  // Run operations within the trace
-  await trace.runInSpan("operation", { spanType: "tool" }, async () => {
+// Analogous to Python SDK's with, e.g.
+//
+// with tracer.trace("my-trace") as trace:
+//   with trace.span("operation") as span:
+//     # Perform operations
+//
+for (const trace of tracer.trace("my-trace")) {
+  for (const span of trace.span("operation")) {
     // Perform operations
-  });
-});
+  }
+}
 ```
 ## Result Retrieval

package/dist/cjs/clients.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"clients.js","sourceRoot":"","sources":["../../src/clients.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,+CAAiC;AACjC,oDAA4B;AAC5B,4DAA0C;AAC1C,8DAAmC;AAEnC,6BAA6B;AAC7B,MAAM,CAAC,MAAM,EAAE,CAAC;AAEhB,oCAAoC;AACpC,IAAI,YAAY,GAAkB,IAAI,CAAC;AAsC9B,oCAAY;AArCrB,IAAI,OAAO,CAAC,GAAG,CAAC,cAAc,EAAE,CAAC;IAC/B,IAAI,CAAC;QACH,uBAAA,YAAY,GAAG,IAAI,gBAAM,CAAC;YACxB,MAAM,EAAE,OAAO,CAAC,GAAG,CAAC,cAAc;SACnC,CAAC,CAAC;IACL,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,qDAAqD;QACrD,OAAO,CAAC,IAAI,CAAC,mCAAmC,EAAE,KAAK,CAAC,CAAC;IAC3D,CAAC;AACH,CAAC;AAED,uCAAuC;AACvC,IAAI,eAAe,GAAqB,IAAI,CAAC;AAyBtB,0CAAe;AAxBtC,IAAI,OAAO,CAAC,GAAG,CAAC,iBAAiB,EAAE,CAAC;IAClC,IAAI,CAAC;QACH,0BAAA,eAAe,GAAG,IAAI,aAAS,CAAC;YAC9B,MAAM,EAAE,OAAO,CAAC,GAAG,CAAC,iBAAiB;SACtC,CAAC,CAAC;IACL,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,wDAAwD;QACxD,OAAO,CAAC,IAAI,CAAC,sCAAsC,EAAE,KAAK,CAAC,CAAC;IAC9D,CAAC;AACH,CAAC;AAED,sCAAsC;AACtC,IAAI,cAAc,~~GAAoB~~,IAAI,CAAC;~~AAYH~~,wCAAc;AAXtD,IAAI,OAAO,CAAC,GAAG,CAAC,gBAAgB,EAAE,CAAC;IACjC,IAAI,CAAC;QACH,yBAAA,cAAc,GAAG,~~IAAI~~,~~qBAAQ~~,CAAC;~~YAC5B~~,IAAI,EAAE,OAAO,CAAC,GAAG,CAAC,gBAAgB;SACnC,CAAC,CAAC;IACL,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,uDAAuD;QACvD,OAAO,CAAC,IAAI,CAAC,qCAAqC,EAAE,KAAK,CAAC,CAAC;IAC7D,CAAC;AACH,CAAC"}
1	+ {"version":3,"file":"clients.js","sourceRoot":"","sources":["../../src/clients.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,+CAAiC;AACjC,oDAA4B;AAC5B,4DAA0C;AAC1C,8DAAmC;AAEnC,6BAA6B;AAC7B,MAAM,CAAC,MAAM,EAAE,CAAC;AAEhB,oCAAoC;AACpC,IAAI,YAAY,GAAkB,IAAI,CAAC;AAsC9B,oCAAY;AArCrB,IAAI,OAAO,CAAC,GAAG,CAAC,cAAc,EAAE,CAAC;IAC/B,IAAI,CAAC;QACH,uBAAA,YAAY,GAAG,IAAI,gBAAM,CAAC;YACxB,MAAM,EAAE,OAAO,CAAC,GAAG,CAAC,cAAc;SACnC,CAAC,CAAC;IACL,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,qDAAqD;QACrD,OAAO,CAAC,IAAI,CAAC,mCAAmC,EAAE,KAAK,CAAC,CAAC;IAC3D,CAAC;AACH,CAAC;AAED,uCAAuC;AACvC,IAAI,eAAe,GAAqB,IAAI,CAAC;AAyBtB,0CAAe;AAxBtC,IAAI,OAAO,CAAC,GAAG,CAAC,iBAAiB,EAAE,CAAC;IAClC,IAAI,CAAC;QACH,0BAAA,eAAe,GAAG,IAAI,aAAS,CAAC;YAC9B,MAAM,EAAE,OAAO,CAAC,GAAG,CAAC,iBAAiB;SACtC,CAAC,CAAC;IACL,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,wDAAwD;QACxD,OAAO,CAAC,IAAI,CAAC,sCAAsC,EAAE,KAAK,CAAC,CAAC;IAC9D,CAAC;AACH,CAAC;AAED,sCAAsC;AACtC,IAAI,cAAc,GAAe,IAAI,CAAC;AAYE,wCAAc;AAXtD,IAAI,OAAO,CAAC,GAAG,CAAC,gBAAgB,EAAE,CAAC;IACjC,IAAI,CAAC;QACH,yBAAA,cAAc,GAAG,IAAK,qBAAgB,CAAC;YACrC,IAAI,EAAE,OAAO,CAAC,GAAG,CAAC,gBAAgB;SACnC,CAAC,CAAC;IACL,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,uDAAuD;QACvD,OAAO,CAAC,IAAI,CAAC,qCAAqC,EAAE,KAAK,CAAC,CAAC;IAC7D,CAAC;AACH,CAAC"}

package/dist/cjs/common/integrations/langgraph.js CHANGED Viewed

@@ -1,5 +1,4 @@
 "use strict";
-// judgeval-js/src/integrations/langgraph.ts
 var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
     function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
     return new (P || (P = Promise))(function (resolve, reject) {
@@ -12,431 +11,206 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.JudgevalLanggraphCallbackHandler = void 0;
 const base_1 = require("@langchain/core/callbacks/base");
-const messages_1 = require("@langchain/core/messages"); // Regular import for BaseMessage value
-const uuid_1 = require("uuid");
-const tracer_1 = require("../tracer"); // Adjust path
-// --- Global Handler Setup (REMOVED - No longer needed with context propagation) ---
+const tracer_js_1 = require("../tracer.js");
+// It's my understanding that you can only be on one node in the graph at a time
+// That means we don't need to worry about the async problem
 class JudgevalLanggraphCallbackHandler extends base_1.BaseCallbackHandler {
-    // Optional: Track executed nodes/tools if needed for external use cases like evaluation
-    // public executedNodes: string[] = [];
-    // public executedTools: string[] = [];
     constructor(tracer) {
-        super(); // Call parent constructor
-        this.name = "judgeval_langgraph_callback_handler"; // Identifier for the handler
-        this.spanStartTimes = {}; // Store start time per spanId (using Judgeval spanId)
-        this.runIdToSpanId = {}; // Map Langchain runId to Judgeval spanId
-        this.tracer = tracer !== null && tracer !== void 0 ? tracer : tracer_1.Tracer.getInstance(); // Use provided or singleton tracer
-        // No need to get traceClient here, will be fetched from context in methods
+        super();
+        this.name = "judgeval_langgraph_callback_handler";
+        this.executedNodeTools = [];
+        this.executedNodes = [];
+        this.executedTools = [];
+        this.tracer = tracer !== null && tracer !== void 0 ? tracer : tracer_js_1.Tracer.getInstance();
         console.log(`[Judgeval Handler] Initialized. Monitoring Enabled: ${this.tracer.enableMonitoring}`); // Added prefix
     }
-    // Helper to safely get the current TraceClient from context
-    _getActiveTraceClient() {
-        if (!this.tracer.enableMonitoring) {
-            // console.log("Judgeval Handler: Monitoring disabled."); // Reduce noise
-            return null;
-        }
+    getTraceClient() {
+        if (!this.tracer.enableMonitoring)
+            return undefined;
         const client = this.tracer.getCurrentTrace();
         if (!client) {
-            console.log("[Judgeval Handler] _getActiveTraceClient: No active trace client found in context."); // Added log
-        }
-        // Explicitly return null if client is undefined
-        return client !== null && client !== void 0 ? client : null;
-    }
-    // Helper to create a unique span ID
-    _generateSpanId() {
-        return (0, uuid_1.v4)();
-    }
-    // Start a new span, associating it with the LangChain runId
-    _startSpan(lcRunId, name, spanType = "span") {
-        var _a;
-        const traceClient = this._getActiveTraceClient();
-        if (!traceClient)
-            return;
-        const startTime = Date.now() / 1000;
-        const spanId = this._generateSpanId();
-        this.runIdToSpanId[lcRunId] = spanId; // Map Langchain runId to our new spanId
-        // Get parent span ID from the current async context
-        const parentSpanId = tracer_1.currentSpanAsyncLocalStorage.getStore();
-        // Calculate depth based on parent
-        let depth = 0;
-        if (parentSpanId) {
-            const parentEntry = traceClient.entries.find((e) => e.span_id === parentSpanId && e.type === 'enter');
-            if (parentEntry) {
-                depth = ((_a = parentEntry.depth) !== null && _a !== void 0 ? _a : -1) + 1; // Increment parent depth
-            }
-            else {
-                // If parent entry not found (should be rare in async context), start at 0
-                console.warn(`Judgeval Handler: Parent span entry ${parentSpanId} not found for child ${spanId}. Defaulting depth to 0.`);
-                depth = 0;
-            }
-        }
-        else {
-            // No parent in context, this is a root span (relative to this handler's context)
-            depth = 0;
+            console.warn("No trace client found");
         }
-        // console.log(`>>> _startSpan: Creating span ${spanId} ('${name}') depth: ${depth}, parent: ${parentSpanId ?? 'None'}, lcRunId: ${lcRunId}`); // Debug log
-        traceClient.addEntry({
-            type: 'enter',
-            function: name,
-            span_id: spanId,
-            depth: depth,
-            timestamp: startTime,
-            span_type: spanType,
-            parent_span_id: parentSpanId
-        });
-        this.spanStartTimes[spanId] = startTime;
-        // Set this new span as the current one in the context *for child operations*
-        // Note: This relies on Langchain's async flow preserving the context.
-        // If Langchain breaks context, this might not propagate correctly.
-        // It's generally better to *read* the parent from context when starting a span,
-        // rather than trying to manage pushing/popping onto the context here.
-        // The Tracer's observe/runInTrace methods handle setting the context.
+        return client;
     }
-    // End the span corresponding to the LangChain runId
-    _endSpan(lcRunId, output, error) {
-        const traceClient = this._getActiveTraceClient();
-        // Retrieve the spanId using the LangChain runId
-        const spanId = this.runIdToSpanId[lcRunId];
-        if (!traceClient || !spanId || !(spanId in this.spanStartTimes)) {
-            // console.log(`>>> _endSpan: Skipping endSpan for lcRunId ${lcRunId} / spanId ${spanId} - ID/trace/time missing or monitoring disabled.`); // Debug log
-            // Clean up the map even if we can't end the span fully
-            if (lcRunId in this.runIdToSpanId)
-                delete this.runIdToSpanId[lcRunId];
-            return;
-        }
-        const startTime = this.spanStartTimes[spanId];
-        const endTime = Date.now() / 1000;
-        const duration = endTime - startTime;
-        // Find the original 'enter' entry to get details like name and depth
-        const enterEntry = traceClient.entries.find(e => e.span_id === spanId && e.type === 'enter');
-        if (!enterEntry) {
-            console.warn(`Judgeval Handler: Could not find 'enter' entry for span ${spanId} (lcRunId: ${lcRunId}) during _endSpan.`);
-            // Clean up maps even if entry isn't found
-            delete this.spanStartTimes[spanId];
-            delete this.runIdToSpanId[lcRunId];
-            return;
-        }
-        // Record output or error *before* the exit entry
-        if (error) {
-            traceClient.recordOutput(error instanceof Error ? error : new Error(String(error)));
-        }
-        else if (output !== undefined) {
-            // Avoid recording 'undefined' as output explicitly
-            traceClient.recordOutput(output);
-        }
-        traceClient.addEntry({
-            type: 'exit',
-            function: enterEntry.function, // Use name from 'enter' entry
-            span_id: spanId,
-            depth: enterEntry.depth, // Use depth from 'enter' entry
-            timestamp: endTime,
-            duration: duration,
-            span_type: enterEntry.span_type // Use type from 'enter' entry
-        });
-        // Clean up maps
-        delete this.spanStartTimes[spanId];
-        delete this.runIdToSpanId[lcRunId];
-        // console.log(`>>> _endSpan: Ended span ${spanId} ('${enterEntry.function}'), lcRunId: ${lcRunId}`); // Debug log
-    }
-    // --- Chain Events ---
-    onChainStart(serialized, inputs, runId, parentRunId, tags, metadata, options // Langchain-JS doesn't seem to pass options here consistently
-    ) {
+    handleRetrieverStart(serialized, query, runId, parentRunId, tags, metadata, name, options) {
         return __awaiter(this, void 0, void 0, function* () {
-            var _a;
-            console.log(`[Judgeval Handler] onChainStart called for runId: ${runId}`); // Added log
-            // console.log(`>>> onChainStart: runId: ${runId}, parentRunId: ${parentRunId}, metadata: ${JSON.stringify(metadata)}, tags: ${JSON.stringify(tags)}`); // Debug log
-            const traceClient = this._getActiveTraceClient();
+            let name_ = "RETRIEVER_CALL";
+            if (serialized === null || serialized === void 0 ? void 0 : serialized.name) {
+                name_ = `RETRIEVER_${serialized.name.toUpperCase()}`;
+            }
+            const traceClient = this.getTraceClient();
             if (!traceClient)
                 return;
-            let spanName;
-            const spanType = "chain"; // Keep type as chain
-            // Determine span name based on Python logic: prioritize 'LangGraph' root, then serialized name
-            const executionName = (_a = serialized === null || serialized === void 0 ? void 0 : serialized.name) !== null && _a !== void 0 ? _a : 'Unknown Chain';
-            if (executionName === 'LangGraph') {
-                spanName = 'LangGraph'; // Match Python root span name
-            }
-            else {
-                // Use the serialized name or a generic fallback, avoiding node-specific prefixes
-                spanName = executionName;
-            }
-            // Removed node-specific logic:
-            // const nodeName = metadata?.langgraph_node ? String(metadata.langgraph_node) : null;
-            // if (nodeName) { ... } else { ... }
-            this._startSpan(runId, spanName, spanType);
-            // Record input associated with the started span
-            const currentSpanId = this.runIdToSpanId[runId];
-            if (currentSpanId) {
-                // Input is recorded in the current context span of the TraceClient
-                traceClient.recordInput({ args: inputs /* , options: options */ }); // Removed spanId
-            }
+            traceClient.startSpan(name_, { spanType: "retriever" });
+            traceClient.recordInput({
+                query,
+                tags,
+                metadata,
+                options,
+            });
         });
     }
-    onChainEnd(outputs, // Output can sometimes be a simple string
-    runId, parentRunId, tags) {
+    handleRetrieverEnd(documents, runId, parentRunId, tags, options) {
         return __awaiter(this, void 0, void 0, function* () {
-            // console.log(`>>> onChainEnd: runId: ${runId}`); // Debug log
-            // Output is recorded within _endSpan
-            this._endSpan(runId, outputs);
+            const docSummary = documents.map((doc, i) => ({
+                index: i,
+                page_content: doc.pageContent.length > 100
+                    ? doc.pageContent.substring(0, 97) + "..."
+                    : doc.pageContent,
+                metadata: doc.metadata,
+            }));
+            const traceClient = this.getTraceClient();
+            if (!traceClient)
+                return;
+            traceClient.recordOutput({
+                document_count: documents.length,
+                documents: docSummary,
+            });
+            traceClient.endSpan();
         });
     }
-    onChainError(error, runId, parentRunId, tags) {
+    handleRetrieverError(error, runId, parentRunId, tags, options) {
         return __awaiter(this, void 0, void 0, function* () {
-            // console.error(`>>> onChainError: runId: ${runId}`, error); // Debug log
-            // Error is recorded within _endSpan
-            this._endSpan(runId, undefined, error);
+            const traceClient = this.getTraceClient();
+            if (!traceClient)
+                return;
+            traceClient.recordError(error);
         });
     }
-    // --- LLM Events ---
-    _getLlmSpanName(serialized) {
-        var _a;
-        // Simplify extraction if possible, check common patterns
-        const idPath = ((_a = serialized === null || serialized === void 0 ? void 0 : serialized.id) !== null && _a !== void 0 ? _a : []).join('/').toLowerCase();
-        if (idPath.includes("openai"))
-            return "OPENAI_API_CALL";
-        if (idPath.includes("anthropic"))
-            return "ANTHROPIC_API_CALL";
-        if (idPath.includes("together"))
-            return "TOGETHER_API_CALL";
-        // Add other common providers if needed (e.g., google, bedrock)
-        return "LLM_CALL"; // Default
-    }
-    // Generic LLM Start handler (covers both base LLM and ChatModel)
-    _handleLlmStart(serialized, runId, inputData, extraParams,
-    // tags?: string[] | undefined, // Often unused for LLM spans
-    // metadata?: Record<string, unknown> | undefined, // Often unused for LLM spans
-    options // Langchain passes invocation params here
-    ) {
+    handleChainStart(serialized, inputs, runId, parentRunId, tags, metadata, name, runName, runType, options) {
         return __awaiter(this, void 0, void 0, function* () {
-            var _a, _b, _c, _d, _e;
-            // console.log(`>>> _handleLlmStart: runId: ${runId}`); // Debug log
-            const traceClient = this._getActiveTraceClient();
-            if (!traceClient)
+            let traceClient = this.getTraceClient();
+            if (!traceClient) {
+                console.warn("No trace client found");
                 return;
-            // Extract model name from options (common pattern) or extraParams
-            const invocationParams = (_b = (_a = options === null || options === void 0 ? void 0 : options.invocation_params) !== null && _a !== void 0 ? _a : extraParams) !== null && _b !== void 0 ? _b : {};
-            const modelName = (_d = (_c = invocationParams === null || invocationParams === void 0 ? void 0 : invocationParams.model_name) !== null && _c !== void 0 ? _c : invocationParams === null || invocationParams === void 0 ? void 0 : invocationParams.model) !== null && _d !== void 0 ? _d : 'unknown_model';
-            const spanName = this._getLlmSpanName(serialized);
-            this._startSpan(runId, spanName, "llm");
-            // Prepare input payload
-            let inputPayload = {
-                model: modelName,
-                params: invocationParams, // Record all invocation params
-                // options: options // May include other config besides invocation_params
-            };
-            if ('prompts' in inputData) {
-                inputPayload.prompts = inputData.prompts;
-            }
-            else if ('messages' in inputData) {
-                // Langchain JS passes messages as BaseMessage[][]
-                inputPayload.messages = (_e = inputData.messages[0]) !== null && _e !== void 0 ? _e : []; // Extract first batch element safely
-            }
-            // Record input associated with the started span
-            const currentSpanId = this.runIdToSpanId[runId];
-            if (currentSpanId) {
-                // Input is recorded in the current context span of the TraceClient
-                traceClient.recordInput(inputPayload); // Removed spanId
             }
+            traceClient.startSpan(name !== null && name !== void 0 ? name : "unknown_chain", { spanType: "chain" });
+            traceClient.recordInput(inputs);
         });
     }
-    onLlmStart(serialized, prompts, runId, parentRunId, extraParams, tags, metadata, options // options might contain invocation_params
-    ) {
+    handleChainEnd(outputs, runId, parentRunId, tags, options) {
         return __awaiter(this, void 0, void 0, function* () {
-            console.log(`[Judgeval Handler] onLlmStart called for runId: ${runId}`); // Added log
-            yield this._handleLlmStart(serialized, runId, { prompts }, extraParams, options);
+            const traceClient = this.getTraceClient();
+            if (!traceClient)
+                return;
+            traceClient.recordOutput(outputs);
+            traceClient.endSpan();
         });
     }
-    // Handles Chat Model start specifically
-    onChatModelStart(serialized, messages, runId, parentRunId, extraParams, tags, metadata, options // options might contain invocation_params
-    ) {
+    handleChainError(error, runId, parentRunId, tags, options) {
         return __awaiter(this, void 0, void 0, function* () {
-            console.log(`[Judgeval Handler] onChatModelStart called for runId: ${runId}`); // Added log
-            yield this._handleLlmStart(serialized, runId, { messages }, extraParams, options);
+            console.log(`Chain error: ${error}`);
+            const traceClient = this.getTraceClient();
+            if (!traceClient)
+                return;
+            traceClient.recordError(error);
+            traceClient.endSpan();
         });
     }
-    /**
-     * Handles the end of an LLM call. Extracts the output, usage data, and ends the corresponding span.
-     * @param output The result from the LLM call.
-     * @param runId The unique ID of the run.
-     */
-    onLlmEnd(output, runId) {
+    handleToolStart(serialized, inputStr, runId, parentRunId, tags, metadata, name, runType, runName, options) {
         return __awaiter(this, void 0, void 0, function* () {
-            var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m, _o;
-            // console.log(`>>> onLlmEnd: runId: ${runId}`); // Debug log
-            const traceClient = this._getActiveTraceClient();
-            const spanId = this.runIdToSpanId[runId]; // Needed for context if recording output directly here
-            if (!traceClient || !spanId) {
-                // console.warn(`Judgeval Handler: Skipping onLlmEnd for runId ${runId}. Trace client or span ID missing.`); // Debug log
-                this._endSpan(runId, output); // Still attempt to end span if possible, passing raw output
+            const traceClient = this.getTraceClient();
+            if (!traceClient)
                 return;
+            // Python SDK doesn't handle name None case
+            traceClient.startSpan(name !== null && name !== void 0 ? name : "unknown_tool", { spanType: "tool" });
+            if (name) {
+                this.executedTools.push(name);
+                this.executedNodeTools.push(this.previousNode ? `${this.previousNode}:${name}` : name);
             }
-            // Process LLMResult to extract relevant data
-            const generation = (_b = (_a = output.generations) === null || _a === void 0 ? void 0 : _a[0]) === null || _b === void 0 ? void 0 : _b[0];
-            let llmOutputPayload = {};
-            if (generation) {
-                // Handle ChatGeneration vs regular Generation
-                if ("message" in generation && generation.message instanceof messages_1.BaseMessage) {
-                    const aiMessage = generation.message; // Assume AI message for output
-                    llmOutputPayload.content = aiMessage.content;
-                    // Include tool calls if present
-                    if (aiMessage.tool_calls && aiMessage.tool_calls.length > 0) {
-                        llmOutputPayload.tool_calls = aiMessage.tool_calls;
-                    }
-                    if (aiMessage.invalid_tool_calls && aiMessage.invalid_tool_calls.length > 0) {
-                        llmOutputPayload.invalid_tool_calls = aiMessage.invalid_tool_calls;
-                    }
-                    // Usage metadata might be here (e.g., OpenAI)
-                    if (aiMessage.usage_metadata) {
-                        llmOutputPayload.usage_metadata = aiMessage.usage_metadata;
-                    }
-                }
-                else if (generation.text) {
-                    // Handle plain text generation
-                    llmOutputPayload.content = generation.text;
-                }
-                // Standardize token usage extraction if not in usage_metadata
-                // Check generationInfo first, then llmOutput
-                const tokenUsage = (_d = (_c = generation.generationInfo) === null || _c === void 0 ? void 0 : _c.tokenUsage) !== null && _d !== void 0 ? _d : (_e = output.llmOutput) === null || _e === void 0 ? void 0 : _e.tokenUsage;
-                if (tokenUsage && !((_f = llmOutputPayload.usage_metadata) === null || _f === void 0 ? void 0 : _f.token_usage)) { // Avoid duplication if already in usage_metadata
-                    llmOutputPayload.token_usage = {
-                        completionTokens: (_g = tokenUsage.completionTokens) !== null && _g !== void 0 ? _g : tokenUsage.completion_tokens,
-                        promptTokens: (_h = tokenUsage.promptTokens) !== null && _h !== void 0 ? _h : tokenUsage.prompt_tokens,
-                        totalTokens: (_j = tokenUsage.totalTokens) !== null && _j !== void 0 ? _j : tokenUsage.total_tokens,
-                    };
-                    // Normalize keys within usage_metadata if present
-                }
-                else if ((_k = llmOutputPayload.usage_metadata) === null || _k === void 0 ? void 0 : _k.token_usage) {
-                    const usageMeta = llmOutputPayload.usage_metadata.token_usage;
-                    llmOutputPayload.token_usage = {
-                        completionTokens: (_l = usageMeta.completionTokens) !== null && _l !== void 0 ? _l : usageMeta.completion_tokens,
-                        promptTokens: (_m = usageMeta.promptTokens) !== null && _m !== void 0 ? _m : usageMeta.prompt_tokens,
-                        totalTokens: (_o = usageMeta.totalTokens) !== null && _o !== void 0 ? _o : usageMeta.total_tokens,
-                    };
-                }
-                // Include other generationInfo if available and potentially useful
-                if (generation.generationInfo) {
-                    llmOutputPayload.generation_info = generation.generationInfo;
-                }
-            }
-            // Include raw LLM output if available and potentially useful (can be verbose)
-            // if (output.llmOutput) {
-            //      llmOutputPayload.raw_llm_output = output.llmOutput;
-            // }
-            // Output is recorded within _endSpan
-            this._endSpan(runId, llmOutputPayload);
+            traceClient.recordInput({
+                args: inputStr,
+                kwargs: options,
+            });
         });
     }
-    onLlmError(error, runId, parentRunId) {
+    handleToolEnd(output, runId, parentRunId, options) {
         return __awaiter(this, void 0, void 0, function* () {
-            // console.error(`>>> onLlmError: runId: ${runId}`, error); // Debug log
-            // Error is recorded within _endSpan
-            this._endSpan(runId, undefined, error);
+            const traceClient = this.getTraceClient();
+            if (!traceClient)
+                return;
+            traceClient.recordOutput(output);
+            traceClient.endSpan();
         });
     }
-    // --- Tool Events ---
-    onToolStart(serialized, inputStr, // input is often a stringified object
-    runId, parentRunId, tags, metadata) {
+    handleToolError(error, runId, parentRunId, options) {
         return __awaiter(this, void 0, void 0, function* () {
-            var _a;
-            console.log(`[Judgeval Handler] onToolStart called for runId: ${runId}`); // Added log
-            // console.log(`>>> onToolStart: runId: ${runId}, name: ${serialized?.name}`); // Debug log
-            const traceClient = this._getActiveTraceClient();
+            console.log(`Tool error: ${error}`);
+            const traceClient = this.getTraceClient();
             if (!traceClient)
                 return;
-            // Match Python: Use the tool name directly as the span name
-            const toolName = (_a = serialized === null || serialized === void 0 ? void 0 : serialized.name) !== null && _a !== void 0 ? _a : 'Unknown Tool';
-            const spanName = toolName; // Removed "TOOL: " prefix
-            this._startSpan(runId, spanName, "tool");
-            // Try to parse inputStr if it's JSON, otherwise keep as string
-            let parsedInput = inputStr;
-            try {
-                // Avoid parsing null/empty strings
-                if (inputStr && inputStr.trim().startsWith('{') && inputStr.trim().endsWith('}')) {
-                    parsedInput = JSON.parse(inputStr);
-                }
-            }
-            catch (e) {
-                // Ignore error, keep as string if parsing fails
-            }
-            // Record input associated with the started span
-            const currentSpanId = this.runIdToSpanId[runId];
-            if (currentSpanId) {
-                // Input is recorded in the current context span of the TraceClient
-                traceClient.recordInput({ input: parsedInput /* , options: options */ }); // Removed spanId
-            }
-            // Track tool execution (if needed externally)
-            // this.executedTools.push(toolName); // Example
+            traceClient.recordError(error);
+            traceClient.endSpan();
         });
     }
-    onToolEnd(output, // Tool output is typically a string
-    runId, parentRunId) {
+    handleAgentAction(action, runId, parentRunId, tags, options) {
         return __awaiter(this, void 0, void 0, function* () {
-            // console.log(`>>> onToolEnd: runId: ${runId}`); // Debug log
-            // Output is recorded within _endSpan
-            this._endSpan(runId, output);
+            console.log(`Agent action: ${action}`);
         });
     }
-    onToolError(error, runId, parentRunId) {
+    handleAgentFinish(finish, runId, parentRunId, tags, options) {
         return __awaiter(this, void 0, void 0, function* () {
-            // console.error(`>>> onToolError: runId: ${runId}`, error); // Debug log
-            // Error is recorded within _endSpan
-            this._endSpan(runId, undefined, error);
+            console.log(`Agent finish: ${finish}`);
         });
     }
-    // --- Retriever Events ---
-    onRetrieverStart(serialized, query, runId, parentRunId, tags, metadata) {
+    handleLLMStart(serialized, prompts, runId, parentRunId, extraParams, tags, metadata, runName, options) {
         return __awaiter(this, void 0, void 0, function* () {
-            console.log(`[Judgeval Handler] onRetrieverStart called for runId: ${runId}`); // Added log
-            // console.log(`>>> onRetrieverStart: runId: ${runId}, name: ${serialized?.name}`); // Debug log
-            const traceClient = this._getActiveTraceClient();
+            const name = "LLM call";
+            const traceClient = this.getTraceClient();
             if (!traceClient)
                 return;
-            // Match Python naming convention
-            const retrieverName = serialized === null || serialized === void 0 ? void 0 : serialized.name;
-            let spanName;
-            if (retrieverName) {
-                spanName = `RETRIEVER_${retrieverName.toUpperCase()}`;
-            }
-            else {
-                spanName = "RETRIEVER_CALL";
-            }
-            // const spanName = `RETRIEVER: ${retrieverName}`; // Old naming
-            this._startSpan(runId, spanName, "retriever"); // Use 'retriever' span type
-            // Record input associated with the started span
-            const currentSpanId = this.runIdToSpanId[runId];
-            if (currentSpanId) {
-                // Input is recorded in the current context span of the TraceClient
-                traceClient.recordInput({ query: query /* , options: options */ }); // Removed spanId
-            }
+            traceClient.startSpan(name, { spanType: "llm" });
+            traceClient.recordInput({
+                args: prompts,
+                kwargs: Object.assign({ extra_params: extraParams, tags: tags, metadata: metadata }, options),
+            });
         });
     }
-    onRetrieverEnd(documents, runId, parentRunId, tags) {
+    // Also called on chat model end
+    handleLLMEnd(output, runId, parentRunId, tags, options) {
         return __awaiter(this, void 0, void 0, function* () {
-            // console.log(`>>> onRetrieverEnd: runId: ${runId}, docs: ${documents.length}`); // Debug log
-            const traceClient = this._getActiveTraceClient();
-            if (!traceClient) {
-                // If no trace client, we still need to clean up the runId mapping potentially
-                this._endSpan(runId);
+            const traceClient = this.getTraceClient();
+            if (!traceClient)
                 return;
-            }
-            // Format output similar to Python's handler
-            const docSummary = documents.map((doc, i) => ({
-                index: i,
-                page_content: doc.pageContent.substring(0, 150) + (doc.pageContent.length > 150 ? "..." : ""), // Slightly longer preview
-                metadata: doc.metadata,
-            }));
-            const output = {
-                document_count: documents.length,
-                documents: docSummary,
-            };
-            // Output is recorded within _endSpan
-            this._endSpan(runId, output);
+            traceClient.recordOutput(output.generations[0][0].text);
+            traceClient.endSpan();
+        });
+    }
+    handleLLMError(error, runId, parentRunId, tags, options) {
+        return __awaiter(this, void 0, void 0, function* () {
+            console.log(`LLM error: ${error}`);
+            const traceClient = this.getTraceClient();
+            if (!traceClient)
+                return;
+            traceClient.recordError(error);
+            traceClient.endSpan();
         });
     }
-    onRetrieverError(error, runId, parentRunId, tags) {
+    // Why is there no handleChatModelEnd?
+    handleChatModelStart(serialized, messages, runId, parentRunId, extraParams, tags, metadata, name, runType, runName, options) {
         return __awaiter(this, void 0, void 0, function* () {
-            // console.error(`>>> onRetrieverError: runId: ${runId}`, error); // Debug log
-            // Error is recorded within _endSpan
-            this._endSpan(runId, undefined, error);
+            let name_ = "LLM call";
+            if (serialized.id.includes("openai")) {
+                name_ = "OPENAI_API_CALL";
+            }
+            else if (serialized.id.includes("anthropic")) {
+                name_ = "ANTHROPIC_API_CALL";
+            }
+            else if (serialized.id.includes("together")) {
+                name_ = "TOGETHER_API_CALL";
+            }
+            else {
+                name_ = "LLM call";
+            }
+            const traceClient = this.getTraceClient();
+            if (!traceClient)
+                return;
+            traceClient.startSpan(name_, { spanType: "llm" });
+            traceClient.recordInput({
+                args: messages,
+                kwargs: Object.assign({ extra_params: extraParams, tags: tags, metadata: metadata }, options),
+            });
         });
     }
 }