judgeval 0.1.41 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. package/README.md +10 -5
  2. package/dist/cjs/clients.js.map +1 -1
  3. package/dist/cjs/common/integrations/langgraph.js +141 -367
  4. package/dist/cjs/common/integrations/langgraph.js.map +1 -1
  5. package/dist/cjs/common/logger.js +6 -6
  6. package/dist/cjs/common/logger.js.map +1 -1
  7. package/dist/cjs/common/tracer.js +300 -317
  8. package/dist/cjs/common/tracer.js.map +1 -1
  9. package/dist/cjs/evaluation-run.js +9 -9
  10. package/dist/cjs/evaluation-run.js.map +1 -1
  11. package/dist/cjs/index.js +54 -54
  12. package/dist/cjs/index.js.map +1 -1
  13. package/dist/cjs/judgment-client.js +73 -56
  14. package/dist/cjs/judgment-client.js.map +1 -1
  15. package/dist/cjs/rules.js +8 -8
  16. package/dist/cjs/rules.js.map +1 -1
  17. package/dist/cjs/run-evaluation.js +60 -60
  18. package/dist/cjs/run-evaluation.js.map +1 -1
  19. package/dist/cjs/scorers/api-scorer.js +15 -15
  20. package/dist/cjs/scorers/api-scorer.js.map +1 -1
  21. package/dist/cjs/scorers/base-scorer.js +4 -4
  22. package/dist/cjs/scorers/base-scorer.js.map +1 -1
  23. package/dist/cjs/scorers/exact-match-scorer.js +2 -2
  24. package/dist/cjs/scorers/exact-match-scorer.js.map +1 -1
  25. package/dist/esm/clients.js.map +1 -1
  26. package/dist/esm/common/integrations/langgraph.js +142 -371
  27. package/dist/esm/common/integrations/langgraph.js.map +1 -1
  28. package/dist/esm/common/logger.js +1 -1
  29. package/dist/esm/common/logger.js.map +1 -1
  30. package/dist/esm/common/tracer.js +283 -298
  31. package/dist/esm/common/tracer.js.map +1 -1
  32. package/dist/esm/evaluation-run.js +3 -3
  33. package/dist/esm/evaluation-run.js.map +1 -1
  34. package/dist/esm/index.js +12 -12
  35. package/dist/esm/index.js.map +1 -1
  36. package/dist/esm/judgment-client.js +33 -16
  37. package/dist/esm/judgment-client.js.map +1 -1
  38. package/dist/esm/rules.js +7 -7
  39. package/dist/esm/rules.js.map +1 -1
  40. package/dist/esm/run-evaluation.js +4 -4
  41. package/dist/esm/run-evaluation.js.map +1 -1
  42. package/dist/esm/scorers/api-scorer.js +1 -1
  43. package/dist/esm/scorers/api-scorer.js.map +1 -1
  44. package/dist/esm/scorers/base-scorer.js +1 -1
  45. package/dist/esm/scorers/base-scorer.js.map +1 -1
  46. package/dist/esm/scorers/exact-match-scorer.js +1 -1
  47. package/dist/esm/scorers/exact-match-scorer.js.map +1 -1
  48. package/dist/types/clients.d.ts +1 -2
  49. package/dist/types/common/integrations/langgraph.d.ts +22 -30
  50. package/dist/types/common/tracer.d.ts +23 -21
  51. package/dist/types/data/result.d.ts +1 -1
  52. package/dist/types/evaluation-run.d.ts +3 -3
  53. package/dist/types/index.d.ts +12 -12
  54. package/dist/types/judgment-client.d.ts +4 -4
  55. package/dist/types/rules.d.ts +3 -3
  56. package/dist/types/run-evaluation.d.ts +4 -4
  57. package/dist/types/scorers/api-scorer.d.ts +3 -3
  58. package/dist/types/scorers/base-scorer.d.ts +2 -2
  59. package/dist/types/scorers/exact-match-scorer.d.ts +3 -3
  60. package/package.json +6 -3
package/README.md CHANGED
@@ -131,12 +131,17 @@ const tracer = Tracer.getInstance({
131
131
  enableEvaluations: true
132
132
  });
133
133
 
134
- await tracer.runInTrace({ name: "my-trace" }, async (trace) => {
135
- // Run operations within the trace
136
- await trace.runInSpan("operation", { spanType: "tool" }, async () => {
134
+ // Analogous to Python SDK's with, e.g.
135
+ //
136
+ // with tracer.trace("my-trace") as trace:
137
+ // with trace.span("operation") as span:
138
+ // # Perform operations
139
+ //
140
+ for (const trace of tracer.trace("my-trace")) {
141
+ for (const span of trace.span("operation")) {
137
142
  // Perform operations
138
- });
139
- });
143
+ }
144
+ }
140
145
  ```
141
146
 
142
147
  ## Result Retrieval
package/dist/cjs/clients.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"clients.js","sourceRoot":"","sources":["../../src/clients.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,+CAAiC;AACjC,oDAA4B;AAC5B,4DAA0C;AAC1C,8DAAmC;AAEnC,6BAA6B;AAC7B,MAAM,CAAC,MAAM,EAAE,CAAC;AAEhB,oCAAoC;AACpC,IAAI,YAAY,GAAkB,IAAI,CAAC;AAsC9B,oCAAY;AArCrB,IAAI,OAAO,CAAC,GAAG,CAAC,cAAc,EAAE,CAAC;IAC/B,IAAI,CAAC;QACH,uBAAA,YAAY,GAAG,IAAI,gBAAM,CAAC;YACxB,MAAM,EAAE,OAAO,CAAC,GAAG,CAAC,cAAc;SACnC,CAAC,CAAC;IACL,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,qDAAqD;QACrD,OAAO,CAAC,IAAI,CAAC,mCAAmC,EAAE,KAAK,CAAC,CAAC;IAC3D,CAAC;AACH,CAAC;AAED,uCAAuC;AACvC,IAAI,eAAe,GAAqB,IAAI,CAAC;AAyBtB,0CAAe;AAxBtC,IAAI,OAAO,CAAC,GAAG,CAAC,iBAAiB,EAAE,CAAC;IAClC,IAAI,CAAC;QACH,0BAAA,eAAe,GAAG,IAAI,aAAS,CAAC;YAC9B,MAAM,EAAE,OAAO,CAAC,GAAG,CAAC,iBAAiB;SACtC,CAAC,CAAC;IACL,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,wDAAwD;QACxD,OAAO,CAAC,IAAI,CAAC,sCAAsC,EAAE,KAAK,CAAC,CAAC;IAC9D,CAAC;AACH,CAAC;AAED,sCAAsC;AACtC,IAAI,cAAc,GAAoB,IAAI,CAAC;AAYH,wCAAc;AAXtD,IAAI,OAAO,CAAC,GAAG,CAAC,gBAAgB,EAAE,CAAC;IACjC,IAAI,CAAC;QACH,yBAAA,cAAc,GAAG,IAAI,qBAAQ,CAAC;YAC5B,IAAI,EAAE,OAAO,CAAC,GAAG,CAAC,gBAAgB;SACnC,CAAC,CAAC;IACL,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,uDAAuD;QACvD,OAAO,CAAC,IAAI,CAAC,qCAAqC,EAAE,KAAK,CAAC,CAAC;IAC7D,CAAC;AACH,CAAC"}
1
+ {"version":3,"file":"clients.js","sourceRoot":"","sources":["../../src/clients.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,+CAAiC;AACjC,oDAA4B;AAC5B,4DAA0C;AAC1C,8DAAmC;AAEnC,6BAA6B;AAC7B,MAAM,CAAC,MAAM,EAAE,CAAC;AAEhB,oCAAoC;AACpC,IAAI,YAAY,GAAkB,IAAI,CAAC;AAsC9B,oCAAY;AArCrB,IAAI,OAAO,CAAC,GAAG,CAAC,cAAc,EAAE,CAAC;IAC/B,IAAI,CAAC;QACH,uBAAA,YAAY,GAAG,IAAI,gBAAM,CAAC;YACxB,MAAM,EAAE,OAAO,CAAC,GAAG,CAAC,cAAc;SACnC,CAAC,CAAC;IACL,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,qDAAqD;QACrD,OAAO,CAAC,IAAI,CAAC,mCAAmC,EAAE,KAAK,CAAC,CAAC;IAC3D,CAAC;AACH,CAAC;AAED,uCAAuC;AACvC,IAAI,eAAe,GAAqB,IAAI,CAAC;AAyBtB,0CAAe;AAxBtC,IAAI,OAAO,CAAC,GAAG,CAAC,iBAAiB,EAAE,CAAC;IAClC,IAAI,CAAC;QACH,0BAAA,eAAe,GAAG,IAAI,aAAS,CAAC;YAC9B,MAAM,EAAE,OAAO,CAAC,GAAG,CAAC,iBAAiB;SACtC,CAAC,CAAC;IACL,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,wDAAwD;QACxD,OAAO,CAAC,IAAI,CAAC,sCAAsC,EAAE,KAAK,CAAC,CAAC;IAC9D,CAAC;AACH,CAAC;AAED,sCAAsC;AACtC,IAAI,cAAc,GAAe,IAAI,CAAC;AAYE,wCAAc;AAXtD,IAAI,OAAO,CAAC,GAAG,CAAC,gBAAgB,EAAE,CAAC;IACjC,IAAI,CAAC;QACH,yBAAA,cAAc,GAAG,IAAK,qBAAgB,CAAC;YACrC,IAAI,EAAE,OAAO,CAAC,GAAG,CAAC,gBAAgB;SACnC,CAAC,CAAC;IACL,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,uDAAuD;QACvD,OAAO,CAAC,IAAI,CAAC,qCAAqC,EAAE,KAAK,CAAC,CAAC;IAC7D,CAAC;AACH,CAAC"}
package/dist/cjs/common/integrations/langgraph.js CHANGED
@@ -1,5 +1,4 @@
1
1
  "use strict";
2
- // judgeval-js/src/integrations/langgraph.ts
3
2
  var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
4
3
  function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
5
4
  return new (P || (P = Promise))(function (resolve, reject) {
@@ -12,431 +11,206 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
12
11
  Object.defineProperty(exports, "__esModule", { value: true });
13
12
  exports.JudgevalLanggraphCallbackHandler = void 0;
14
13
  const base_1 = require("@langchain/core/callbacks/base");
15
- const messages_1 = require("@langchain/core/messages"); // Regular import for BaseMessage value
16
- const uuid_1 = require("uuid");
17
- const tracer_1 = require("../tracer"); // Adjust path
18
- // --- Global Handler Setup (REMOVED - No longer needed with context propagation) ---
14
+ const tracer_js_1 = require("../tracer.js");
15
+ // It's my understanding that you can only be on one node in the graph at a time
16
+ // That means we don't need to worry about the async problem
19
17
  class JudgevalLanggraphCallbackHandler extends base_1.BaseCallbackHandler {
20
- // Optional: Track executed nodes/tools if needed for external use cases like evaluation
21
- // public executedNodes: string[] = [];
22
- // public executedTools: string[] = [];
23
18
  constructor(tracer) {
24
- super(); // Call parent constructor
25
- this.name = "judgeval_langgraph_callback_handler"; // Identifier for the handler
26
- this.spanStartTimes = {}; // Store start time per spanId (using Judgeval spanId)
27
- this.runIdToSpanId = {}; // Map Langchain runId to Judgeval spanId
28
- this.tracer = tracer !== null && tracer !== void 0 ? tracer : tracer_1.Tracer.getInstance(); // Use provided or singleton tracer
29
- // No need to get traceClient here, will be fetched from context in methods
19
+ super();
20
+ this.name = "judgeval_langgraph_callback_handler";
21
+ this.executedNodeTools = [];
22
+ this.executedNodes = [];
23
+ this.executedTools = [];
24
+ this.tracer = tracer !== null && tracer !== void 0 ? tracer : tracer_js_1.Tracer.getInstance();
30
25
  console.log(`[Judgeval Handler] Initialized. Monitoring Enabled: ${this.tracer.enableMonitoring}`); // Added prefix
31
26
  }
32
- // Helper to safely get the current TraceClient from context
33
- _getActiveTraceClient() {
34
- if (!this.tracer.enableMonitoring) {
35
- // console.log("Judgeval Handler: Monitoring disabled."); // Reduce noise
36
- return null;
37
- }
27
+ getTraceClient() {
28
+ if (!this.tracer.enableMonitoring)
29
+ return undefined;
38
30
  const client = this.tracer.getCurrentTrace();
39
31
  if (!client) {
40
- console.log("[Judgeval Handler] _getActiveTraceClient: No active trace client found in context."); // Added log
41
- }
42
- // Explicitly return null if client is undefined
43
- return client !== null && client !== void 0 ? client : null;
44
- }
45
- // Helper to create a unique span ID
46
- _generateSpanId() {
47
- return (0, uuid_1.v4)();
48
- }
49
- // Start a new span, associating it with the LangChain runId
50
- _startSpan(lcRunId, name, spanType = "span") {
51
- var _a;
52
- const traceClient = this._getActiveTraceClient();
53
- if (!traceClient)
54
- return;
55
- const startTime = Date.now() / 1000;
56
- const spanId = this._generateSpanId();
57
- this.runIdToSpanId[lcRunId] = spanId; // Map Langchain runId to our new spanId
58
- // Get parent span ID from the current async context
59
- const parentSpanId = tracer_1.currentSpanAsyncLocalStorage.getStore();
60
- // Calculate depth based on parent
61
- let depth = 0;
62
- if (parentSpanId) {
63
- const parentEntry = traceClient.entries.find((e) => e.span_id === parentSpanId && e.type === 'enter');
64
- if (parentEntry) {
65
- depth = ((_a = parentEntry.depth) !== null && _a !== void 0 ? _a : -1) + 1; // Increment parent depth
66
- }
67
- else {
68
- // If parent entry not found (should be rare in async context), start at 0
69
- console.warn(`Judgeval Handler: Parent span entry ${parentSpanId} not found for child ${spanId}. Defaulting depth to 0.`);
70
- depth = 0;
71
- }
72
- }
73
- else {
74
- // No parent in context, this is a root span (relative to this handler's context)
75
- depth = 0;
32
+ console.warn("No trace client found");
76
33
  }
77
- // console.log(`>>> _startSpan: Creating span ${spanId} ('${name}') depth: ${depth}, parent: ${parentSpanId ?? 'None'}, lcRunId: ${lcRunId}`); // Debug log
78
- traceClient.addEntry({
79
- type: 'enter',
80
- function: name,
81
- span_id: spanId,
82
- depth: depth,
83
- timestamp: startTime,
84
- span_type: spanType,
85
- parent_span_id: parentSpanId
86
- });
87
- this.spanStartTimes[spanId] = startTime;
88
- // Set this new span as the current one in the context *for child operations*
89
- // Note: This relies on Langchain's async flow preserving the context.
90
- // If Langchain breaks context, this might not propagate correctly.
91
- // It's generally better to *read* the parent from context when starting a span,
92
- // rather than trying to manage pushing/popping onto the context here.
93
- // The Tracer's observe/runInTrace methods handle setting the context.
34
+ return client;
94
35
  }
95
- // End the span corresponding to the LangChain runId
96
- _endSpan(lcRunId, output, error) {
97
- const traceClient = this._getActiveTraceClient();
98
- // Retrieve the spanId using the LangChain runId
99
- const spanId = this.runIdToSpanId[lcRunId];
100
- if (!traceClient || !spanId || !(spanId in this.spanStartTimes)) {
101
- // console.log(`>>> _endSpan: Skipping endSpan for lcRunId ${lcRunId} / spanId ${spanId} - ID/trace/time missing or monitoring disabled.`); // Debug log
102
- // Clean up the map even if we can't end the span fully
103
- if (lcRunId in this.runIdToSpanId)
104
- delete this.runIdToSpanId[lcRunId];
105
- return;
106
- }
107
- const startTime = this.spanStartTimes[spanId];
108
- const endTime = Date.now() / 1000;
109
- const duration = endTime - startTime;
110
- // Find the original 'enter' entry to get details like name and depth
111
- const enterEntry = traceClient.entries.find(e => e.span_id === spanId && e.type === 'enter');
112
- if (!enterEntry) {
113
- console.warn(`Judgeval Handler: Could not find 'enter' entry for span ${spanId} (lcRunId: ${lcRunId}) during _endSpan.`);
114
- // Clean up maps even if entry isn't found
115
- delete this.spanStartTimes[spanId];
116
- delete this.runIdToSpanId[lcRunId];
117
- return;
118
- }
119
- // Record output or error *before* the exit entry
120
- if (error) {
121
- traceClient.recordOutput(error instanceof Error ? error : new Error(String(error)));
122
- }
123
- else if (output !== undefined) {
124
- // Avoid recording 'undefined' as output explicitly
125
- traceClient.recordOutput(output);
126
- }
127
- traceClient.addEntry({
128
- type: 'exit',
129
- function: enterEntry.function, // Use name from 'enter' entry
130
- span_id: spanId,
131
- depth: enterEntry.depth, // Use depth from 'enter' entry
132
- timestamp: endTime,
133
- duration: duration,
134
- span_type: enterEntry.span_type // Use type from 'enter' entry
135
- });
136
- // Clean up maps
137
- delete this.spanStartTimes[spanId];
138
- delete this.runIdToSpanId[lcRunId];
139
- // console.log(`>>> _endSpan: Ended span ${spanId} ('${enterEntry.function}'), lcRunId: ${lcRunId}`); // Debug log
140
- }
141
- // --- Chain Events ---
142
- onChainStart(serialized, inputs, runId, parentRunId, tags, metadata, options // Langchain-JS doesn't seem to pass options here consistently
143
- ) {
36
+ handleRetrieverStart(serialized, query, runId, parentRunId, tags, metadata, name, options) {
144
37
  return __awaiter(this, void 0, void 0, function* () {
145
- var _a;
146
- console.log(`[Judgeval Handler] onChainStart called for runId: ${runId}`); // Added log
147
- // console.log(`>>> onChainStart: runId: ${runId}, parentRunId: ${parentRunId}, metadata: ${JSON.stringify(metadata)}, tags: ${JSON.stringify(tags)}`); // Debug log
148
- const traceClient = this._getActiveTraceClient();
38
+ let name_ = "RETRIEVER_CALL";
39
+ if (serialized === null || serialized === void 0 ? void 0 : serialized.name) {
40
+ name_ = `RETRIEVER_${serialized.name.toUpperCase()}`;
41
+ }
42
+ const traceClient = this.getTraceClient();
149
43
  if (!traceClient)
150
44
  return;
151
- let spanName;
152
- const spanType = "chain"; // Keep type as chain
153
- // Determine span name based on Python logic: prioritize 'LangGraph' root, then serialized name
154
- const executionName = (_a = serialized === null || serialized === void 0 ? void 0 : serialized.name) !== null && _a !== void 0 ? _a : 'Unknown Chain';
155
- if (executionName === 'LangGraph') {
156
- spanName = 'LangGraph'; // Match Python root span name
157
- }
158
- else {
159
- // Use the serialized name or a generic fallback, avoiding node-specific prefixes
160
- spanName = executionName;
161
- }
162
- // Removed node-specific logic:
163
- // const nodeName = metadata?.langgraph_node ? String(metadata.langgraph_node) : null;
164
- // if (nodeName) { ... } else { ... }
165
- this._startSpan(runId, spanName, spanType);
166
- // Record input associated with the started span
167
- const currentSpanId = this.runIdToSpanId[runId];
168
- if (currentSpanId) {
169
- // Input is recorded in the current context span of the TraceClient
170
- traceClient.recordInput({ args: inputs /* , options: options */ }); // Removed spanId
171
- }
45
+ traceClient.startSpan(name_, { spanType: "retriever" });
46
+ traceClient.recordInput({
47
+ query,
48
+ tags,
49
+ metadata,
50
+ options,
51
+ });
172
52
  });
173
53
  }
174
- onChainEnd(outputs, // Output can sometimes be a simple string
175
- runId, parentRunId, tags) {
54
+ handleRetrieverEnd(documents, runId, parentRunId, tags, options) {
176
55
  return __awaiter(this, void 0, void 0, function* () {
177
- // console.log(`>>> onChainEnd: runId: ${runId}`); // Debug log
178
- // Output is recorded within _endSpan
179
- this._endSpan(runId, outputs);
56
+ const docSummary = documents.map((doc, i) => ({
57
+ index: i,
58
+ page_content: doc.pageContent.length > 100
59
+ ? doc.pageContent.substring(0, 97) + "..."
60
+ : doc.pageContent,
61
+ metadata: doc.metadata,
62
+ }));
63
+ const traceClient = this.getTraceClient();
64
+ if (!traceClient)
65
+ return;
66
+ traceClient.recordOutput({
67
+ document_count: documents.length,
68
+ documents: docSummary,
69
+ });
70
+ traceClient.endSpan();
180
71
  });
181
72
  }
182
- onChainError(error, runId, parentRunId, tags) {
73
+ handleRetrieverError(error, runId, parentRunId, tags, options) {
183
74
  return __awaiter(this, void 0, void 0, function* () {
184
- // console.error(`>>> onChainError: runId: ${runId}`, error); // Debug log
185
- // Error is recorded within _endSpan
186
- this._endSpan(runId, undefined, error);
75
+ const traceClient = this.getTraceClient();
76
+ if (!traceClient)
77
+ return;
78
+ traceClient.recordError(error);
187
79
  });
188
80
  }
189
- // --- LLM Events ---
190
- _getLlmSpanName(serialized) {
191
- var _a;
192
- // Simplify extraction if possible, check common patterns
193
- const idPath = ((_a = serialized === null || serialized === void 0 ? void 0 : serialized.id) !== null && _a !== void 0 ? _a : []).join('/').toLowerCase();
194
- if (idPath.includes("openai"))
195
- return "OPENAI_API_CALL";
196
- if (idPath.includes("anthropic"))
197
- return "ANTHROPIC_API_CALL";
198
- if (idPath.includes("together"))
199
- return "TOGETHER_API_CALL";
200
- // Add other common providers if needed (e.g., google, bedrock)
201
- return "LLM_CALL"; // Default
202
- }
203
- // Generic LLM Start handler (covers both base LLM and ChatModel)
204
- _handleLlmStart(serialized, runId, inputData, extraParams,
205
- // tags?: string[] | undefined, // Often unused for LLM spans
206
- // metadata?: Record<string, unknown> | undefined, // Often unused for LLM spans
207
- options // Langchain passes invocation params here
208
- ) {
81
+ handleChainStart(serialized, inputs, runId, parentRunId, tags, metadata, name, runName, runType, options) {
209
82
  return __awaiter(this, void 0, void 0, function* () {
210
- var _a, _b, _c, _d, _e;
211
- // console.log(`>>> _handleLlmStart: runId: ${runId}`); // Debug log
212
- const traceClient = this._getActiveTraceClient();
213
- if (!traceClient)
83
+ let traceClient = this.getTraceClient();
84
+ if (!traceClient) {
85
+ console.warn("No trace client found");
214
86
  return;
215
- // Extract model name from options (common pattern) or extraParams
216
- const invocationParams = (_b = (_a = options === null || options === void 0 ? void 0 : options.invocation_params) !== null && _a !== void 0 ? _a : extraParams) !== null && _b !== void 0 ? _b : {};
217
- const modelName = (_d = (_c = invocationParams === null || invocationParams === void 0 ? void 0 : invocationParams.model_name) !== null && _c !== void 0 ? _c : invocationParams === null || invocationParams === void 0 ? void 0 : invocationParams.model) !== null && _d !== void 0 ? _d : 'unknown_model';
218
- const spanName = this._getLlmSpanName(serialized);
219
- this._startSpan(runId, spanName, "llm");
220
- // Prepare input payload
221
- let inputPayload = {
222
- model: modelName,
223
- params: invocationParams, // Record all invocation params
224
- // options: options // May include other config besides invocation_params
225
- };
226
- if ('prompts' in inputData) {
227
- inputPayload.prompts = inputData.prompts;
228
- }
229
- else if ('messages' in inputData) {
230
- // Langchain JS passes messages as BaseMessage[][]
231
- inputPayload.messages = (_e = inputData.messages[0]) !== null && _e !== void 0 ? _e : []; // Extract first batch element safely
232
- }
233
- // Record input associated with the started span
234
- const currentSpanId = this.runIdToSpanId[runId];
235
- if (currentSpanId) {
236
- // Input is recorded in the current context span of the TraceClient
237
- traceClient.recordInput(inputPayload); // Removed spanId
238
87
  }
88
+ traceClient.startSpan(name !== null && name !== void 0 ? name : "unknown_chain", { spanType: "chain" });
89
+ traceClient.recordInput(inputs);
239
90
  });
240
91
  }
241
- onLlmStart(serialized, prompts, runId, parentRunId, extraParams, tags, metadata, options // options might contain invocation_params
242
- ) {
92
+ handleChainEnd(outputs, runId, parentRunId, tags, options) {
243
93
  return __awaiter(this, void 0, void 0, function* () {
244
- console.log(`[Judgeval Handler] onLlmStart called for runId: ${runId}`); // Added log
245
- yield this._handleLlmStart(serialized, runId, { prompts }, extraParams, options);
94
+ const traceClient = this.getTraceClient();
95
+ if (!traceClient)
96
+ return;
97
+ traceClient.recordOutput(outputs);
98
+ traceClient.endSpan();
246
99
  });
247
100
  }
248
- // Handles Chat Model start specifically
249
- onChatModelStart(serialized, messages, runId, parentRunId, extraParams, tags, metadata, options // options might contain invocation_params
250
- ) {
101
+ handleChainError(error, runId, parentRunId, tags, options) {
251
102
  return __awaiter(this, void 0, void 0, function* () {
252
- console.log(`[Judgeval Handler] onChatModelStart called for runId: ${runId}`); // Added log
253
- yield this._handleLlmStart(serialized, runId, { messages }, extraParams, options);
103
+ console.log(`Chain error: ${error}`);
104
+ const traceClient = this.getTraceClient();
105
+ if (!traceClient)
106
+ return;
107
+ traceClient.recordError(error);
108
+ traceClient.endSpan();
254
109
  });
255
110
  }
256
- /**
257
- * Handles the end of an LLM call. Extracts the output, usage data, and ends the corresponding span.
258
- * @param output The result from the LLM call.
259
- * @param runId The unique ID of the run.
260
- */
261
- onLlmEnd(output, runId) {
111
+ handleToolStart(serialized, inputStr, runId, parentRunId, tags, metadata, name, runType, runName, options) {
262
112
  return __awaiter(this, void 0, void 0, function* () {
263
- var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m, _o;
264
- // console.log(`>>> onLlmEnd: runId: ${runId}`); // Debug log
265
- const traceClient = this._getActiveTraceClient();
266
- const spanId = this.runIdToSpanId[runId]; // Needed for context if recording output directly here
267
- if (!traceClient || !spanId) {
268
- // console.warn(`Judgeval Handler: Skipping onLlmEnd for runId ${runId}. Trace client or span ID missing.`); // Debug log
269
- this._endSpan(runId, output); // Still attempt to end span if possible, passing raw output
113
+ const traceClient = this.getTraceClient();
114
+ if (!traceClient)
270
115
  return;
116
+ // Python SDK doesn't handle name None case
117
+ traceClient.startSpan(name !== null && name !== void 0 ? name : "unknown_tool", { spanType: "tool" });
118
+ if (name) {
119
+ this.executedTools.push(name);
120
+ this.executedNodeTools.push(this.previousNode ? `${this.previousNode}:${name}` : name);
271
121
  }
272
- // Process LLMResult to extract relevant data
273
- const generation = (_b = (_a = output.generations) === null || _a === void 0 ? void 0 : _a[0]) === null || _b === void 0 ? void 0 : _b[0];
274
- let llmOutputPayload = {};
275
- if (generation) {
276
- // Handle ChatGeneration vs regular Generation
277
- if ("message" in generation && generation.message instanceof messages_1.BaseMessage) {
278
- const aiMessage = generation.message; // Assume AI message for output
279
- llmOutputPayload.content = aiMessage.content;
280
- // Include tool calls if present
281
- if (aiMessage.tool_calls && aiMessage.tool_calls.length > 0) {
282
- llmOutputPayload.tool_calls = aiMessage.tool_calls;
283
- }
284
- if (aiMessage.invalid_tool_calls && aiMessage.invalid_tool_calls.length > 0) {
285
- llmOutputPayload.invalid_tool_calls = aiMessage.invalid_tool_calls;
286
- }
287
- // Usage metadata might be here (e.g., OpenAI)
288
- if (aiMessage.usage_metadata) {
289
- llmOutputPayload.usage_metadata = aiMessage.usage_metadata;
290
- }
291
- }
292
- else if (generation.text) {
293
- // Handle plain text generation
294
- llmOutputPayload.content = generation.text;
295
- }
296
- // Standardize token usage extraction if not in usage_metadata
297
- // Check generationInfo first, then llmOutput
298
- const tokenUsage = (_d = (_c = generation.generationInfo) === null || _c === void 0 ? void 0 : _c.tokenUsage) !== null && _d !== void 0 ? _d : (_e = output.llmOutput) === null || _e === void 0 ? void 0 : _e.tokenUsage;
299
- if (tokenUsage && !((_f = llmOutputPayload.usage_metadata) === null || _f === void 0 ? void 0 : _f.token_usage)) { // Avoid duplication if already in usage_metadata
300
- llmOutputPayload.token_usage = {
301
- completionTokens: (_g = tokenUsage.completionTokens) !== null && _g !== void 0 ? _g : tokenUsage.completion_tokens,
302
- promptTokens: (_h = tokenUsage.promptTokens) !== null && _h !== void 0 ? _h : tokenUsage.prompt_tokens,
303
- totalTokens: (_j = tokenUsage.totalTokens) !== null && _j !== void 0 ? _j : tokenUsage.total_tokens,
304
- };
305
- // Normalize keys within usage_metadata if present
306
- }
307
- else if ((_k = llmOutputPayload.usage_metadata) === null || _k === void 0 ? void 0 : _k.token_usage) {
308
- const usageMeta = llmOutputPayload.usage_metadata.token_usage;
309
- llmOutputPayload.token_usage = {
310
- completionTokens: (_l = usageMeta.completionTokens) !== null && _l !== void 0 ? _l : usageMeta.completion_tokens,
311
- promptTokens: (_m = usageMeta.promptTokens) !== null && _m !== void 0 ? _m : usageMeta.prompt_tokens,
312
- totalTokens: (_o = usageMeta.totalTokens) !== null && _o !== void 0 ? _o : usageMeta.total_tokens,
313
- };
314
- }
315
- // Include other generationInfo if available and potentially useful
316
- if (generation.generationInfo) {
317
- llmOutputPayload.generation_info = generation.generationInfo;
318
- }
319
- }
320
- // Include raw LLM output if available and potentially useful (can be verbose)
321
- // if (output.llmOutput) {
322
- // llmOutputPayload.raw_llm_output = output.llmOutput;
323
- // }
324
- // Output is recorded within _endSpan
325
- this._endSpan(runId, llmOutputPayload);
122
+ traceClient.recordInput({
123
+ args: inputStr,
124
+ kwargs: options,
125
+ });
326
126
  });
327
127
  }
328
- onLlmError(error, runId, parentRunId) {
128
+ handleToolEnd(output, runId, parentRunId, options) {
329
129
  return __awaiter(this, void 0, void 0, function* () {
330
- // console.error(`>>> onLlmError: runId: ${runId}`, error); // Debug log
331
- // Error is recorded within _endSpan
332
- this._endSpan(runId, undefined, error);
130
+ const traceClient = this.getTraceClient();
131
+ if (!traceClient)
132
+ return;
133
+ traceClient.recordOutput(output);
134
+ traceClient.endSpan();
333
135
  });
334
136
  }
335
- // --- Tool Events ---
336
- onToolStart(serialized, inputStr, // input is often a stringified object
337
- runId, parentRunId, tags, metadata) {
137
+ handleToolError(error, runId, parentRunId, options) {
338
138
  return __awaiter(this, void 0, void 0, function* () {
339
- var _a;
340
- console.log(`[Judgeval Handler] onToolStart called for runId: ${runId}`); // Added log
341
- // console.log(`>>> onToolStart: runId: ${runId}, name: ${serialized?.name}`); // Debug log
342
- const traceClient = this._getActiveTraceClient();
139
+ console.log(`Tool error: ${error}`);
140
+ const traceClient = this.getTraceClient();
343
141
  if (!traceClient)
344
142
  return;
345
- // Match Python: Use the tool name directly as the span name
346
- const toolName = (_a = serialized === null || serialized === void 0 ? void 0 : serialized.name) !== null && _a !== void 0 ? _a : 'Unknown Tool';
347
- const spanName = toolName; // Removed "TOOL: " prefix
348
- this._startSpan(runId, spanName, "tool");
349
- // Try to parse inputStr if it's JSON, otherwise keep as string
350
- let parsedInput = inputStr;
351
- try {
352
- // Avoid parsing null/empty strings
353
- if (inputStr && inputStr.trim().startsWith('{') && inputStr.trim().endsWith('}')) {
354
- parsedInput = JSON.parse(inputStr);
355
- }
356
- }
357
- catch (e) {
358
- // Ignore error, keep as string if parsing fails
359
- }
360
- // Record input associated with the started span
361
- const currentSpanId = this.runIdToSpanId[runId];
362
- if (currentSpanId) {
363
- // Input is recorded in the current context span of the TraceClient
364
- traceClient.recordInput({ input: parsedInput /* , options: options */ }); // Removed spanId
365
- }
366
- // Track tool execution (if needed externally)
367
- // this.executedTools.push(toolName); // Example
143
+ traceClient.recordError(error);
144
+ traceClient.endSpan();
368
145
  });
369
146
  }
370
- onToolEnd(output, // Tool output is typically a string
371
- runId, parentRunId) {
147
+ handleAgentAction(action, runId, parentRunId, tags, options) {
372
148
  return __awaiter(this, void 0, void 0, function* () {
373
- // console.log(`>>> onToolEnd: runId: ${runId}`); // Debug log
374
- // Output is recorded within _endSpan
375
- this._endSpan(runId, output);
149
+ console.log(`Agent action: ${action}`);
376
150
  });
377
151
  }
378
- onToolError(error, runId, parentRunId) {
152
+ handleAgentFinish(finish, runId, parentRunId, tags, options) {
379
153
  return __awaiter(this, void 0, void 0, function* () {
380
- // console.error(`>>> onToolError: runId: ${runId}`, error); // Debug log
381
- // Error is recorded within _endSpan
382
- this._endSpan(runId, undefined, error);
154
+ console.log(`Agent finish: ${finish}`);
383
155
  });
384
156
  }
385
- // --- Retriever Events ---
386
- onRetrieverStart(serialized, query, runId, parentRunId, tags, metadata) {
157
+ handleLLMStart(serialized, prompts, runId, parentRunId, extraParams, tags, metadata, runName, options) {
387
158
  return __awaiter(this, void 0, void 0, function* () {
388
- console.log(`[Judgeval Handler] onRetrieverStart called for runId: ${runId}`); // Added log
389
- // console.log(`>>> onRetrieverStart: runId: ${runId}, name: ${serialized?.name}`); // Debug log
390
- const traceClient = this._getActiveTraceClient();
159
+ const name = "LLM call";
160
+ const traceClient = this.getTraceClient();
391
161
  if (!traceClient)
392
162
  return;
393
- // Match Python naming convention
394
- const retrieverName = serialized === null || serialized === void 0 ? void 0 : serialized.name;
395
- let spanName;
396
- if (retrieverName) {
397
- spanName = `RETRIEVER_${retrieverName.toUpperCase()}`;
398
- }
399
- else {
400
- spanName = "RETRIEVER_CALL";
401
- }
402
- // const spanName = `RETRIEVER: ${retrieverName}`; // Old naming
403
- this._startSpan(runId, spanName, "retriever"); // Use 'retriever' span type
404
- // Record input associated with the started span
405
- const currentSpanId = this.runIdToSpanId[runId];
406
- if (currentSpanId) {
407
- // Input is recorded in the current context span of the TraceClient
408
- traceClient.recordInput({ query: query /* , options: options */ }); // Removed spanId
409
- }
163
+ traceClient.startSpan(name, { spanType: "llm" });
164
+ traceClient.recordInput({
165
+ args: prompts,
166
+ kwargs: Object.assign({ extra_params: extraParams, tags: tags, metadata: metadata }, options),
167
+ });
410
168
  });
411
169
  }
412
- onRetrieverEnd(documents, runId, parentRunId, tags) {
170
+ // Also called on chat model end
171
+ handleLLMEnd(output, runId, parentRunId, tags, options) {
413
172
  return __awaiter(this, void 0, void 0, function* () {
414
- // console.log(`>>> onRetrieverEnd: runId: ${runId}, docs: ${documents.length}`); // Debug log
415
- const traceClient = this._getActiveTraceClient();
416
- if (!traceClient) {
417
- // If no trace client, we still need to clean up the runId mapping potentially
418
- this._endSpan(runId);
173
+ const traceClient = this.getTraceClient();
174
+ if (!traceClient)
419
175
  return;
420
- }
421
- // Format output similar to Python's handler
422
- const docSummary = documents.map((doc, i) => ({
423
- index: i,
424
- page_content: doc.pageContent.substring(0, 150) + (doc.pageContent.length > 150 ? "..." : ""), // Slightly longer preview
425
- metadata: doc.metadata,
426
- }));
427
- const output = {
428
- document_count: documents.length,
429
- documents: docSummary,
430
- };
431
- // Output is recorded within _endSpan
432
- this._endSpan(runId, output);
176
+ traceClient.recordOutput(output.generations[0][0].text);
177
+ traceClient.endSpan();
178
+ });
179
+ }
180
+ handleLLMError(error, runId, parentRunId, tags, options) {
181
+ return __awaiter(this, void 0, void 0, function* () {
182
+ console.log(`LLM error: ${error}`);
183
+ const traceClient = this.getTraceClient();
184
+ if (!traceClient)
185
+ return;
186
+ traceClient.recordError(error);
187
+ traceClient.endSpan();
433
188
  });
434
189
  }
435
- onRetrieverError(error, runId, parentRunId, tags) {
190
+ // Why is there no handleChatModelEnd?
191
+ handleChatModelStart(serialized, messages, runId, parentRunId, extraParams, tags, metadata, name, runType, runName, options) {
436
192
  return __awaiter(this, void 0, void 0, function* () {
437
- // console.error(`>>> onRetrieverError: runId: ${runId}`, error); // Debug log
438
- // Error is recorded within _endSpan
439
- this._endSpan(runId, undefined, error);
193
+ let name_ = "LLM call";
194
+ if (serialized.id.includes("openai")) {
195
+ name_ = "OPENAI_API_CALL";
196
+ }
197
+ else if (serialized.id.includes("anthropic")) {
198
+ name_ = "ANTHROPIC_API_CALL";
199
+ }
200
+ else if (serialized.id.includes("together")) {
201
+ name_ = "TOGETHER_API_CALL";
202
+ }
203
+ else {
204
+ name_ = "LLM call";
205
+ }
206
+ const traceClient = this.getTraceClient();
207
+ if (!traceClient)
208
+ return;
209
+ traceClient.startSpan(name_, { spanType: "llm" });
210
+ traceClient.recordInput({
211
+ args: messages,
212
+ kwargs: Object.assign({ extra_params: extraParams, tags: tags, metadata: metadata }, options),
213
+ });
440
214
  });
441
215
  }
442
216
  }