judgeval 0.1.41 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -5
- package/dist/cjs/clients.js.map +1 -1
- package/dist/cjs/common/integrations/langgraph.js +141 -367
- package/dist/cjs/common/integrations/langgraph.js.map +1 -1
- package/dist/cjs/common/logger.js +6 -6
- package/dist/cjs/common/logger.js.map +1 -1
- package/dist/cjs/common/tracer.js +300 -317
- package/dist/cjs/common/tracer.js.map +1 -1
- package/dist/cjs/evaluation-run.js +9 -9
- package/dist/cjs/evaluation-run.js.map +1 -1
- package/dist/cjs/index.js +54 -54
- package/dist/cjs/index.js.map +1 -1
- package/dist/cjs/judgment-client.js +73 -56
- package/dist/cjs/judgment-client.js.map +1 -1
- package/dist/cjs/rules.js +8 -8
- package/dist/cjs/rules.js.map +1 -1
- package/dist/cjs/run-evaluation.js +60 -60
- package/dist/cjs/run-evaluation.js.map +1 -1
- package/dist/cjs/scorers/api-scorer.js +15 -15
- package/dist/cjs/scorers/api-scorer.js.map +1 -1
- package/dist/cjs/scorers/base-scorer.js +4 -4
- package/dist/cjs/scorers/base-scorer.js.map +1 -1
- package/dist/cjs/scorers/exact-match-scorer.js +2 -2
- package/dist/cjs/scorers/exact-match-scorer.js.map +1 -1
- package/dist/esm/clients.js.map +1 -1
- package/dist/esm/common/integrations/langgraph.js +142 -371
- package/dist/esm/common/integrations/langgraph.js.map +1 -1
- package/dist/esm/common/logger.js +1 -1
- package/dist/esm/common/logger.js.map +1 -1
- package/dist/esm/common/tracer.js +283 -298
- package/dist/esm/common/tracer.js.map +1 -1
- package/dist/esm/evaluation-run.js +3 -3
- package/dist/esm/evaluation-run.js.map +1 -1
- package/dist/esm/index.js +12 -12
- package/dist/esm/index.js.map +1 -1
- package/dist/esm/judgment-client.js +33 -16
- package/dist/esm/judgment-client.js.map +1 -1
- package/dist/esm/rules.js +7 -7
- package/dist/esm/rules.js.map +1 -1
- package/dist/esm/run-evaluation.js +4 -4
- package/dist/esm/run-evaluation.js.map +1 -1
- package/dist/esm/scorers/api-scorer.js +1 -1
- package/dist/esm/scorers/api-scorer.js.map +1 -1
- package/dist/esm/scorers/base-scorer.js +1 -1
- package/dist/esm/scorers/base-scorer.js.map +1 -1
- package/dist/esm/scorers/exact-match-scorer.js +1 -1
- package/dist/esm/scorers/exact-match-scorer.js.map +1 -1
- package/dist/types/clients.d.ts +1 -2
- package/dist/types/common/integrations/langgraph.d.ts +22 -30
- package/dist/types/common/tracer.d.ts +23 -21
- package/dist/types/data/result.d.ts +1 -1
- package/dist/types/evaluation-run.d.ts +3 -3
- package/dist/types/index.d.ts +12 -12
- package/dist/types/judgment-client.d.ts +4 -4
- package/dist/types/rules.d.ts +3 -3
- package/dist/types/run-evaluation.d.ts +4 -4
- package/dist/types/scorers/api-scorer.d.ts +3 -3
- package/dist/types/scorers/base-scorer.d.ts +2 -2
- package/dist/types/scorers/exact-match-scorer.d.ts +3 -3
- package/package.json +6 -3
package/README.md
CHANGED
|
@@ -131,12 +131,17 @@ const tracer = Tracer.getInstance({
|
|
|
131
131
|
enableEvaluations: true
|
|
132
132
|
});
|
|
133
133
|
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
134
|
+
// Analogous to Python SDK's with, e.g.
|
|
135
|
+
//
|
|
136
|
+
// with tracer.trace("my-trace") as trace:
|
|
137
|
+
// with trace.span("operation") as span:
|
|
138
|
+
// # Perform operations
|
|
139
|
+
//
|
|
140
|
+
for (const trace of tracer.trace("my-trace")) {
|
|
141
|
+
for (const span of trace.span("operation")) {
|
|
137
142
|
// Perform operations
|
|
138
|
-
}
|
|
139
|
-
}
|
|
143
|
+
}
|
|
144
|
+
}
|
|
140
145
|
```
|
|
141
146
|
|
|
142
147
|
## Result Retrieval
|
package/dist/cjs/clients.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"clients.js","sourceRoot":"","sources":["../../src/clients.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,+CAAiC;AACjC,oDAA4B;AAC5B,4DAA0C;AAC1C,8DAAmC;AAEnC,6BAA6B;AAC7B,MAAM,CAAC,MAAM,EAAE,CAAC;AAEhB,oCAAoC;AACpC,IAAI,YAAY,GAAkB,IAAI,CAAC;AAsC9B,oCAAY;AArCrB,IAAI,OAAO,CAAC,GAAG,CAAC,cAAc,EAAE,CAAC;IAC/B,IAAI,CAAC;QACH,uBAAA,YAAY,GAAG,IAAI,gBAAM,CAAC;YACxB,MAAM,EAAE,OAAO,CAAC,GAAG,CAAC,cAAc;SACnC,CAAC,CAAC;IACL,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,qDAAqD;QACrD,OAAO,CAAC,IAAI,CAAC,mCAAmC,EAAE,KAAK,CAAC,CAAC;IAC3D,CAAC;AACH,CAAC;AAED,uCAAuC;AACvC,IAAI,eAAe,GAAqB,IAAI,CAAC;AAyBtB,0CAAe;AAxBtC,IAAI,OAAO,CAAC,GAAG,CAAC,iBAAiB,EAAE,CAAC;IAClC,IAAI,CAAC;QACH,0BAAA,eAAe,GAAG,IAAI,aAAS,CAAC;YAC9B,MAAM,EAAE,OAAO,CAAC,GAAG,CAAC,iBAAiB;SACtC,CAAC,CAAC;IACL,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,wDAAwD;QACxD,OAAO,CAAC,IAAI,CAAC,sCAAsC,EAAE,KAAK,CAAC,CAAC;IAC9D,CAAC;AACH,CAAC;AAED,sCAAsC;AACtC,IAAI,cAAc,
|
|
1
|
+
{"version":3,"file":"clients.js","sourceRoot":"","sources":["../../src/clients.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,+CAAiC;AACjC,oDAA4B;AAC5B,4DAA0C;AAC1C,8DAAmC;AAEnC,6BAA6B;AAC7B,MAAM,CAAC,MAAM,EAAE,CAAC;AAEhB,oCAAoC;AACpC,IAAI,YAAY,GAAkB,IAAI,CAAC;AAsC9B,oCAAY;AArCrB,IAAI,OAAO,CAAC,GAAG,CAAC,cAAc,EAAE,CAAC;IAC/B,IAAI,CAAC;QACH,uBAAA,YAAY,GAAG,IAAI,gBAAM,CAAC;YACxB,MAAM,EAAE,OAAO,CAAC,GAAG,CAAC,cAAc;SACnC,CAAC,CAAC;IACL,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,qDAAqD;QACrD,OAAO,CAAC,IAAI,CAAC,mCAAmC,EAAE,KAAK,CAAC,CAAC;IAC3D,CAAC;AACH,CAAC;AAED,uCAAuC;AACvC,IAAI,eAAe,GAAqB,IAAI,CAAC;AAyBtB,0CAAe;AAxBtC,IAAI,OAAO,CAAC,GAAG,CAAC,iBAAiB,EAAE,CAAC;IAClC,IAAI,CAAC;QACH,0BAAA,eAAe,GAAG,IAAI,aAAS,CAAC;YAC9B,MAAM,EAAE,OAAO,CAAC,GAAG,CAAC,iBAAiB;SACtC,CAAC,CAAC;IACL,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,wDAAwD;QACxD,OAAO,CAAC,IAAI,CAAC,sCAAsC,EAAE,KAAK,CAAC,CAAC;IAC9D,CAAC;AACH,CAAC;AAED,sCAAsC;AACtC,IAAI,cAAc,GAAe,IAAI,CAAC;AAYE,wCAAc;AAXtD,IAAI,OAAO,CAAC,GAAG,CAAC,gBAAgB,EAAE,CAAC;IACjC,IAAI,CAAC;QACH,yBAAA,cAAc,GAAG,IAAK,qBAAgB,CAAC;YACrC,IAAI,EAAE,OAAO,CAAC,GAAG,CAAC,gBAAgB;SACnC,CAAC,CAAC;IACL,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,uDAAuD;QACvD,OAAO,CAAC,IAAI,CAAC,qCAAqC,EAAE,KAAK,CAAC,CAAC;IAC7D,CAAC;AACH,CAAC"}
|
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
"use strict";
|
|
2
|
-
// judgeval-js/src/integrations/langgraph.ts
|
|
3
2
|
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
4
3
|
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
5
4
|
return new (P || (P = Promise))(function (resolve, reject) {
|
|
@@ -12,431 +11,206 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
|
|
|
12
11
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
13
12
|
exports.JudgevalLanggraphCallbackHandler = void 0;
|
|
14
13
|
const base_1 = require("@langchain/core/callbacks/base");
|
|
15
|
-
const
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
// --- Global Handler Setup (REMOVED - No longer needed with context propagation) ---
|
|
14
|
+
const tracer_js_1 = require("../tracer.js");
|
|
15
|
+
// It's my understanding that you can only be on one node in the graph at a time
|
|
16
|
+
// That means we don't need to worry about the async problem
|
|
19
17
|
class JudgevalLanggraphCallbackHandler extends base_1.BaseCallbackHandler {
|
|
20
|
-
// Optional: Track executed nodes/tools if needed for external use cases like evaluation
|
|
21
|
-
// public executedNodes: string[] = [];
|
|
22
|
-
// public executedTools: string[] = [];
|
|
23
18
|
constructor(tracer) {
|
|
24
|
-
super();
|
|
25
|
-
this.name = "judgeval_langgraph_callback_handler";
|
|
26
|
-
this.
|
|
27
|
-
this.
|
|
28
|
-
this.
|
|
29
|
-
|
|
19
|
+
super();
|
|
20
|
+
this.name = "judgeval_langgraph_callback_handler";
|
|
21
|
+
this.executedNodeTools = [];
|
|
22
|
+
this.executedNodes = [];
|
|
23
|
+
this.executedTools = [];
|
|
24
|
+
this.tracer = tracer !== null && tracer !== void 0 ? tracer : tracer_js_1.Tracer.getInstance();
|
|
30
25
|
console.log(`[Judgeval Handler] Initialized. Monitoring Enabled: ${this.tracer.enableMonitoring}`); // Added prefix
|
|
31
26
|
}
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
// console.log("Judgeval Handler: Monitoring disabled."); // Reduce noise
|
|
36
|
-
return null;
|
|
37
|
-
}
|
|
27
|
+
getTraceClient() {
|
|
28
|
+
if (!this.tracer.enableMonitoring)
|
|
29
|
+
return undefined;
|
|
38
30
|
const client = this.tracer.getCurrentTrace();
|
|
39
31
|
if (!client) {
|
|
40
|
-
console.
|
|
41
|
-
}
|
|
42
|
-
// Explicitly return null if client is undefined
|
|
43
|
-
return client !== null && client !== void 0 ? client : null;
|
|
44
|
-
}
|
|
45
|
-
// Helper to create a unique span ID
|
|
46
|
-
_generateSpanId() {
|
|
47
|
-
return (0, uuid_1.v4)();
|
|
48
|
-
}
|
|
49
|
-
// Start a new span, associating it with the LangChain runId
|
|
50
|
-
_startSpan(lcRunId, name, spanType = "span") {
|
|
51
|
-
var _a;
|
|
52
|
-
const traceClient = this._getActiveTraceClient();
|
|
53
|
-
if (!traceClient)
|
|
54
|
-
return;
|
|
55
|
-
const startTime = Date.now() / 1000;
|
|
56
|
-
const spanId = this._generateSpanId();
|
|
57
|
-
this.runIdToSpanId[lcRunId] = spanId; // Map Langchain runId to our new spanId
|
|
58
|
-
// Get parent span ID from the current async context
|
|
59
|
-
const parentSpanId = tracer_1.currentSpanAsyncLocalStorage.getStore();
|
|
60
|
-
// Calculate depth based on parent
|
|
61
|
-
let depth = 0;
|
|
62
|
-
if (parentSpanId) {
|
|
63
|
-
const parentEntry = traceClient.entries.find((e) => e.span_id === parentSpanId && e.type === 'enter');
|
|
64
|
-
if (parentEntry) {
|
|
65
|
-
depth = ((_a = parentEntry.depth) !== null && _a !== void 0 ? _a : -1) + 1; // Increment parent depth
|
|
66
|
-
}
|
|
67
|
-
else {
|
|
68
|
-
// If parent entry not found (should be rare in async context), start at 0
|
|
69
|
-
console.warn(`Judgeval Handler: Parent span entry ${parentSpanId} not found for child ${spanId}. Defaulting depth to 0.`);
|
|
70
|
-
depth = 0;
|
|
71
|
-
}
|
|
72
|
-
}
|
|
73
|
-
else {
|
|
74
|
-
// No parent in context, this is a root span (relative to this handler's context)
|
|
75
|
-
depth = 0;
|
|
32
|
+
console.warn("No trace client found");
|
|
76
33
|
}
|
|
77
|
-
|
|
78
|
-
traceClient.addEntry({
|
|
79
|
-
type: 'enter',
|
|
80
|
-
function: name,
|
|
81
|
-
span_id: spanId,
|
|
82
|
-
depth: depth,
|
|
83
|
-
timestamp: startTime,
|
|
84
|
-
span_type: spanType,
|
|
85
|
-
parent_span_id: parentSpanId
|
|
86
|
-
});
|
|
87
|
-
this.spanStartTimes[spanId] = startTime;
|
|
88
|
-
// Set this new span as the current one in the context *for child operations*
|
|
89
|
-
// Note: This relies on Langchain's async flow preserving the context.
|
|
90
|
-
// If Langchain breaks context, this might not propagate correctly.
|
|
91
|
-
// It's generally better to *read* the parent from context when starting a span,
|
|
92
|
-
// rather than trying to manage pushing/popping onto the context here.
|
|
93
|
-
// The Tracer's observe/runInTrace methods handle setting the context.
|
|
34
|
+
return client;
|
|
94
35
|
}
|
|
95
|
-
|
|
96
|
-
_endSpan(lcRunId, output, error) {
|
|
97
|
-
const traceClient = this._getActiveTraceClient();
|
|
98
|
-
// Retrieve the spanId using the LangChain runId
|
|
99
|
-
const spanId = this.runIdToSpanId[lcRunId];
|
|
100
|
-
if (!traceClient || !spanId || !(spanId in this.spanStartTimes)) {
|
|
101
|
-
// console.log(`>>> _endSpan: Skipping endSpan for lcRunId ${lcRunId} / spanId ${spanId} - ID/trace/time missing or monitoring disabled.`); // Debug log
|
|
102
|
-
// Clean up the map even if we can't end the span fully
|
|
103
|
-
if (lcRunId in this.runIdToSpanId)
|
|
104
|
-
delete this.runIdToSpanId[lcRunId];
|
|
105
|
-
return;
|
|
106
|
-
}
|
|
107
|
-
const startTime = this.spanStartTimes[spanId];
|
|
108
|
-
const endTime = Date.now() / 1000;
|
|
109
|
-
const duration = endTime - startTime;
|
|
110
|
-
// Find the original 'enter' entry to get details like name and depth
|
|
111
|
-
const enterEntry = traceClient.entries.find(e => e.span_id === spanId && e.type === 'enter');
|
|
112
|
-
if (!enterEntry) {
|
|
113
|
-
console.warn(`Judgeval Handler: Could not find 'enter' entry for span ${spanId} (lcRunId: ${lcRunId}) during _endSpan.`);
|
|
114
|
-
// Clean up maps even if entry isn't found
|
|
115
|
-
delete this.spanStartTimes[spanId];
|
|
116
|
-
delete this.runIdToSpanId[lcRunId];
|
|
117
|
-
return;
|
|
118
|
-
}
|
|
119
|
-
// Record output or error *before* the exit entry
|
|
120
|
-
if (error) {
|
|
121
|
-
traceClient.recordOutput(error instanceof Error ? error : new Error(String(error)));
|
|
122
|
-
}
|
|
123
|
-
else if (output !== undefined) {
|
|
124
|
-
// Avoid recording 'undefined' as output explicitly
|
|
125
|
-
traceClient.recordOutput(output);
|
|
126
|
-
}
|
|
127
|
-
traceClient.addEntry({
|
|
128
|
-
type: 'exit',
|
|
129
|
-
function: enterEntry.function, // Use name from 'enter' entry
|
|
130
|
-
span_id: spanId,
|
|
131
|
-
depth: enterEntry.depth, // Use depth from 'enter' entry
|
|
132
|
-
timestamp: endTime,
|
|
133
|
-
duration: duration,
|
|
134
|
-
span_type: enterEntry.span_type // Use type from 'enter' entry
|
|
135
|
-
});
|
|
136
|
-
// Clean up maps
|
|
137
|
-
delete this.spanStartTimes[spanId];
|
|
138
|
-
delete this.runIdToSpanId[lcRunId];
|
|
139
|
-
// console.log(`>>> _endSpan: Ended span ${spanId} ('${enterEntry.function}'), lcRunId: ${lcRunId}`); // Debug log
|
|
140
|
-
}
|
|
141
|
-
// --- Chain Events ---
|
|
142
|
-
onChainStart(serialized, inputs, runId, parentRunId, tags, metadata, options // Langchain-JS doesn't seem to pass options here consistently
|
|
143
|
-
) {
|
|
36
|
+
handleRetrieverStart(serialized, query, runId, parentRunId, tags, metadata, name, options) {
|
|
144
37
|
return __awaiter(this, void 0, void 0, function* () {
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
38
|
+
let name_ = "RETRIEVER_CALL";
|
|
39
|
+
if (serialized === null || serialized === void 0 ? void 0 : serialized.name) {
|
|
40
|
+
name_ = `RETRIEVER_${serialized.name.toUpperCase()}`;
|
|
41
|
+
}
|
|
42
|
+
const traceClient = this.getTraceClient();
|
|
149
43
|
if (!traceClient)
|
|
150
44
|
return;
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
}
|
|
158
|
-
else {
|
|
159
|
-
// Use the serialized name or a generic fallback, avoiding node-specific prefixes
|
|
160
|
-
spanName = executionName;
|
|
161
|
-
}
|
|
162
|
-
// Removed node-specific logic:
|
|
163
|
-
// const nodeName = metadata?.langgraph_node ? String(metadata.langgraph_node) : null;
|
|
164
|
-
// if (nodeName) { ... } else { ... }
|
|
165
|
-
this._startSpan(runId, spanName, spanType);
|
|
166
|
-
// Record input associated with the started span
|
|
167
|
-
const currentSpanId = this.runIdToSpanId[runId];
|
|
168
|
-
if (currentSpanId) {
|
|
169
|
-
// Input is recorded in the current context span of the TraceClient
|
|
170
|
-
traceClient.recordInput({ args: inputs /* , options: options */ }); // Removed spanId
|
|
171
|
-
}
|
|
45
|
+
traceClient.startSpan(name_, { spanType: "retriever" });
|
|
46
|
+
traceClient.recordInput({
|
|
47
|
+
query,
|
|
48
|
+
tags,
|
|
49
|
+
metadata,
|
|
50
|
+
options,
|
|
51
|
+
});
|
|
172
52
|
});
|
|
173
53
|
}
|
|
174
|
-
|
|
175
|
-
runId, parentRunId, tags) {
|
|
54
|
+
handleRetrieverEnd(documents, runId, parentRunId, tags, options) {
|
|
176
55
|
return __awaiter(this, void 0, void 0, function* () {
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
56
|
+
const docSummary = documents.map((doc, i) => ({
|
|
57
|
+
index: i,
|
|
58
|
+
page_content: doc.pageContent.length > 100
|
|
59
|
+
? doc.pageContent.substring(0, 97) + "..."
|
|
60
|
+
: doc.pageContent,
|
|
61
|
+
metadata: doc.metadata,
|
|
62
|
+
}));
|
|
63
|
+
const traceClient = this.getTraceClient();
|
|
64
|
+
if (!traceClient)
|
|
65
|
+
return;
|
|
66
|
+
traceClient.recordOutput({
|
|
67
|
+
document_count: documents.length,
|
|
68
|
+
documents: docSummary,
|
|
69
|
+
});
|
|
70
|
+
traceClient.endSpan();
|
|
180
71
|
});
|
|
181
72
|
}
|
|
182
|
-
|
|
73
|
+
handleRetrieverError(error, runId, parentRunId, tags, options) {
|
|
183
74
|
return __awaiter(this, void 0, void 0, function* () {
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
75
|
+
const traceClient = this.getTraceClient();
|
|
76
|
+
if (!traceClient)
|
|
77
|
+
return;
|
|
78
|
+
traceClient.recordError(error);
|
|
187
79
|
});
|
|
188
80
|
}
|
|
189
|
-
|
|
190
|
-
_getLlmSpanName(serialized) {
|
|
191
|
-
var _a;
|
|
192
|
-
// Simplify extraction if possible, check common patterns
|
|
193
|
-
const idPath = ((_a = serialized === null || serialized === void 0 ? void 0 : serialized.id) !== null && _a !== void 0 ? _a : []).join('/').toLowerCase();
|
|
194
|
-
if (idPath.includes("openai"))
|
|
195
|
-
return "OPENAI_API_CALL";
|
|
196
|
-
if (idPath.includes("anthropic"))
|
|
197
|
-
return "ANTHROPIC_API_CALL";
|
|
198
|
-
if (idPath.includes("together"))
|
|
199
|
-
return "TOGETHER_API_CALL";
|
|
200
|
-
// Add other common providers if needed (e.g., google, bedrock)
|
|
201
|
-
return "LLM_CALL"; // Default
|
|
202
|
-
}
|
|
203
|
-
// Generic LLM Start handler (covers both base LLM and ChatModel)
|
|
204
|
-
_handleLlmStart(serialized, runId, inputData, extraParams,
|
|
205
|
-
// tags?: string[] | undefined, // Often unused for LLM spans
|
|
206
|
-
// metadata?: Record<string, unknown> | undefined, // Often unused for LLM spans
|
|
207
|
-
options // Langchain passes invocation params here
|
|
208
|
-
) {
|
|
81
|
+
handleChainStart(serialized, inputs, runId, parentRunId, tags, metadata, name, runName, runType, options) {
|
|
209
82
|
return __awaiter(this, void 0, void 0, function* () {
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
if (!traceClient)
|
|
83
|
+
let traceClient = this.getTraceClient();
|
|
84
|
+
if (!traceClient) {
|
|
85
|
+
console.warn("No trace client found");
|
|
214
86
|
return;
|
|
215
|
-
// Extract model name from options (common pattern) or extraParams
|
|
216
|
-
const invocationParams = (_b = (_a = options === null || options === void 0 ? void 0 : options.invocation_params) !== null && _a !== void 0 ? _a : extraParams) !== null && _b !== void 0 ? _b : {};
|
|
217
|
-
const modelName = (_d = (_c = invocationParams === null || invocationParams === void 0 ? void 0 : invocationParams.model_name) !== null && _c !== void 0 ? _c : invocationParams === null || invocationParams === void 0 ? void 0 : invocationParams.model) !== null && _d !== void 0 ? _d : 'unknown_model';
|
|
218
|
-
const spanName = this._getLlmSpanName(serialized);
|
|
219
|
-
this._startSpan(runId, spanName, "llm");
|
|
220
|
-
// Prepare input payload
|
|
221
|
-
let inputPayload = {
|
|
222
|
-
model: modelName,
|
|
223
|
-
params: invocationParams, // Record all invocation params
|
|
224
|
-
// options: options // May include other config besides invocation_params
|
|
225
|
-
};
|
|
226
|
-
if ('prompts' in inputData) {
|
|
227
|
-
inputPayload.prompts = inputData.prompts;
|
|
228
|
-
}
|
|
229
|
-
else if ('messages' in inputData) {
|
|
230
|
-
// Langchain JS passes messages as BaseMessage[][]
|
|
231
|
-
inputPayload.messages = (_e = inputData.messages[0]) !== null && _e !== void 0 ? _e : []; // Extract first batch element safely
|
|
232
|
-
}
|
|
233
|
-
// Record input associated with the started span
|
|
234
|
-
const currentSpanId = this.runIdToSpanId[runId];
|
|
235
|
-
if (currentSpanId) {
|
|
236
|
-
// Input is recorded in the current context span of the TraceClient
|
|
237
|
-
traceClient.recordInput(inputPayload); // Removed spanId
|
|
238
87
|
}
|
|
88
|
+
traceClient.startSpan(name !== null && name !== void 0 ? name : "unknown_chain", { spanType: "chain" });
|
|
89
|
+
traceClient.recordInput(inputs);
|
|
239
90
|
});
|
|
240
91
|
}
|
|
241
|
-
|
|
242
|
-
) {
|
|
92
|
+
handleChainEnd(outputs, runId, parentRunId, tags, options) {
|
|
243
93
|
return __awaiter(this, void 0, void 0, function* () {
|
|
244
|
-
|
|
245
|
-
|
|
94
|
+
const traceClient = this.getTraceClient();
|
|
95
|
+
if (!traceClient)
|
|
96
|
+
return;
|
|
97
|
+
traceClient.recordOutput(outputs);
|
|
98
|
+
traceClient.endSpan();
|
|
246
99
|
});
|
|
247
100
|
}
|
|
248
|
-
|
|
249
|
-
onChatModelStart(serialized, messages, runId, parentRunId, extraParams, tags, metadata, options // options might contain invocation_params
|
|
250
|
-
) {
|
|
101
|
+
handleChainError(error, runId, parentRunId, tags, options) {
|
|
251
102
|
return __awaiter(this, void 0, void 0, function* () {
|
|
252
|
-
console.log(`
|
|
253
|
-
|
|
103
|
+
console.log(`Chain error: ${error}`);
|
|
104
|
+
const traceClient = this.getTraceClient();
|
|
105
|
+
if (!traceClient)
|
|
106
|
+
return;
|
|
107
|
+
traceClient.recordError(error);
|
|
108
|
+
traceClient.endSpan();
|
|
254
109
|
});
|
|
255
110
|
}
|
|
256
|
-
|
|
257
|
-
* Handles the end of an LLM call. Extracts the output, usage data, and ends the corresponding span.
|
|
258
|
-
* @param output The result from the LLM call.
|
|
259
|
-
* @param runId The unique ID of the run.
|
|
260
|
-
*/
|
|
261
|
-
onLlmEnd(output, runId) {
|
|
111
|
+
handleToolStart(serialized, inputStr, runId, parentRunId, tags, metadata, name, runType, runName, options) {
|
|
262
112
|
return __awaiter(this, void 0, void 0, function* () {
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
const traceClient = this._getActiveTraceClient();
|
|
266
|
-
const spanId = this.runIdToSpanId[runId]; // Needed for context if recording output directly here
|
|
267
|
-
if (!traceClient || !spanId) {
|
|
268
|
-
// console.warn(`Judgeval Handler: Skipping onLlmEnd for runId ${runId}. Trace client or span ID missing.`); // Debug log
|
|
269
|
-
this._endSpan(runId, output); // Still attempt to end span if possible, passing raw output
|
|
113
|
+
const traceClient = this.getTraceClient();
|
|
114
|
+
if (!traceClient)
|
|
270
115
|
return;
|
|
116
|
+
// Python SDK doesn't handle name None case
|
|
117
|
+
traceClient.startSpan(name !== null && name !== void 0 ? name : "unknown_tool", { spanType: "tool" });
|
|
118
|
+
if (name) {
|
|
119
|
+
this.executedTools.push(name);
|
|
120
|
+
this.executedNodeTools.push(this.previousNode ? `${this.previousNode}:${name}` : name);
|
|
271
121
|
}
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
// Handle ChatGeneration vs regular Generation
|
|
277
|
-
if ("message" in generation && generation.message instanceof messages_1.BaseMessage) {
|
|
278
|
-
const aiMessage = generation.message; // Assume AI message for output
|
|
279
|
-
llmOutputPayload.content = aiMessage.content;
|
|
280
|
-
// Include tool calls if present
|
|
281
|
-
if (aiMessage.tool_calls && aiMessage.tool_calls.length > 0) {
|
|
282
|
-
llmOutputPayload.tool_calls = aiMessage.tool_calls;
|
|
283
|
-
}
|
|
284
|
-
if (aiMessage.invalid_tool_calls && aiMessage.invalid_tool_calls.length > 0) {
|
|
285
|
-
llmOutputPayload.invalid_tool_calls = aiMessage.invalid_tool_calls;
|
|
286
|
-
}
|
|
287
|
-
// Usage metadata might be here (e.g., OpenAI)
|
|
288
|
-
if (aiMessage.usage_metadata) {
|
|
289
|
-
llmOutputPayload.usage_metadata = aiMessage.usage_metadata;
|
|
290
|
-
}
|
|
291
|
-
}
|
|
292
|
-
else if (generation.text) {
|
|
293
|
-
// Handle plain text generation
|
|
294
|
-
llmOutputPayload.content = generation.text;
|
|
295
|
-
}
|
|
296
|
-
// Standardize token usage extraction if not in usage_metadata
|
|
297
|
-
// Check generationInfo first, then llmOutput
|
|
298
|
-
const tokenUsage = (_d = (_c = generation.generationInfo) === null || _c === void 0 ? void 0 : _c.tokenUsage) !== null && _d !== void 0 ? _d : (_e = output.llmOutput) === null || _e === void 0 ? void 0 : _e.tokenUsage;
|
|
299
|
-
if (tokenUsage && !((_f = llmOutputPayload.usage_metadata) === null || _f === void 0 ? void 0 : _f.token_usage)) { // Avoid duplication if already in usage_metadata
|
|
300
|
-
llmOutputPayload.token_usage = {
|
|
301
|
-
completionTokens: (_g = tokenUsage.completionTokens) !== null && _g !== void 0 ? _g : tokenUsage.completion_tokens,
|
|
302
|
-
promptTokens: (_h = tokenUsage.promptTokens) !== null && _h !== void 0 ? _h : tokenUsage.prompt_tokens,
|
|
303
|
-
totalTokens: (_j = tokenUsage.totalTokens) !== null && _j !== void 0 ? _j : tokenUsage.total_tokens,
|
|
304
|
-
};
|
|
305
|
-
// Normalize keys within usage_metadata if present
|
|
306
|
-
}
|
|
307
|
-
else if ((_k = llmOutputPayload.usage_metadata) === null || _k === void 0 ? void 0 : _k.token_usage) {
|
|
308
|
-
const usageMeta = llmOutputPayload.usage_metadata.token_usage;
|
|
309
|
-
llmOutputPayload.token_usage = {
|
|
310
|
-
completionTokens: (_l = usageMeta.completionTokens) !== null && _l !== void 0 ? _l : usageMeta.completion_tokens,
|
|
311
|
-
promptTokens: (_m = usageMeta.promptTokens) !== null && _m !== void 0 ? _m : usageMeta.prompt_tokens,
|
|
312
|
-
totalTokens: (_o = usageMeta.totalTokens) !== null && _o !== void 0 ? _o : usageMeta.total_tokens,
|
|
313
|
-
};
|
|
314
|
-
}
|
|
315
|
-
// Include other generationInfo if available and potentially useful
|
|
316
|
-
if (generation.generationInfo) {
|
|
317
|
-
llmOutputPayload.generation_info = generation.generationInfo;
|
|
318
|
-
}
|
|
319
|
-
}
|
|
320
|
-
// Include raw LLM output if available and potentially useful (can be verbose)
|
|
321
|
-
// if (output.llmOutput) {
|
|
322
|
-
// llmOutputPayload.raw_llm_output = output.llmOutput;
|
|
323
|
-
// }
|
|
324
|
-
// Output is recorded within _endSpan
|
|
325
|
-
this._endSpan(runId, llmOutputPayload);
|
|
122
|
+
traceClient.recordInput({
|
|
123
|
+
args: inputStr,
|
|
124
|
+
kwargs: options,
|
|
125
|
+
});
|
|
326
126
|
});
|
|
327
127
|
}
|
|
328
|
-
|
|
128
|
+
handleToolEnd(output, runId, parentRunId, options) {
|
|
329
129
|
return __awaiter(this, void 0, void 0, function* () {
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
130
|
+
const traceClient = this.getTraceClient();
|
|
131
|
+
if (!traceClient)
|
|
132
|
+
return;
|
|
133
|
+
traceClient.recordOutput(output);
|
|
134
|
+
traceClient.endSpan();
|
|
333
135
|
});
|
|
334
136
|
}
|
|
335
|
-
|
|
336
|
-
onToolStart(serialized, inputStr, // input is often a stringified object
|
|
337
|
-
runId, parentRunId, tags, metadata) {
|
|
137
|
+
handleToolError(error, runId, parentRunId, options) {
|
|
338
138
|
return __awaiter(this, void 0, void 0, function* () {
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
// console.log(`>>> onToolStart: runId: ${runId}, name: ${serialized?.name}`); // Debug log
|
|
342
|
-
const traceClient = this._getActiveTraceClient();
|
|
139
|
+
console.log(`Tool error: ${error}`);
|
|
140
|
+
const traceClient = this.getTraceClient();
|
|
343
141
|
if (!traceClient)
|
|
344
142
|
return;
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
const spanName = toolName; // Removed "TOOL: " prefix
|
|
348
|
-
this._startSpan(runId, spanName, "tool");
|
|
349
|
-
// Try to parse inputStr if it's JSON, otherwise keep as string
|
|
350
|
-
let parsedInput = inputStr;
|
|
351
|
-
try {
|
|
352
|
-
// Avoid parsing null/empty strings
|
|
353
|
-
if (inputStr && inputStr.trim().startsWith('{') && inputStr.trim().endsWith('}')) {
|
|
354
|
-
parsedInput = JSON.parse(inputStr);
|
|
355
|
-
}
|
|
356
|
-
}
|
|
357
|
-
catch (e) {
|
|
358
|
-
// Ignore error, keep as string if parsing fails
|
|
359
|
-
}
|
|
360
|
-
// Record input associated with the started span
|
|
361
|
-
const currentSpanId = this.runIdToSpanId[runId];
|
|
362
|
-
if (currentSpanId) {
|
|
363
|
-
// Input is recorded in the current context span of the TraceClient
|
|
364
|
-
traceClient.recordInput({ input: parsedInput /* , options: options */ }); // Removed spanId
|
|
365
|
-
}
|
|
366
|
-
// Track tool execution (if needed externally)
|
|
367
|
-
// this.executedTools.push(toolName); // Example
|
|
143
|
+
traceClient.recordError(error);
|
|
144
|
+
traceClient.endSpan();
|
|
368
145
|
});
|
|
369
146
|
}
|
|
370
|
-
|
|
371
|
-
runId, parentRunId) {
|
|
147
|
+
handleAgentAction(action, runId, parentRunId, tags, options) {
|
|
372
148
|
return __awaiter(this, void 0, void 0, function* () {
|
|
373
|
-
|
|
374
|
-
// Output is recorded within _endSpan
|
|
375
|
-
this._endSpan(runId, output);
|
|
149
|
+
console.log(`Agent action: ${action}`);
|
|
376
150
|
});
|
|
377
151
|
}
|
|
378
|
-
|
|
152
|
+
handleAgentFinish(finish, runId, parentRunId, tags, options) {
|
|
379
153
|
return __awaiter(this, void 0, void 0, function* () {
|
|
380
|
-
|
|
381
|
-
// Error is recorded within _endSpan
|
|
382
|
-
this._endSpan(runId, undefined, error);
|
|
154
|
+
console.log(`Agent finish: ${finish}`);
|
|
383
155
|
});
|
|
384
156
|
}
|
|
385
|
-
|
|
386
|
-
onRetrieverStart(serialized, query, runId, parentRunId, tags, metadata) {
|
|
157
|
+
handleLLMStart(serialized, prompts, runId, parentRunId, extraParams, tags, metadata, runName, options) {
|
|
387
158
|
return __awaiter(this, void 0, void 0, function* () {
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
const traceClient = this._getActiveTraceClient();
|
|
159
|
+
const name = "LLM call";
|
|
160
|
+
const traceClient = this.getTraceClient();
|
|
391
161
|
if (!traceClient)
|
|
392
162
|
return;
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
}
|
|
399
|
-
else {
|
|
400
|
-
spanName = "RETRIEVER_CALL";
|
|
401
|
-
}
|
|
402
|
-
// const spanName = `RETRIEVER: ${retrieverName}`; // Old naming
|
|
403
|
-
this._startSpan(runId, spanName, "retriever"); // Use 'retriever' span type
|
|
404
|
-
// Record input associated with the started span
|
|
405
|
-
const currentSpanId = this.runIdToSpanId[runId];
|
|
406
|
-
if (currentSpanId) {
|
|
407
|
-
// Input is recorded in the current context span of the TraceClient
|
|
408
|
-
traceClient.recordInput({ query: query /* , options: options */ }); // Removed spanId
|
|
409
|
-
}
|
|
163
|
+
traceClient.startSpan(name, { spanType: "llm" });
|
|
164
|
+
traceClient.recordInput({
|
|
165
|
+
args: prompts,
|
|
166
|
+
kwargs: Object.assign({ extra_params: extraParams, tags: tags, metadata: metadata }, options),
|
|
167
|
+
});
|
|
410
168
|
});
|
|
411
169
|
}
|
|
412
|
-
|
|
170
|
+
// Also called on chat model end
|
|
171
|
+
handleLLMEnd(output, runId, parentRunId, tags, options) {
|
|
413
172
|
return __awaiter(this, void 0, void 0, function* () {
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
if (!traceClient) {
|
|
417
|
-
// If no trace client, we still need to clean up the runId mapping potentially
|
|
418
|
-
this._endSpan(runId);
|
|
173
|
+
const traceClient = this.getTraceClient();
|
|
174
|
+
if (!traceClient)
|
|
419
175
|
return;
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
})
|
|
427
|
-
const
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
this._endSpan(runId, output);
|
|
176
|
+
traceClient.recordOutput(output.generations[0][0].text);
|
|
177
|
+
traceClient.endSpan();
|
|
178
|
+
});
|
|
179
|
+
}
|
|
180
|
+
handleLLMError(error, runId, parentRunId, tags, options) {
|
|
181
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
182
|
+
console.log(`LLM error: ${error}`);
|
|
183
|
+
const traceClient = this.getTraceClient();
|
|
184
|
+
if (!traceClient)
|
|
185
|
+
return;
|
|
186
|
+
traceClient.recordError(error);
|
|
187
|
+
traceClient.endSpan();
|
|
433
188
|
});
|
|
434
189
|
}
|
|
435
|
-
|
|
190
|
+
// Why is there no handleChatModelEnd?
|
|
191
|
+
handleChatModelStart(serialized, messages, runId, parentRunId, extraParams, tags, metadata, name, runType, runName, options) {
|
|
436
192
|
return __awaiter(this, void 0, void 0, function* () {
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
193
|
+
let name_ = "LLM call";
|
|
194
|
+
if (serialized.id.includes("openai")) {
|
|
195
|
+
name_ = "OPENAI_API_CALL";
|
|
196
|
+
}
|
|
197
|
+
else if (serialized.id.includes("anthropic")) {
|
|
198
|
+
name_ = "ANTHROPIC_API_CALL";
|
|
199
|
+
}
|
|
200
|
+
else if (serialized.id.includes("together")) {
|
|
201
|
+
name_ = "TOGETHER_API_CALL";
|
|
202
|
+
}
|
|
203
|
+
else {
|
|
204
|
+
name_ = "LLM call";
|
|
205
|
+
}
|
|
206
|
+
const traceClient = this.getTraceClient();
|
|
207
|
+
if (!traceClient)
|
|
208
|
+
return;
|
|
209
|
+
traceClient.startSpan(name_, { spanType: "llm" });
|
|
210
|
+
traceClient.recordInput({
|
|
211
|
+
args: messages,
|
|
212
|
+
kwargs: Object.assign({ extra_params: extraParams, tags: tags, metadata: metadata }, options),
|
|
213
|
+
});
|
|
440
214
|
});
|
|
441
215
|
}
|
|
442
216
|
}
|