@langwatch/scenario 0.4.4 → 0.4.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +11 -6
- package/dist/index.d.ts +11 -6
- package/dist/index.js +37 -49
- package/dist/index.mjs +37 -49
- package/package.json +1 -1
package/dist/index.d.mts
CHANGED
|
@@ -571,6 +571,10 @@ declare class JudgeAgent extends JudgeAgentAdapter {
|
|
|
571
571
|
* In multi-step mode, the AI SDK loops automatically: the judge can call
|
|
572
572
|
* expand_trace/grep_trace tools multiple times before reaching a terminal
|
|
573
573
|
* tool (finish_test/continue_test) or hitting the step limit.
|
|
574
|
+
*
|
|
575
|
+
* When the trace is large, toolChoice is relaxed to "required" so the
|
|
576
|
+
* judge can freely pick discovery tools (expand_trace/grep_trace) before
|
|
577
|
+
* being forced to a terminal decision.
|
|
574
578
|
*/
|
|
575
579
|
private invokeLLMWithDiscovery;
|
|
576
580
|
private parseToolCalls;
|
|
@@ -691,14 +695,15 @@ declare function estimateTokens(text: string): number;
|
|
|
691
695
|
* Expands one or more spans from a trace, returning their full details
|
|
692
696
|
* (attributes, events, status) with tree position context.
|
|
693
697
|
*
|
|
698
|
+
* Spans are matched by prefix: the caller can pass the truncated 8-char
|
|
699
|
+
* span ID shown in the skeleton and it will match any span whose full ID
|
|
700
|
+
* starts with that prefix.
|
|
701
|
+
*
|
|
694
702
|
* @param spans - The full array of ReadableSpan objects for the trace
|
|
695
|
-
* @param
|
|
696
|
-
* @returns Formatted string with full span details, truncated to ~
|
|
703
|
+
* @param spanIds - Span IDs or prefixes to expand
|
|
704
|
+
* @returns Formatted string with full span details, truncated to ~4096 tokens
|
|
697
705
|
*/
|
|
698
|
-
declare function expandTrace(spans: ReadableSpan[],
|
|
699
|
-
index?: number;
|
|
700
|
-
range?: string;
|
|
701
|
-
}): string;
|
|
706
|
+
declare function expandTrace(spans: ReadableSpan[], spanIds: string[]): string;
|
|
702
707
|
/**
|
|
703
708
|
* Searches across all span attributes, events, and content for a pattern.
|
|
704
709
|
* Returns matching spans with their tree position and matching content.
|
package/dist/index.d.ts
CHANGED
|
@@ -571,6 +571,10 @@ declare class JudgeAgent extends JudgeAgentAdapter {
|
|
|
571
571
|
* In multi-step mode, the AI SDK loops automatically: the judge can call
|
|
572
572
|
* expand_trace/grep_trace tools multiple times before reaching a terminal
|
|
573
573
|
* tool (finish_test/continue_test) or hitting the step limit.
|
|
574
|
+
*
|
|
575
|
+
* When the trace is large, toolChoice is relaxed to "required" so the
|
|
576
|
+
* judge can freely pick discovery tools (expand_trace/grep_trace) before
|
|
577
|
+
* being forced to a terminal decision.
|
|
574
578
|
*/
|
|
575
579
|
private invokeLLMWithDiscovery;
|
|
576
580
|
private parseToolCalls;
|
|
@@ -691,14 +695,15 @@ declare function estimateTokens(text: string): number;
|
|
|
691
695
|
* Expands one or more spans from a trace, returning their full details
|
|
692
696
|
* (attributes, events, status) with tree position context.
|
|
693
697
|
*
|
|
698
|
+
* Spans are matched by prefix: the caller can pass the truncated 8-char
|
|
699
|
+
* span ID shown in the skeleton and it will match any span whose full ID
|
|
700
|
+
* starts with that prefix.
|
|
701
|
+
*
|
|
694
702
|
* @param spans - The full array of ReadableSpan objects for the trace
|
|
695
|
-
* @param
|
|
696
|
-
* @returns Formatted string with full span details, truncated to ~
|
|
703
|
+
* @param spanIds - Span IDs or prefixes to expand
|
|
704
|
+
* @returns Formatted string with full span details, truncated to ~4096 tokens
|
|
697
705
|
*/
|
|
698
|
-
declare function expandTrace(spans: ReadableSpan[],
|
|
699
|
-
index?: number;
|
|
700
|
-
range?: string;
|
|
701
|
-
}): string;
|
|
706
|
+
declare function expandTrace(spans: ReadableSpan[], spanIds: string[]): string;
|
|
702
707
|
/**
|
|
703
708
|
* Searches across all span attributes, events, and content for a pattern.
|
|
704
709
|
* Returns matching spans with their tree position and matching content.
|
package/dist/index.js
CHANGED
|
@@ -1235,10 +1235,10 @@ function indexSpans(spans) {
|
|
|
1235
1235
|
const sorted = [...spans].sort((a, b) => {
|
|
1236
1236
|
return hrTimeToMs(a.startTime) - hrTimeToMs(b.startTime);
|
|
1237
1237
|
});
|
|
1238
|
-
return sorted.map((span
|
|
1238
|
+
return sorted.map((span) => ({
|
|
1239
1239
|
span,
|
|
1240
1240
|
children: [],
|
|
1241
|
-
|
|
1241
|
+
shortId: span.spanContext().spanId.slice(0, 8)
|
|
1242
1242
|
}));
|
|
1243
1243
|
}
|
|
1244
1244
|
|
|
@@ -1253,7 +1253,7 @@ function renderFullSpanNode(node) {
|
|
|
1253
1253
|
const status = getStatusIndicator(span);
|
|
1254
1254
|
const lines = [];
|
|
1255
1255
|
lines.push(
|
|
1256
|
-
`[${node.
|
|
1256
|
+
`[${node.shortId}] ${timestamp} ${span.name} (${formatDuration(duration)})${status}`
|
|
1257
1257
|
);
|
|
1258
1258
|
const attrs = cleanAttributes(span.attributes);
|
|
1259
1259
|
if (Object.keys(attrs).length > 0) {
|
|
@@ -1277,7 +1277,7 @@ function renderFullSpanNode(node) {
|
|
|
1277
1277
|
function truncateToCharBudget(text) {
|
|
1278
1278
|
if (text.length <= TOOL_RESULT_CHAR_BUDGET) return text;
|
|
1279
1279
|
const truncated = text.slice(0, TOOL_RESULT_CHAR_BUDGET);
|
|
1280
|
-
return truncated + "\n\n[TRUNCATED] Output exceeded ~4000 token budget. Use grep_trace(pattern) to search for specific content, or expand_trace with
|
|
1280
|
+
return truncated + "\n\n[TRUNCATED] Output exceeded ~4000 token budget. Use grep_trace(pattern) to search for specific content, or expand_trace with fewer span IDs.";
|
|
1281
1281
|
}
|
|
1282
1282
|
function spanToSearchableText(span) {
|
|
1283
1283
|
const parts = [span.name];
|
|
@@ -1296,30 +1296,22 @@ function spanToSearchableText(span) {
|
|
|
1296
1296
|
}
|
|
1297
1297
|
return parts.join("\n");
|
|
1298
1298
|
}
|
|
1299
|
-
function expandTrace(spans,
|
|
1299
|
+
function expandTrace(spans, spanIds) {
|
|
1300
1300
|
const nodes = indexSpans(spans);
|
|
1301
1301
|
if (nodes.length === 0) {
|
|
1302
1302
|
return "No spans recorded.";
|
|
1303
1303
|
}
|
|
1304
|
-
|
|
1305
|
-
|
|
1306
|
-
if (range != null) {
|
|
1307
|
-
const parts = range.split("-").map(Number);
|
|
1308
|
-
startIdx = parts[0];
|
|
1309
|
-
endIdx = parts[1] ?? startIdx;
|
|
1310
|
-
} else if (index != null) {
|
|
1311
|
-
startIdx = index;
|
|
1312
|
-
endIdx = index;
|
|
1313
|
-
} else {
|
|
1314
|
-
return "Error: provide either index or range parameter.";
|
|
1304
|
+
if (spanIds.length === 0) {
|
|
1305
|
+
return "Error: provide at least one span ID.";
|
|
1315
1306
|
}
|
|
1316
|
-
const
|
|
1317
|
-
|
|
1318
|
-
return
|
|
1307
|
+
const selected = nodes.filter((n) => {
|
|
1308
|
+
const fullId = n.span.spanContext().spanId;
|
|
1309
|
+
return spanIds.some((prefix) => fullId.startsWith(prefix));
|
|
1310
|
+
});
|
|
1311
|
+
if (selected.length === 0) {
|
|
1312
|
+
const available = nodes.map((n) => n.shortId).join(", ");
|
|
1313
|
+
return `Error: no spans matched the given ID(s). Available span IDs: ${available}`;
|
|
1319
1314
|
}
|
|
1320
|
-
const selected = nodes.filter(
|
|
1321
|
-
(n) => n.index >= startIdx && n.index <= endIdx
|
|
1322
|
-
);
|
|
1323
1315
|
const lines = [];
|
|
1324
1316
|
for (const node of selected) {
|
|
1325
1317
|
const spanLines = renderFullSpanNode(node);
|
|
@@ -1353,7 +1345,7 @@ function grepTrace(spans, pattern) {
|
|
|
1353
1345
|
for (const { node, matchingLines } of limited) {
|
|
1354
1346
|
const duration = calculateSpanDuration(node.span);
|
|
1355
1347
|
lines.push(
|
|
1356
|
-
`--- [${node.
|
|
1348
|
+
`--- [${node.shortId}] ${node.span.name} (${formatDuration(duration)}) ---`
|
|
1357
1349
|
);
|
|
1358
1350
|
for (const line of matchingLines) {
|
|
1359
1351
|
lines.push(` ${line}`);
|
|
@@ -1847,14 +1839,12 @@ var JudgeSpanDigestFormatter = class {
|
|
|
1847
1839
|
`Spans: ${spans.length} | Total Duration: ${formatDuration(totalDuration)}`,
|
|
1848
1840
|
""
|
|
1849
1841
|
];
|
|
1850
|
-
let sequence = 1;
|
|
1851
1842
|
const rootCount = tree.length;
|
|
1852
1843
|
tree.forEach((node, idx) => {
|
|
1853
|
-
|
|
1844
|
+
this.renderStructureNode(
|
|
1854
1845
|
node,
|
|
1855
1846
|
lines,
|
|
1856
1847
|
0,
|
|
1857
|
-
sequence,
|
|
1858
1848
|
idx === rootCount - 1
|
|
1859
1849
|
);
|
|
1860
1850
|
});
|
|
@@ -1894,14 +1884,12 @@ var JudgeSpanDigestFormatter = class {
|
|
|
1894
1884
|
)}`,
|
|
1895
1885
|
""
|
|
1896
1886
|
];
|
|
1897
|
-
let sequence = 1;
|
|
1898
1887
|
const rootCount = tree.length;
|
|
1899
1888
|
tree.forEach((node, idx) => {
|
|
1900
|
-
|
|
1889
|
+
this.renderNode(
|
|
1901
1890
|
node,
|
|
1902
1891
|
lines,
|
|
1903
1892
|
0,
|
|
1904
|
-
sequence,
|
|
1905
1893
|
idx === rootCount - 1
|
|
1906
1894
|
);
|
|
1907
1895
|
});
|
|
@@ -1935,38 +1923,37 @@ var JudgeSpanDigestFormatter = class {
|
|
|
1935
1923
|
}
|
|
1936
1924
|
return roots;
|
|
1937
1925
|
}
|
|
1938
|
-
renderStructureNode(node, lines, depth,
|
|
1926
|
+
renderStructureNode(node, lines, depth, isLast = true) {
|
|
1939
1927
|
const span = node.span;
|
|
1928
|
+
const shortId = span.spanContext().spanId.slice(0, 8);
|
|
1940
1929
|
const duration = calculateSpanDuration(span);
|
|
1941
1930
|
const timestamp = new Date(hrTimeToMs(span.startTime)).toISOString();
|
|
1942
1931
|
const status = getStatusIndicator(span);
|
|
1943
1932
|
const tokens = getTokenUsage(span);
|
|
1944
1933
|
const prefix = this.getTreePrefix(depth, isLast);
|
|
1945
1934
|
lines.push(
|
|
1946
|
-
`${prefix}[${
|
|
1935
|
+
`${prefix}[${shortId}] ${timestamp} ${span.name} (${formatDuration(duration)}${tokens})${status}`
|
|
1947
1936
|
);
|
|
1948
1937
|
lines.push("");
|
|
1949
|
-
let nextSeq = sequence + 1;
|
|
1950
1938
|
const childCount = node.children.length;
|
|
1951
1939
|
node.children.forEach((child, idx) => {
|
|
1952
|
-
|
|
1940
|
+
this.renderStructureNode(
|
|
1953
1941
|
child,
|
|
1954
1942
|
lines,
|
|
1955
1943
|
depth + 1,
|
|
1956
|
-
nextSeq,
|
|
1957
1944
|
idx === childCount - 1
|
|
1958
1945
|
);
|
|
1959
1946
|
});
|
|
1960
|
-
return nextSeq;
|
|
1961
1947
|
}
|
|
1962
|
-
renderNode(node, lines, depth,
|
|
1948
|
+
renderNode(node, lines, depth, isLast = true) {
|
|
1963
1949
|
const span = node.span;
|
|
1950
|
+
const shortId = span.spanContext().spanId.slice(0, 8);
|
|
1964
1951
|
const duration = calculateSpanDuration(span);
|
|
1965
1952
|
const timestamp = new Date(hrTimeToMs(span.startTime)).toISOString();
|
|
1966
1953
|
const status = getStatusIndicator(span);
|
|
1967
1954
|
const prefix = this.getTreePrefix(depth, isLast);
|
|
1968
1955
|
lines.push(
|
|
1969
|
-
`${prefix}[${
|
|
1956
|
+
`${prefix}[${shortId}] ${timestamp} ${span.name} (${formatDuration(duration)})${status}`
|
|
1970
1957
|
);
|
|
1971
1958
|
const attrIndent = this.getAttrIndent(depth, isLast);
|
|
1972
1959
|
const attrs = cleanAttributes(span.attributes);
|
|
@@ -1987,18 +1974,15 @@ var JudgeSpanDigestFormatter = class {
|
|
|
1987
1974
|
}
|
|
1988
1975
|
}
|
|
1989
1976
|
lines.push("");
|
|
1990
|
-
let nextSeq = sequence + 1;
|
|
1991
1977
|
const childCount = node.children.length;
|
|
1992
1978
|
node.children.forEach((child, idx) => {
|
|
1993
|
-
|
|
1979
|
+
this.renderNode(
|
|
1994
1980
|
child,
|
|
1995
1981
|
lines,
|
|
1996
1982
|
depth + 1,
|
|
1997
|
-
nextSeq,
|
|
1998
1983
|
idx === childCount - 1
|
|
1999
1984
|
);
|
|
2000
1985
|
});
|
|
2001
|
-
return nextSeq;
|
|
2002
1986
|
}
|
|
2003
1987
|
getTreePrefix(depth, isLast) {
|
|
2004
1988
|
if (depth === 0) return "";
|
|
@@ -2109,13 +2093,12 @@ function buildFinishTestTool(criteria) {
|
|
|
2109
2093
|
function buildProgressiveDiscoveryTools(spans) {
|
|
2110
2094
|
return {
|
|
2111
2095
|
expand_trace: (0, import_ai2.tool)({
|
|
2112
|
-
description: "Expand one or more spans to see their full details (attributes, events, content). Use
|
|
2096
|
+
description: "Expand one or more spans to see their full details (attributes, events, content). Use the span ID shown in brackets in the trace skeleton.",
|
|
2113
2097
|
inputSchema: import_v44.z.object({
|
|
2114
|
-
|
|
2115
|
-
range: import_v44.z.string().optional().describe('Range of span indices to expand, e.g. "10-15"')
|
|
2098
|
+
span_ids: import_v44.z.array(import_v44.z.string()).describe("Span IDs (or 8-char prefixes) to expand")
|
|
2116
2099
|
}),
|
|
2117
|
-
execute: async ({
|
|
2118
|
-
return expandTrace(spans,
|
|
2100
|
+
execute: async ({ span_ids }) => {
|
|
2101
|
+
return expandTrace(spans, span_ids);
|
|
2119
2102
|
}
|
|
2120
2103
|
}),
|
|
2121
2104
|
grep_trace: (0, import_ai2.tool)({
|
|
@@ -2181,9 +2164,9 @@ var JudgeAgent = class extends JudgeAgentAdapter {
|
|
|
2181
2164
|
...cfg
|
|
2182
2165
|
});
|
|
2183
2166
|
const tools = {
|
|
2167
|
+
...isLargeTrace ? buildProgressiveDiscoveryTools(spans) : {},
|
|
2184
2168
|
continue_test: buildContinueTestTool(),
|
|
2185
|
-
finish_test: buildFinishTestTool(criteria)
|
|
2186
|
-
...isLargeTrace ? buildProgressiveDiscoveryTools(spans) : {}
|
|
2169
|
+
finish_test: buildFinishTestTool(criteria)
|
|
2187
2170
|
};
|
|
2188
2171
|
const enforceJudgement = input.judgmentRequest != null;
|
|
2189
2172
|
const hasCriteria = criteria.length && criteria.length > 0;
|
|
@@ -2221,7 +2204,7 @@ var JudgeAgent = class extends JudgeAgentAdapter {
|
|
|
2221
2204
|
buildTraceDigest(spans) {
|
|
2222
2205
|
const fullDigest = judgeSpanDigestFormatter.format(spans);
|
|
2223
2206
|
const isLargeTrace = spans.length > 0 && estimateTokens(fullDigest) > this.tokenThreshold;
|
|
2224
|
-
const digest = isLargeTrace ? judgeSpanDigestFormatter.formatStructureOnly(spans) + "\n\nUse expand_trace(
|
|
2207
|
+
const digest = isLargeTrace ? judgeSpanDigestFormatter.formatStructureOnly(spans) + "\n\nUse expand_trace(span_id) to see span details or grep_trace(pattern) to search across spans. Reference spans by the ID shown in brackets." : fullDigest;
|
|
2225
2208
|
this.logger.debug("Trace digest built", {
|
|
2226
2209
|
isLargeTrace,
|
|
2227
2210
|
estimatedTokens: estimateTokens(fullDigest)
|
|
@@ -2233,6 +2216,10 @@ var JudgeAgent = class extends JudgeAgentAdapter {
|
|
|
2233
2216
|
* In multi-step mode, the AI SDK loops automatically: the judge can call
|
|
2234
2217
|
* expand_trace/grep_trace tools multiple times before reaching a terminal
|
|
2235
2218
|
* tool (finish_test/continue_test) or hitting the step limit.
|
|
2219
|
+
*
|
|
2220
|
+
* When the trace is large, toolChoice is relaxed to "required" so the
|
|
2221
|
+
* judge can freely pick discovery tools (expand_trace/grep_trace) before
|
|
2222
|
+
* being forced to a terminal decision.
|
|
2236
2223
|
*/
|
|
2237
2224
|
async invokeLLMWithDiscovery({
|
|
2238
2225
|
isLargeTrace,
|
|
@@ -2240,6 +2227,7 @@ var JudgeAgent = class extends JudgeAgentAdapter {
|
|
|
2240
2227
|
}) {
|
|
2241
2228
|
var _a, _b;
|
|
2242
2229
|
if (isLargeTrace) {
|
|
2230
|
+
params.toolChoice = "required";
|
|
2243
2231
|
params.stopWhen = [
|
|
2244
2232
|
(0, import_ai2.stepCountIs)(this.maxDiscoverySteps),
|
|
2245
2233
|
(0, import_ai2.hasToolCall)("finish_test"),
|
package/dist/index.mjs
CHANGED
|
@@ -1175,10 +1175,10 @@ function indexSpans(spans) {
|
|
|
1175
1175
|
const sorted = [...spans].sort((a, b) => {
|
|
1176
1176
|
return hrTimeToMs(a.startTime) - hrTimeToMs(b.startTime);
|
|
1177
1177
|
});
|
|
1178
|
-
return sorted.map((span
|
|
1178
|
+
return sorted.map((span) => ({
|
|
1179
1179
|
span,
|
|
1180
1180
|
children: [],
|
|
1181
|
-
|
|
1181
|
+
shortId: span.spanContext().spanId.slice(0, 8)
|
|
1182
1182
|
}));
|
|
1183
1183
|
}
|
|
1184
1184
|
|
|
@@ -1193,7 +1193,7 @@ function renderFullSpanNode(node) {
|
|
|
1193
1193
|
const status = getStatusIndicator(span);
|
|
1194
1194
|
const lines = [];
|
|
1195
1195
|
lines.push(
|
|
1196
|
-
`[${node.
|
|
1196
|
+
`[${node.shortId}] ${timestamp} ${span.name} (${formatDuration(duration)})${status}`
|
|
1197
1197
|
);
|
|
1198
1198
|
const attrs = cleanAttributes(span.attributes);
|
|
1199
1199
|
if (Object.keys(attrs).length > 0) {
|
|
@@ -1217,7 +1217,7 @@ function renderFullSpanNode(node) {
|
|
|
1217
1217
|
function truncateToCharBudget(text) {
|
|
1218
1218
|
if (text.length <= TOOL_RESULT_CHAR_BUDGET) return text;
|
|
1219
1219
|
const truncated = text.slice(0, TOOL_RESULT_CHAR_BUDGET);
|
|
1220
|
-
return truncated + "\n\n[TRUNCATED] Output exceeded ~4000 token budget. Use grep_trace(pattern) to search for specific content, or expand_trace with
|
|
1220
|
+
return truncated + "\n\n[TRUNCATED] Output exceeded ~4000 token budget. Use grep_trace(pattern) to search for specific content, or expand_trace with fewer span IDs.";
|
|
1221
1221
|
}
|
|
1222
1222
|
function spanToSearchableText(span) {
|
|
1223
1223
|
const parts = [span.name];
|
|
@@ -1236,30 +1236,22 @@ function spanToSearchableText(span) {
|
|
|
1236
1236
|
}
|
|
1237
1237
|
return parts.join("\n");
|
|
1238
1238
|
}
|
|
1239
|
-
function expandTrace(spans,
|
|
1239
|
+
function expandTrace(spans, spanIds) {
|
|
1240
1240
|
const nodes = indexSpans(spans);
|
|
1241
1241
|
if (nodes.length === 0) {
|
|
1242
1242
|
return "No spans recorded.";
|
|
1243
1243
|
}
|
|
1244
|
-
|
|
1245
|
-
|
|
1246
|
-
if (range != null) {
|
|
1247
|
-
const parts = range.split("-").map(Number);
|
|
1248
|
-
startIdx = parts[0];
|
|
1249
|
-
endIdx = parts[1] ?? startIdx;
|
|
1250
|
-
} else if (index != null) {
|
|
1251
|
-
startIdx = index;
|
|
1252
|
-
endIdx = index;
|
|
1253
|
-
} else {
|
|
1254
|
-
return "Error: provide either index or range parameter.";
|
|
1244
|
+
if (spanIds.length === 0) {
|
|
1245
|
+
return "Error: provide at least one span ID.";
|
|
1255
1246
|
}
|
|
1256
|
-
const
|
|
1257
|
-
|
|
1258
|
-
return
|
|
1247
|
+
const selected = nodes.filter((n) => {
|
|
1248
|
+
const fullId = n.span.spanContext().spanId;
|
|
1249
|
+
return spanIds.some((prefix) => fullId.startsWith(prefix));
|
|
1250
|
+
});
|
|
1251
|
+
if (selected.length === 0) {
|
|
1252
|
+
const available = nodes.map((n) => n.shortId).join(", ");
|
|
1253
|
+
return `Error: no spans matched the given ID(s). Available span IDs: ${available}`;
|
|
1259
1254
|
}
|
|
1260
|
-
const selected = nodes.filter(
|
|
1261
|
-
(n) => n.index >= startIdx && n.index <= endIdx
|
|
1262
|
-
);
|
|
1263
1255
|
const lines = [];
|
|
1264
1256
|
for (const node of selected) {
|
|
1265
1257
|
const spanLines = renderFullSpanNode(node);
|
|
@@ -1293,7 +1285,7 @@ function grepTrace(spans, pattern) {
|
|
|
1293
1285
|
for (const { node, matchingLines } of limited) {
|
|
1294
1286
|
const duration = calculateSpanDuration(node.span);
|
|
1295
1287
|
lines.push(
|
|
1296
|
-
`--- [${node.
|
|
1288
|
+
`--- [${node.shortId}] ${node.span.name} (${formatDuration(duration)}) ---`
|
|
1297
1289
|
);
|
|
1298
1290
|
for (const line of matchingLines) {
|
|
1299
1291
|
lines.push(` ${line}`);
|
|
@@ -1787,14 +1779,12 @@ var JudgeSpanDigestFormatter = class {
|
|
|
1787
1779
|
`Spans: ${spans.length} | Total Duration: ${formatDuration(totalDuration)}`,
|
|
1788
1780
|
""
|
|
1789
1781
|
];
|
|
1790
|
-
let sequence = 1;
|
|
1791
1782
|
const rootCount = tree.length;
|
|
1792
1783
|
tree.forEach((node, idx) => {
|
|
1793
|
-
|
|
1784
|
+
this.renderStructureNode(
|
|
1794
1785
|
node,
|
|
1795
1786
|
lines,
|
|
1796
1787
|
0,
|
|
1797
|
-
sequence,
|
|
1798
1788
|
idx === rootCount - 1
|
|
1799
1789
|
);
|
|
1800
1790
|
});
|
|
@@ -1834,14 +1824,12 @@ var JudgeSpanDigestFormatter = class {
|
|
|
1834
1824
|
)}`,
|
|
1835
1825
|
""
|
|
1836
1826
|
];
|
|
1837
|
-
let sequence = 1;
|
|
1838
1827
|
const rootCount = tree.length;
|
|
1839
1828
|
tree.forEach((node, idx) => {
|
|
1840
|
-
|
|
1829
|
+
this.renderNode(
|
|
1841
1830
|
node,
|
|
1842
1831
|
lines,
|
|
1843
1832
|
0,
|
|
1844
|
-
sequence,
|
|
1845
1833
|
idx === rootCount - 1
|
|
1846
1834
|
);
|
|
1847
1835
|
});
|
|
@@ -1875,38 +1863,37 @@ var JudgeSpanDigestFormatter = class {
|
|
|
1875
1863
|
}
|
|
1876
1864
|
return roots;
|
|
1877
1865
|
}
|
|
1878
|
-
renderStructureNode(node, lines, depth,
|
|
1866
|
+
renderStructureNode(node, lines, depth, isLast = true) {
|
|
1879
1867
|
const span = node.span;
|
|
1868
|
+
const shortId = span.spanContext().spanId.slice(0, 8);
|
|
1880
1869
|
const duration = calculateSpanDuration(span);
|
|
1881
1870
|
const timestamp = new Date(hrTimeToMs(span.startTime)).toISOString();
|
|
1882
1871
|
const status = getStatusIndicator(span);
|
|
1883
1872
|
const tokens = getTokenUsage(span);
|
|
1884
1873
|
const prefix = this.getTreePrefix(depth, isLast);
|
|
1885
1874
|
lines.push(
|
|
1886
|
-
`${prefix}[${
|
|
1875
|
+
`${prefix}[${shortId}] ${timestamp} ${span.name} (${formatDuration(duration)}${tokens})${status}`
|
|
1887
1876
|
);
|
|
1888
1877
|
lines.push("");
|
|
1889
|
-
let nextSeq = sequence + 1;
|
|
1890
1878
|
const childCount = node.children.length;
|
|
1891
1879
|
node.children.forEach((child, idx) => {
|
|
1892
|
-
|
|
1880
|
+
this.renderStructureNode(
|
|
1893
1881
|
child,
|
|
1894
1882
|
lines,
|
|
1895
1883
|
depth + 1,
|
|
1896
|
-
nextSeq,
|
|
1897
1884
|
idx === childCount - 1
|
|
1898
1885
|
);
|
|
1899
1886
|
});
|
|
1900
|
-
return nextSeq;
|
|
1901
1887
|
}
|
|
1902
|
-
renderNode(node, lines, depth,
|
|
1888
|
+
renderNode(node, lines, depth, isLast = true) {
|
|
1903
1889
|
const span = node.span;
|
|
1890
|
+
const shortId = span.spanContext().spanId.slice(0, 8);
|
|
1904
1891
|
const duration = calculateSpanDuration(span);
|
|
1905
1892
|
const timestamp = new Date(hrTimeToMs(span.startTime)).toISOString();
|
|
1906
1893
|
const status = getStatusIndicator(span);
|
|
1907
1894
|
const prefix = this.getTreePrefix(depth, isLast);
|
|
1908
1895
|
lines.push(
|
|
1909
|
-
`${prefix}[${
|
|
1896
|
+
`${prefix}[${shortId}] ${timestamp} ${span.name} (${formatDuration(duration)})${status}`
|
|
1910
1897
|
);
|
|
1911
1898
|
const attrIndent = this.getAttrIndent(depth, isLast);
|
|
1912
1899
|
const attrs = cleanAttributes(span.attributes);
|
|
@@ -1927,18 +1914,15 @@ var JudgeSpanDigestFormatter = class {
|
|
|
1927
1914
|
}
|
|
1928
1915
|
}
|
|
1929
1916
|
lines.push("");
|
|
1930
|
-
let nextSeq = sequence + 1;
|
|
1931
1917
|
const childCount = node.children.length;
|
|
1932
1918
|
node.children.forEach((child, idx) => {
|
|
1933
|
-
|
|
1919
|
+
this.renderNode(
|
|
1934
1920
|
child,
|
|
1935
1921
|
lines,
|
|
1936
1922
|
depth + 1,
|
|
1937
|
-
nextSeq,
|
|
1938
1923
|
idx === childCount - 1
|
|
1939
1924
|
);
|
|
1940
1925
|
});
|
|
1941
|
-
return nextSeq;
|
|
1942
1926
|
}
|
|
1943
1927
|
getTreePrefix(depth, isLast) {
|
|
1944
1928
|
if (depth === 0) return "";
|
|
@@ -2049,13 +2033,12 @@ function buildFinishTestTool(criteria) {
|
|
|
2049
2033
|
function buildProgressiveDiscoveryTools(spans) {
|
|
2050
2034
|
return {
|
|
2051
2035
|
expand_trace: tool({
|
|
2052
|
-
description: "Expand one or more spans to see their full details (attributes, events, content). Use
|
|
2036
|
+
description: "Expand one or more spans to see their full details (attributes, events, content). Use the span ID shown in brackets in the trace skeleton.",
|
|
2053
2037
|
inputSchema: z4.object({
|
|
2054
|
-
|
|
2055
|
-
range: z4.string().optional().describe('Range of span indices to expand, e.g. "10-15"')
|
|
2038
|
+
span_ids: z4.array(z4.string()).describe("Span IDs (or 8-char prefixes) to expand")
|
|
2056
2039
|
}),
|
|
2057
|
-
execute: async ({
|
|
2058
|
-
return expandTrace(spans,
|
|
2040
|
+
execute: async ({ span_ids }) => {
|
|
2041
|
+
return expandTrace(spans, span_ids);
|
|
2059
2042
|
}
|
|
2060
2043
|
}),
|
|
2061
2044
|
grep_trace: tool({
|
|
@@ -2121,9 +2104,9 @@ var JudgeAgent = class extends JudgeAgentAdapter {
|
|
|
2121
2104
|
...cfg
|
|
2122
2105
|
});
|
|
2123
2106
|
const tools = {
|
|
2107
|
+
...isLargeTrace ? buildProgressiveDiscoveryTools(spans) : {},
|
|
2124
2108
|
continue_test: buildContinueTestTool(),
|
|
2125
|
-
finish_test: buildFinishTestTool(criteria)
|
|
2126
|
-
...isLargeTrace ? buildProgressiveDiscoveryTools(spans) : {}
|
|
2109
|
+
finish_test: buildFinishTestTool(criteria)
|
|
2127
2110
|
};
|
|
2128
2111
|
const enforceJudgement = input.judgmentRequest != null;
|
|
2129
2112
|
const hasCriteria = criteria.length && criteria.length > 0;
|
|
@@ -2161,7 +2144,7 @@ var JudgeAgent = class extends JudgeAgentAdapter {
|
|
|
2161
2144
|
buildTraceDigest(spans) {
|
|
2162
2145
|
const fullDigest = judgeSpanDigestFormatter.format(spans);
|
|
2163
2146
|
const isLargeTrace = spans.length > 0 && estimateTokens(fullDigest) > this.tokenThreshold;
|
|
2164
|
-
const digest = isLargeTrace ? judgeSpanDigestFormatter.formatStructureOnly(spans) + "\n\nUse expand_trace(
|
|
2147
|
+
const digest = isLargeTrace ? judgeSpanDigestFormatter.formatStructureOnly(spans) + "\n\nUse expand_trace(span_id) to see span details or grep_trace(pattern) to search across spans. Reference spans by the ID shown in brackets." : fullDigest;
|
|
2165
2148
|
this.logger.debug("Trace digest built", {
|
|
2166
2149
|
isLargeTrace,
|
|
2167
2150
|
estimatedTokens: estimateTokens(fullDigest)
|
|
@@ -2173,6 +2156,10 @@ var JudgeAgent = class extends JudgeAgentAdapter {
|
|
|
2173
2156
|
* In multi-step mode, the AI SDK loops automatically: the judge can call
|
|
2174
2157
|
* expand_trace/grep_trace tools multiple times before reaching a terminal
|
|
2175
2158
|
* tool (finish_test/continue_test) or hitting the step limit.
|
|
2159
|
+
*
|
|
2160
|
+
* When the trace is large, toolChoice is relaxed to "required" so the
|
|
2161
|
+
* judge can freely pick discovery tools (expand_trace/grep_trace) before
|
|
2162
|
+
* being forced to a terminal decision.
|
|
2176
2163
|
*/
|
|
2177
2164
|
async invokeLLMWithDiscovery({
|
|
2178
2165
|
isLargeTrace,
|
|
@@ -2180,6 +2167,7 @@ var JudgeAgent = class extends JudgeAgentAdapter {
|
|
|
2180
2167
|
}) {
|
|
2181
2168
|
var _a, _b;
|
|
2182
2169
|
if (isLargeTrace) {
|
|
2170
|
+
params.toolChoice = "required";
|
|
2183
2171
|
params.stopWhen = [
|
|
2184
2172
|
stepCountIs(this.maxDiscoverySteps),
|
|
2185
2173
|
hasToolCall("finish_test"),
|