@langwatch/scenario 0.4.4 → 0.4.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.mts CHANGED
@@ -571,6 +571,10 @@ declare class JudgeAgent extends JudgeAgentAdapter {
571
571
  * In multi-step mode, the AI SDK loops automatically: the judge can call
572
572
  * expand_trace/grep_trace tools multiple times before reaching a terminal
573
573
  * tool (finish_test/continue_test) or hitting the step limit.
574
+ *
575
+ * When the trace is large, toolChoice is relaxed to "required" so the
576
+ * judge can freely pick discovery tools (expand_trace/grep_trace) before
577
+ * being forced to a terminal decision.
574
578
  */
575
579
  private invokeLLMWithDiscovery;
576
580
  private parseToolCalls;
@@ -691,14 +695,15 @@ declare function estimateTokens(text: string): number;
691
695
  * Expands one or more spans from a trace, returning their full details
692
696
  * (attributes, events, status) with tree position context.
693
697
  *
698
+ * Spans are matched by prefix: the caller can pass the truncated 8-char
699
+ * span ID shown in the skeleton and it will match any span whose full ID
700
+ * starts with that prefix.
701
+ *
694
702
  * @param spans - The full array of ReadableSpan objects for the trace
695
- * @param options - Either a single `index` or a `range` string like "10-15"
696
- * @returns Formatted string with full span details, truncated to ~4000 tokens
703
+ * @param spanIds - Span IDs or prefixes to expand
704
+ * @returns Formatted string with full span details, truncated to ~4096 tokens
697
705
  */
698
- declare function expandTrace(spans: ReadableSpan[], { index, range }: {
699
- index?: number;
700
- range?: string;
701
- }): string;
706
+ declare function expandTrace(spans: ReadableSpan[], spanIds: string[]): string;
702
707
  /**
703
708
  * Searches across all span attributes, events, and content for a pattern.
704
709
  * Returns matching spans with their tree position and matching content.
package/dist/index.d.ts CHANGED
@@ -571,6 +571,10 @@ declare class JudgeAgent extends JudgeAgentAdapter {
571
571
  * In multi-step mode, the AI SDK loops automatically: the judge can call
572
572
  * expand_trace/grep_trace tools multiple times before reaching a terminal
573
573
  * tool (finish_test/continue_test) or hitting the step limit.
574
+ *
575
+ * When the trace is large, toolChoice is relaxed to "required" so the
576
+ * judge can freely pick discovery tools (expand_trace/grep_trace) before
577
+ * being forced to a terminal decision.
574
578
  */
575
579
  private invokeLLMWithDiscovery;
576
580
  private parseToolCalls;
@@ -691,14 +695,15 @@ declare function estimateTokens(text: string): number;
691
695
  * Expands one or more spans from a trace, returning their full details
692
696
  * (attributes, events, status) with tree position context.
693
697
  *
698
+ * Spans are matched by prefix: the caller can pass the truncated 8-char
699
+ * span ID shown in the skeleton and it will match any span whose full ID
700
+ * starts with that prefix.
701
+ *
694
702
  * @param spans - The full array of ReadableSpan objects for the trace
695
- * @param options - Either a single `index` or a `range` string like "10-15"
696
- * @returns Formatted string with full span details, truncated to ~4000 tokens
703
+ * @param spanIds - Span IDs or prefixes to expand
704
+ * @returns Formatted string with full span details, truncated to ~4096 tokens
697
705
  */
698
- declare function expandTrace(spans: ReadableSpan[], { index, range }: {
699
- index?: number;
700
- range?: string;
701
- }): string;
706
+ declare function expandTrace(spans: ReadableSpan[], spanIds: string[]): string;
702
707
  /**
703
708
  * Searches across all span attributes, events, and content for a pattern.
704
709
  * Returns matching spans with their tree position and matching content.
package/dist/index.js CHANGED
@@ -1235,10 +1235,10 @@ function indexSpans(spans) {
1235
1235
  const sorted = [...spans].sort((a, b) => {
1236
1236
  return hrTimeToMs(a.startTime) - hrTimeToMs(b.startTime);
1237
1237
  });
1238
- return sorted.map((span, i) => ({
1238
+ return sorted.map((span) => ({
1239
1239
  span,
1240
1240
  children: [],
1241
- index: i + 1
1241
+ shortId: span.spanContext().spanId.slice(0, 8)
1242
1242
  }));
1243
1243
  }
1244
1244
 
@@ -1253,7 +1253,7 @@ function renderFullSpanNode(node) {
1253
1253
  const status = getStatusIndicator(span);
1254
1254
  const lines = [];
1255
1255
  lines.push(
1256
- `[${node.index}] ${timestamp} ${span.name} (${formatDuration(duration)})${status}`
1256
+ `[${node.shortId}] ${timestamp} ${span.name} (${formatDuration(duration)})${status}`
1257
1257
  );
1258
1258
  const attrs = cleanAttributes(span.attributes);
1259
1259
  if (Object.keys(attrs).length > 0) {
@@ -1277,7 +1277,7 @@ function renderFullSpanNode(node) {
1277
1277
  function truncateToCharBudget(text) {
1278
1278
  if (text.length <= TOOL_RESULT_CHAR_BUDGET) return text;
1279
1279
  const truncated = text.slice(0, TOOL_RESULT_CHAR_BUDGET);
1280
- return truncated + "\n\n[TRUNCATED] Output exceeded ~4000 token budget. Use grep_trace(pattern) to search for specific content, or expand_trace with a narrower range.";
1280
+ return truncated + "\n\n[TRUNCATED] Output exceeded ~4000 token budget. Use grep_trace(pattern) to search for specific content, or expand_trace with fewer span IDs.";
1281
1281
  }
1282
1282
  function spanToSearchableText(span) {
1283
1283
  const parts = [span.name];
@@ -1296,30 +1296,22 @@ function spanToSearchableText(span) {
1296
1296
  }
1297
1297
  return parts.join("\n");
1298
1298
  }
1299
- function expandTrace(spans, { index, range }) {
1299
+ function expandTrace(spans, spanIds) {
1300
1300
  const nodes = indexSpans(spans);
1301
1301
  if (nodes.length === 0) {
1302
1302
  return "No spans recorded.";
1303
1303
  }
1304
- let startIdx;
1305
- let endIdx;
1306
- if (range != null) {
1307
- const parts = range.split("-").map(Number);
1308
- startIdx = parts[0];
1309
- endIdx = parts[1] ?? startIdx;
1310
- } else if (index != null) {
1311
- startIdx = index;
1312
- endIdx = index;
1313
- } else {
1314
- return "Error: provide either index or range parameter.";
1304
+ if (spanIds.length === 0) {
1305
+ return "Error: provide at least one span ID.";
1315
1306
  }
1316
- const maxIndex = nodes.length;
1317
- if (startIdx < 1 || endIdx > maxIndex || startIdx > endIdx) {
1318
- return `Error: span index out of range. Valid range is 1-${maxIndex}.`;
1307
+ const selected = nodes.filter((n) => {
1308
+ const fullId = n.span.spanContext().spanId;
1309
+ return spanIds.some((prefix) => fullId.startsWith(prefix));
1310
+ });
1311
+ if (selected.length === 0) {
1312
+ const available = nodes.map((n) => n.shortId).join(", ");
1313
+ return `Error: no spans matched the given ID(s). Available span IDs: ${available}`;
1319
1314
  }
1320
- const selected = nodes.filter(
1321
- (n) => n.index >= startIdx && n.index <= endIdx
1322
- );
1323
1315
  const lines = [];
1324
1316
  for (const node of selected) {
1325
1317
  const spanLines = renderFullSpanNode(node);
@@ -1353,7 +1345,7 @@ function grepTrace(spans, pattern) {
1353
1345
  for (const { node, matchingLines } of limited) {
1354
1346
  const duration = calculateSpanDuration(node.span);
1355
1347
  lines.push(
1356
- `--- [${node.index}] ${node.span.name} (${formatDuration(duration)}) ---`
1348
+ `--- [${node.shortId}] ${node.span.name} (${formatDuration(duration)}) ---`
1357
1349
  );
1358
1350
  for (const line of matchingLines) {
1359
1351
  lines.push(` ${line}`);
@@ -1847,14 +1839,12 @@ var JudgeSpanDigestFormatter = class {
1847
1839
  `Spans: ${spans.length} | Total Duration: ${formatDuration(totalDuration)}`,
1848
1840
  ""
1849
1841
  ];
1850
- let sequence = 1;
1851
1842
  const rootCount = tree.length;
1852
1843
  tree.forEach((node, idx) => {
1853
- sequence = this.renderStructureNode(
1844
+ this.renderStructureNode(
1854
1845
  node,
1855
1846
  lines,
1856
1847
  0,
1857
- sequence,
1858
1848
  idx === rootCount - 1
1859
1849
  );
1860
1850
  });
@@ -1894,14 +1884,12 @@ var JudgeSpanDigestFormatter = class {
1894
1884
  )}`,
1895
1885
  ""
1896
1886
  ];
1897
- let sequence = 1;
1898
1887
  const rootCount = tree.length;
1899
1888
  tree.forEach((node, idx) => {
1900
- sequence = this.renderNode(
1889
+ this.renderNode(
1901
1890
  node,
1902
1891
  lines,
1903
1892
  0,
1904
- sequence,
1905
1893
  idx === rootCount - 1
1906
1894
  );
1907
1895
  });
@@ -1935,38 +1923,37 @@ var JudgeSpanDigestFormatter = class {
1935
1923
  }
1936
1924
  return roots;
1937
1925
  }
1938
- renderStructureNode(node, lines, depth, sequence, isLast = true) {
1926
+ renderStructureNode(node, lines, depth, isLast = true) {
1939
1927
  const span = node.span;
1928
+ const shortId = span.spanContext().spanId.slice(0, 8);
1940
1929
  const duration = calculateSpanDuration(span);
1941
1930
  const timestamp = new Date(hrTimeToMs(span.startTime)).toISOString();
1942
1931
  const status = getStatusIndicator(span);
1943
1932
  const tokens = getTokenUsage(span);
1944
1933
  const prefix = this.getTreePrefix(depth, isLast);
1945
1934
  lines.push(
1946
- `${prefix}[${sequence}] ${timestamp} ${span.name} (${formatDuration(duration)}${tokens})${status}`
1935
+ `${prefix}[${shortId}] ${timestamp} ${span.name} (${formatDuration(duration)}${tokens})${status}`
1947
1936
  );
1948
1937
  lines.push("");
1949
- let nextSeq = sequence + 1;
1950
1938
  const childCount = node.children.length;
1951
1939
  node.children.forEach((child, idx) => {
1952
- nextSeq = this.renderStructureNode(
1940
+ this.renderStructureNode(
1953
1941
  child,
1954
1942
  lines,
1955
1943
  depth + 1,
1956
- nextSeq,
1957
1944
  idx === childCount - 1
1958
1945
  );
1959
1946
  });
1960
- return nextSeq;
1961
1947
  }
1962
- renderNode(node, lines, depth, sequence, isLast = true) {
1948
+ renderNode(node, lines, depth, isLast = true) {
1963
1949
  const span = node.span;
1950
+ const shortId = span.spanContext().spanId.slice(0, 8);
1964
1951
  const duration = calculateSpanDuration(span);
1965
1952
  const timestamp = new Date(hrTimeToMs(span.startTime)).toISOString();
1966
1953
  const status = getStatusIndicator(span);
1967
1954
  const prefix = this.getTreePrefix(depth, isLast);
1968
1955
  lines.push(
1969
- `${prefix}[${sequence}] ${timestamp} ${span.name} (${formatDuration(duration)})${status}`
1956
+ `${prefix}[${shortId}] ${timestamp} ${span.name} (${formatDuration(duration)})${status}`
1970
1957
  );
1971
1958
  const attrIndent = this.getAttrIndent(depth, isLast);
1972
1959
  const attrs = cleanAttributes(span.attributes);
@@ -1987,18 +1974,15 @@ var JudgeSpanDigestFormatter = class {
1987
1974
  }
1988
1975
  }
1989
1976
  lines.push("");
1990
- let nextSeq = sequence + 1;
1991
1977
  const childCount = node.children.length;
1992
1978
  node.children.forEach((child, idx) => {
1993
- nextSeq = this.renderNode(
1979
+ this.renderNode(
1994
1980
  child,
1995
1981
  lines,
1996
1982
  depth + 1,
1997
- nextSeq,
1998
1983
  idx === childCount - 1
1999
1984
  );
2000
1985
  });
2001
- return nextSeq;
2002
1986
  }
2003
1987
  getTreePrefix(depth, isLast) {
2004
1988
  if (depth === 0) return "";
@@ -2109,13 +2093,12 @@ function buildFinishTestTool(criteria) {
2109
2093
  function buildProgressiveDiscoveryTools(spans) {
2110
2094
  return {
2111
2095
  expand_trace: (0, import_ai2.tool)({
2112
- description: "Expand one or more spans to see their full details (attributes, events, content). Use a single index like 5 or a range like '10-15'.",
2096
+ description: "Expand one or more spans to see their full details (attributes, events, content). Use the span ID shown in brackets in the trace skeleton.",
2113
2097
  inputSchema: import_v44.z.object({
2114
- index: import_v44.z.number().optional().describe("Single span index to expand"),
2115
- range: import_v44.z.string().optional().describe('Range of span indices to expand, e.g. "10-15"')
2098
+ span_ids: import_v44.z.array(import_v44.z.string()).describe("Span IDs (or 8-char prefixes) to expand")
2116
2099
  }),
2117
- execute: async ({ index, range }) => {
2118
- return expandTrace(spans, { index, range });
2100
+ execute: async ({ span_ids }) => {
2101
+ return expandTrace(spans, span_ids);
2119
2102
  }
2120
2103
  }),
2121
2104
  grep_trace: (0, import_ai2.tool)({
@@ -2181,9 +2164,9 @@ var JudgeAgent = class extends JudgeAgentAdapter {
2181
2164
  ...cfg
2182
2165
  });
2183
2166
  const tools = {
2167
+ ...isLargeTrace ? buildProgressiveDiscoveryTools(spans) : {},
2184
2168
  continue_test: buildContinueTestTool(),
2185
- finish_test: buildFinishTestTool(criteria),
2186
- ...isLargeTrace ? buildProgressiveDiscoveryTools(spans) : {}
2169
+ finish_test: buildFinishTestTool(criteria)
2187
2170
  };
2188
2171
  const enforceJudgement = input.judgmentRequest != null;
2189
2172
  const hasCriteria = criteria.length && criteria.length > 0;
@@ -2221,7 +2204,7 @@ var JudgeAgent = class extends JudgeAgentAdapter {
2221
2204
  buildTraceDigest(spans) {
2222
2205
  const fullDigest = judgeSpanDigestFormatter.format(spans);
2223
2206
  const isLargeTrace = spans.length > 0 && estimateTokens(fullDigest) > this.tokenThreshold;
2224
- const digest = isLargeTrace ? judgeSpanDigestFormatter.formatStructureOnly(spans) + "\n\nUse expand_trace(spanIndex) to see span details or grep_trace(pattern) to search across spans." : fullDigest;
2207
+ const digest = isLargeTrace ? judgeSpanDigestFormatter.formatStructureOnly(spans) + "\n\nUse expand_trace(span_id) to see span details or grep_trace(pattern) to search across spans. Reference spans by the ID shown in brackets." : fullDigest;
2225
2208
  this.logger.debug("Trace digest built", {
2226
2209
  isLargeTrace,
2227
2210
  estimatedTokens: estimateTokens(fullDigest)
@@ -2233,6 +2216,10 @@ var JudgeAgent = class extends JudgeAgentAdapter {
2233
2216
  * In multi-step mode, the AI SDK loops automatically: the judge can call
2234
2217
  * expand_trace/grep_trace tools multiple times before reaching a terminal
2235
2218
  * tool (finish_test/continue_test) or hitting the step limit.
2219
+ *
2220
+ * When the trace is large, toolChoice is relaxed to "required" so the
2221
+ * judge can freely pick discovery tools (expand_trace/grep_trace) before
2222
+ * being forced to a terminal decision.
2236
2223
  */
2237
2224
  async invokeLLMWithDiscovery({
2238
2225
  isLargeTrace,
@@ -2240,6 +2227,7 @@ var JudgeAgent = class extends JudgeAgentAdapter {
2240
2227
  }) {
2241
2228
  var _a, _b;
2242
2229
  if (isLargeTrace) {
2230
+ params.toolChoice = "required";
2243
2231
  params.stopWhen = [
2244
2232
  (0, import_ai2.stepCountIs)(this.maxDiscoverySteps),
2245
2233
  (0, import_ai2.hasToolCall)("finish_test"),
@@ -3909,6 +3897,7 @@ var ScenarioExecution = class {
3909
3897
  });
3910
3898
  this.currentTurnSpan = this.tracer.startSpan("Scenario Turn", {
3911
3899
  attributes: {
3900
+ "langwatch.origin": "simulation",
3912
3901
  "scenario.name": this.config.name,
3913
3902
  "scenario.id": this.config.id,
3914
3903
  [import_observability3.attributes.ATTR_LANGWATCH_THREAD_ID]: this.state.threadId,
@@ -4424,12 +4413,17 @@ var EventBus = class _EventBus {
4424
4413
  }
4425
4414
  /**
4426
4415
  * Stops accepting new events and drains the processing queue.
4416
+ * Times out after the specified duration to prevent blocking indefinitely
4417
+ * when the events endpoint is slow or unavailable.
4427
4418
  */
4428
- async drain() {
4419
+ async drain(timeoutMs = 3e5) {
4429
4420
  this.logger.debug("Draining event stream");
4430
4421
  this.events$.complete();
4431
4422
  if (this.processingPromise) {
4432
- await this.processingPromise;
4423
+ await Promise.race([
4424
+ this.processingPromise,
4425
+ new Promise((resolve) => setTimeout(resolve, timeoutMs))
4426
+ ]);
4433
4427
  }
4434
4428
  }
4435
4429
  /**
package/dist/index.mjs CHANGED
@@ -1175,10 +1175,10 @@ function indexSpans(spans) {
1175
1175
  const sorted = [...spans].sort((a, b) => {
1176
1176
  return hrTimeToMs(a.startTime) - hrTimeToMs(b.startTime);
1177
1177
  });
1178
- return sorted.map((span, i) => ({
1178
+ return sorted.map((span) => ({
1179
1179
  span,
1180
1180
  children: [],
1181
- index: i + 1
1181
+ shortId: span.spanContext().spanId.slice(0, 8)
1182
1182
  }));
1183
1183
  }
1184
1184
 
@@ -1193,7 +1193,7 @@ function renderFullSpanNode(node) {
1193
1193
  const status = getStatusIndicator(span);
1194
1194
  const lines = [];
1195
1195
  lines.push(
1196
- `[${node.index}] ${timestamp} ${span.name} (${formatDuration(duration)})${status}`
1196
+ `[${node.shortId}] ${timestamp} ${span.name} (${formatDuration(duration)})${status}`
1197
1197
  );
1198
1198
  const attrs = cleanAttributes(span.attributes);
1199
1199
  if (Object.keys(attrs).length > 0) {
@@ -1217,7 +1217,7 @@ function renderFullSpanNode(node) {
1217
1217
  function truncateToCharBudget(text) {
1218
1218
  if (text.length <= TOOL_RESULT_CHAR_BUDGET) return text;
1219
1219
  const truncated = text.slice(0, TOOL_RESULT_CHAR_BUDGET);
1220
- return truncated + "\n\n[TRUNCATED] Output exceeded ~4000 token budget. Use grep_trace(pattern) to search for specific content, or expand_trace with a narrower range.";
1220
+ return truncated + "\n\n[TRUNCATED] Output exceeded ~4000 token budget. Use grep_trace(pattern) to search for specific content, or expand_trace with fewer span IDs.";
1221
1221
  }
1222
1222
  function spanToSearchableText(span) {
1223
1223
  const parts = [span.name];
@@ -1236,30 +1236,22 @@ function spanToSearchableText(span) {
1236
1236
  }
1237
1237
  return parts.join("\n");
1238
1238
  }
1239
- function expandTrace(spans, { index, range }) {
1239
+ function expandTrace(spans, spanIds) {
1240
1240
  const nodes = indexSpans(spans);
1241
1241
  if (nodes.length === 0) {
1242
1242
  return "No spans recorded.";
1243
1243
  }
1244
- let startIdx;
1245
- let endIdx;
1246
- if (range != null) {
1247
- const parts = range.split("-").map(Number);
1248
- startIdx = parts[0];
1249
- endIdx = parts[1] ?? startIdx;
1250
- } else if (index != null) {
1251
- startIdx = index;
1252
- endIdx = index;
1253
- } else {
1254
- return "Error: provide either index or range parameter.";
1244
+ if (spanIds.length === 0) {
1245
+ return "Error: provide at least one span ID.";
1255
1246
  }
1256
- const maxIndex = nodes.length;
1257
- if (startIdx < 1 || endIdx > maxIndex || startIdx > endIdx) {
1258
- return `Error: span index out of range. Valid range is 1-${maxIndex}.`;
1247
+ const selected = nodes.filter((n) => {
1248
+ const fullId = n.span.spanContext().spanId;
1249
+ return spanIds.some((prefix) => fullId.startsWith(prefix));
1250
+ });
1251
+ if (selected.length === 0) {
1252
+ const available = nodes.map((n) => n.shortId).join(", ");
1253
+ return `Error: no spans matched the given ID(s). Available span IDs: ${available}`;
1259
1254
  }
1260
- const selected = nodes.filter(
1261
- (n) => n.index >= startIdx && n.index <= endIdx
1262
- );
1263
1255
  const lines = [];
1264
1256
  for (const node of selected) {
1265
1257
  const spanLines = renderFullSpanNode(node);
@@ -1293,7 +1285,7 @@ function grepTrace(spans, pattern) {
1293
1285
  for (const { node, matchingLines } of limited) {
1294
1286
  const duration = calculateSpanDuration(node.span);
1295
1287
  lines.push(
1296
- `--- [${node.index}] ${node.span.name} (${formatDuration(duration)}) ---`
1288
+ `--- [${node.shortId}] ${node.span.name} (${formatDuration(duration)}) ---`
1297
1289
  );
1298
1290
  for (const line of matchingLines) {
1299
1291
  lines.push(` ${line}`);
@@ -1787,14 +1779,12 @@ var JudgeSpanDigestFormatter = class {
1787
1779
  `Spans: ${spans.length} | Total Duration: ${formatDuration(totalDuration)}`,
1788
1780
  ""
1789
1781
  ];
1790
- let sequence = 1;
1791
1782
  const rootCount = tree.length;
1792
1783
  tree.forEach((node, idx) => {
1793
- sequence = this.renderStructureNode(
1784
+ this.renderStructureNode(
1794
1785
  node,
1795
1786
  lines,
1796
1787
  0,
1797
- sequence,
1798
1788
  idx === rootCount - 1
1799
1789
  );
1800
1790
  });
@@ -1834,14 +1824,12 @@ var JudgeSpanDigestFormatter = class {
1834
1824
  )}`,
1835
1825
  ""
1836
1826
  ];
1837
- let sequence = 1;
1838
1827
  const rootCount = tree.length;
1839
1828
  tree.forEach((node, idx) => {
1840
- sequence = this.renderNode(
1829
+ this.renderNode(
1841
1830
  node,
1842
1831
  lines,
1843
1832
  0,
1844
- sequence,
1845
1833
  idx === rootCount - 1
1846
1834
  );
1847
1835
  });
@@ -1875,38 +1863,37 @@ var JudgeSpanDigestFormatter = class {
1875
1863
  }
1876
1864
  return roots;
1877
1865
  }
1878
- renderStructureNode(node, lines, depth, sequence, isLast = true) {
1866
+ renderStructureNode(node, lines, depth, isLast = true) {
1879
1867
  const span = node.span;
1868
+ const shortId = span.spanContext().spanId.slice(0, 8);
1880
1869
  const duration = calculateSpanDuration(span);
1881
1870
  const timestamp = new Date(hrTimeToMs(span.startTime)).toISOString();
1882
1871
  const status = getStatusIndicator(span);
1883
1872
  const tokens = getTokenUsage(span);
1884
1873
  const prefix = this.getTreePrefix(depth, isLast);
1885
1874
  lines.push(
1886
- `${prefix}[${sequence}] ${timestamp} ${span.name} (${formatDuration(duration)}${tokens})${status}`
1875
+ `${prefix}[${shortId}] ${timestamp} ${span.name} (${formatDuration(duration)}${tokens})${status}`
1887
1876
  );
1888
1877
  lines.push("");
1889
- let nextSeq = sequence + 1;
1890
1878
  const childCount = node.children.length;
1891
1879
  node.children.forEach((child, idx) => {
1892
- nextSeq = this.renderStructureNode(
1880
+ this.renderStructureNode(
1893
1881
  child,
1894
1882
  lines,
1895
1883
  depth + 1,
1896
- nextSeq,
1897
1884
  idx === childCount - 1
1898
1885
  );
1899
1886
  });
1900
- return nextSeq;
1901
1887
  }
1902
- renderNode(node, lines, depth, sequence, isLast = true) {
1888
+ renderNode(node, lines, depth, isLast = true) {
1903
1889
  const span = node.span;
1890
+ const shortId = span.spanContext().spanId.slice(0, 8);
1904
1891
  const duration = calculateSpanDuration(span);
1905
1892
  const timestamp = new Date(hrTimeToMs(span.startTime)).toISOString();
1906
1893
  const status = getStatusIndicator(span);
1907
1894
  const prefix = this.getTreePrefix(depth, isLast);
1908
1895
  lines.push(
1909
- `${prefix}[${sequence}] ${timestamp} ${span.name} (${formatDuration(duration)})${status}`
1896
+ `${prefix}[${shortId}] ${timestamp} ${span.name} (${formatDuration(duration)})${status}`
1910
1897
  );
1911
1898
  const attrIndent = this.getAttrIndent(depth, isLast);
1912
1899
  const attrs = cleanAttributes(span.attributes);
@@ -1927,18 +1914,15 @@ var JudgeSpanDigestFormatter = class {
1927
1914
  }
1928
1915
  }
1929
1916
  lines.push("");
1930
- let nextSeq = sequence + 1;
1931
1917
  const childCount = node.children.length;
1932
1918
  node.children.forEach((child, idx) => {
1933
- nextSeq = this.renderNode(
1919
+ this.renderNode(
1934
1920
  child,
1935
1921
  lines,
1936
1922
  depth + 1,
1937
- nextSeq,
1938
1923
  idx === childCount - 1
1939
1924
  );
1940
1925
  });
1941
- return nextSeq;
1942
1926
  }
1943
1927
  getTreePrefix(depth, isLast) {
1944
1928
  if (depth === 0) return "";
@@ -2049,13 +2033,12 @@ function buildFinishTestTool(criteria) {
2049
2033
  function buildProgressiveDiscoveryTools(spans) {
2050
2034
  return {
2051
2035
  expand_trace: tool({
2052
- description: "Expand one or more spans to see their full details (attributes, events, content). Use a single index like 5 or a range like '10-15'.",
2036
+ description: "Expand one or more spans to see their full details (attributes, events, content). Use the span ID shown in brackets in the trace skeleton.",
2053
2037
  inputSchema: z4.object({
2054
- index: z4.number().optional().describe("Single span index to expand"),
2055
- range: z4.string().optional().describe('Range of span indices to expand, e.g. "10-15"')
2038
+ span_ids: z4.array(z4.string()).describe("Span IDs (or 8-char prefixes) to expand")
2056
2039
  }),
2057
- execute: async ({ index, range }) => {
2058
- return expandTrace(spans, { index, range });
2040
+ execute: async ({ span_ids }) => {
2041
+ return expandTrace(spans, span_ids);
2059
2042
  }
2060
2043
  }),
2061
2044
  grep_trace: tool({
@@ -2121,9 +2104,9 @@ var JudgeAgent = class extends JudgeAgentAdapter {
2121
2104
  ...cfg
2122
2105
  });
2123
2106
  const tools = {
2107
+ ...isLargeTrace ? buildProgressiveDiscoveryTools(spans) : {},
2124
2108
  continue_test: buildContinueTestTool(),
2125
- finish_test: buildFinishTestTool(criteria),
2126
- ...isLargeTrace ? buildProgressiveDiscoveryTools(spans) : {}
2109
+ finish_test: buildFinishTestTool(criteria)
2127
2110
  };
2128
2111
  const enforceJudgement = input.judgmentRequest != null;
2129
2112
  const hasCriteria = criteria.length && criteria.length > 0;
@@ -2161,7 +2144,7 @@ var JudgeAgent = class extends JudgeAgentAdapter {
2161
2144
  buildTraceDigest(spans) {
2162
2145
  const fullDigest = judgeSpanDigestFormatter.format(spans);
2163
2146
  const isLargeTrace = spans.length > 0 && estimateTokens(fullDigest) > this.tokenThreshold;
2164
- const digest = isLargeTrace ? judgeSpanDigestFormatter.formatStructureOnly(spans) + "\n\nUse expand_trace(spanIndex) to see span details or grep_trace(pattern) to search across spans." : fullDigest;
2147
+ const digest = isLargeTrace ? judgeSpanDigestFormatter.formatStructureOnly(spans) + "\n\nUse expand_trace(span_id) to see span details or grep_trace(pattern) to search across spans. Reference spans by the ID shown in brackets." : fullDigest;
2165
2148
  this.logger.debug("Trace digest built", {
2166
2149
  isLargeTrace,
2167
2150
  estimatedTokens: estimateTokens(fullDigest)
@@ -2173,6 +2156,10 @@ var JudgeAgent = class extends JudgeAgentAdapter {
2173
2156
  * In multi-step mode, the AI SDK loops automatically: the judge can call
2174
2157
  * expand_trace/grep_trace tools multiple times before reaching a terminal
2175
2158
  * tool (finish_test/continue_test) or hitting the step limit.
2159
+ *
2160
+ * When the trace is large, toolChoice is relaxed to "required" so the
2161
+ * judge can freely pick discovery tools (expand_trace/grep_trace) before
2162
+ * being forced to a terminal decision.
2176
2163
  */
2177
2164
  async invokeLLMWithDiscovery({
2178
2165
  isLargeTrace,
@@ -2180,6 +2167,7 @@ var JudgeAgent = class extends JudgeAgentAdapter {
2180
2167
  }) {
2181
2168
  var _a, _b;
2182
2169
  if (isLargeTrace) {
2170
+ params.toolChoice = "required";
2183
2171
  params.stopWhen = [
2184
2172
  stepCountIs(this.maxDiscoverySteps),
2185
2173
  hasToolCall("finish_test"),
@@ -3849,6 +3837,7 @@ var ScenarioExecution = class {
3849
3837
  });
3850
3838
  this.currentTurnSpan = this.tracer.startSpan("Scenario Turn", {
3851
3839
  attributes: {
3840
+ "langwatch.origin": "simulation",
3852
3841
  "scenario.name": this.config.name,
3853
3842
  "scenario.id": this.config.id,
3854
3843
  [attributes3.ATTR_LANGWATCH_THREAD_ID]: this.state.threadId,
@@ -4371,12 +4360,17 @@ var EventBus = class _EventBus {
4371
4360
  }
4372
4361
  /**
4373
4362
  * Stops accepting new events and drains the processing queue.
4363
+ * Times out after the specified duration to prevent blocking indefinitely
4364
+ * when the events endpoint is slow or unavailable.
4374
4365
  */
4375
- async drain() {
4366
+ async drain(timeoutMs = 3e5) {
4376
4367
  this.logger.debug("Draining event stream");
4377
4368
  this.events$.complete();
4378
4369
  if (this.processingPromise) {
4379
- await this.processingPromise;
4370
+ await Promise.race([
4371
+ this.processingPromise,
4372
+ new Promise((resolve) => setTimeout(resolve, timeoutMs))
4373
+ ]);
4380
4374
  }
4381
4375
  }
4382
4376
  /**
@@ -606,12 +606,17 @@ var EventBus = class _EventBus {
606
606
  }
607
607
  /**
608
608
  * Stops accepting new events and drains the processing queue.
609
+ * Times out after the specified duration to prevent blocking indefinitely
610
+ * when the events endpoint is slow or unavailable.
609
611
  */
610
- async drain() {
612
+ async drain(timeoutMs = 3e5) {
611
613
  this.logger.debug("Draining event stream");
612
614
  this.events$.complete();
613
615
  if (this.processingPromise) {
614
- await this.processingPromise;
616
+ await Promise.race([
617
+ this.processingPromise,
618
+ new Promise((resolve) => setTimeout(resolve, timeoutMs))
619
+ ]);
615
620
  }
616
621
  }
617
622
  /**
@@ -589,12 +589,17 @@ var EventBus = class _EventBus {
589
589
  }
590
590
  /**
591
591
  * Stops accepting new events and drains the processing queue.
592
+ * Times out after the specified duration to prevent blocking indefinitely
593
+ * when the events endpoint is slow or unavailable.
592
594
  */
593
- async drain() {
595
+ async drain(timeoutMs = 3e5) {
594
596
  this.logger.debug("Draining event stream");
595
597
  this.events$.complete();
596
598
  if (this.processingPromise) {
597
- await this.processingPromise;
599
+ await Promise.race([
600
+ this.processingPromise,
601
+ new Promise((resolve) => setTimeout(resolve, timeoutMs))
602
+ ]);
598
603
  }
599
604
  }
600
605
  /**
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@langwatch/scenario",
3
- "version": "0.4.4",
3
+ "version": "0.4.6",
4
4
  "description": "A TypeScript library for testing AI agents using scenarios",
5
5
  "main": "dist/index.js",
6
6
  "module": "dist/index.mjs",
@@ -22,7 +22,7 @@
22
22
  },
23
23
  "homepage": "https://github.com/langwatch/scenario#readme",
24
24
  "author": "LangWatch",
25
- "license": "MIT",
25
+ "license": "AGPL-3.0",
26
26
  "engines": {
27
27
  "node": ">=20",
28
28
  "pnpm": ">=8"