@langwatch/scenario 0.2.13 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -1,41 +1,428 @@
1
- import {
2
- AgentAdapter,
3
- AgentRole,
4
- DEFAULT_MAX_TURNS,
5
- DEFAULT_TEMPERATURE,
6
- DEFAULT_VERBOSE,
7
- EventBus,
8
- JudgeAgentAdapter,
9
- UserSimulatorAgentAdapter,
10
- allAgentRoles,
11
- defineConfig,
12
- domain_exports,
13
- generateMessageId,
14
- generateScenarioId,
15
- generateScenarioRunId,
16
- generateThreadId,
17
- getBatchRunId,
18
- getProjectConfig,
19
- scenarioProjectConfigSchema
20
- } from "./chunk-6SKQWXT7.mjs";
21
- import {
22
- Logger,
23
- getEnv
24
- } from "./chunk-OL4RFXV4.mjs";
25
- import {
26
- __export
27
- } from "./chunk-7P6ASYW6.mjs";
1
+ var __defProp = Object.defineProperty;
2
+ var __export = (target, all) => {
3
+ for (var name in all)
4
+ __defProp(target, name, { get: all[name], enumerable: true });
5
+ };
6
+
7
+ // src/tracing/setup.ts
8
+ import { setupObservability } from "langwatch/observability/node";
9
+
10
+ // src/agents/judge/judge-span-collector.ts
11
+ import { attributes } from "langwatch/observability";
12
+ var JudgeSpanCollector = class {
13
+ spans = [];
14
+ onStart() {
15
+ }
16
+ onEnd(span) {
17
+ this.spans.push(span);
18
+ }
19
+ forceFlush() {
20
+ return Promise.resolve();
21
+ }
22
+ shutdown() {
23
+ this.spans = [];
24
+ return Promise.resolve();
25
+ }
26
+ /**
27
+ * Retrieves all spans associated with a specific thread.
28
+ * @param threadId - The thread identifier to filter spans by
29
+ * @returns Array of spans for the given thread
30
+ */
31
+ getSpansForThread(threadId) {
32
+ const spanMap = /* @__PURE__ */ new Map();
33
+ for (const span of this.spans) {
34
+ spanMap.set(span.spanContext().spanId, span);
35
+ }
36
+ const belongsToThread = (span) => {
37
+ var _a;
38
+ if (span.attributes[attributes.ATTR_LANGWATCH_THREAD_ID] === threadId) {
39
+ return true;
40
+ }
41
+ const parentId = (_a = span.parentSpanContext) == null ? void 0 : _a.spanId;
42
+ if (parentId && spanMap.has(parentId)) {
43
+ return belongsToThread(spanMap.get(parentId));
44
+ }
45
+ return false;
46
+ };
47
+ return this.spans.filter(belongsToThread);
48
+ }
49
+ };
50
+ var judgeSpanCollector = new JudgeSpanCollector();
51
+
52
+ // src/config/env.ts
53
+ import { z } from "zod/v4";
54
+
55
+ // src/config/log-levels.ts
56
+ var LogLevel = /* @__PURE__ */ ((LogLevel2) => {
57
+ LogLevel2["ERROR"] = "ERROR";
58
+ LogLevel2["WARN"] = "WARN";
59
+ LogLevel2["INFO"] = "INFO";
60
+ LogLevel2["DEBUG"] = "DEBUG";
61
+ return LogLevel2;
62
+ })(LogLevel || {});
63
+ var LOG_LEVELS = Object.values(LogLevel);
64
+
65
+ // src/config/env.ts
66
+ var envSchema = z.object({
67
+ /**
68
+ * LangWatch API key for event reporting.
69
+ * If not provided, events will not be sent to LangWatch.
70
+ */
71
+ LANGWATCH_API_KEY: z.string().optional(),
72
+ /**
73
+ * LangWatch endpoint URL for event reporting.
74
+ * Defaults to the production LangWatch endpoint.
75
+ */
76
+ LANGWATCH_ENDPOINT: z.string().url().optional().default("https://app.langwatch.ai"),
77
+ /**
78
+ * Disables simulation report info messages when set to any truthy value.
79
+ * Useful for CI/CD environments or when you want cleaner output.
80
+ */
81
+ SCENARIO_DISABLE_SIMULATION_REPORT_INFO: z.string().optional().transform((val) => Boolean(val)),
82
+ /**
83
+ * Node environment - affects logging and behavior.
84
+ * Defaults to 'development' if not specified.
85
+ */
86
+ NODE_ENV: z.enum(["development", "production", "test"]).default("development"),
87
+ /**
88
+ * Case-insensitive log level for the scenario package.
89
+ * Defaults to 'info' if not specified.
90
+ */
91
+ LOG_LEVEL: z.string().toUpperCase().pipe(z.nativeEnum(LogLevel)).optional().default("INFO" /* INFO */),
92
+ /**
93
+ * Scenario batch run ID.
94
+ * If not provided, a random ID will be generated.
95
+ */
96
+ SCENARIO_BATCH_RUN_ID: z.string().optional()
97
+ });
98
+ function getEnv() {
99
+ return envSchema.parse(process.env);
100
+ }
101
+
102
+ // src/config/load.ts
103
+ import fs from "fs/promises";
104
+ import path from "path";
105
+ import { pathToFileURL } from "url";
106
+
107
+ // src/domain/index.ts
108
+ var domain_exports = {};
109
+ __export(domain_exports, {
110
+ AgentAdapter: () => AgentAdapter,
111
+ AgentRole: () => AgentRole,
112
+ DEFAULT_MAX_TURNS: () => DEFAULT_MAX_TURNS,
113
+ DEFAULT_VERBOSE: () => DEFAULT_VERBOSE,
114
+ JudgeAgentAdapter: () => JudgeAgentAdapter,
115
+ UserSimulatorAgentAdapter: () => UserSimulatorAgentAdapter,
116
+ allAgentRoles: () => allAgentRoles,
117
+ defineConfig: () => defineConfig,
118
+ scenarioProjectConfigSchema: () => scenarioProjectConfigSchema
119
+ });
120
+
121
+ // src/domain/core/config.ts
122
+ import { z as z3 } from "zod/v4";
123
+
124
+ // src/domain/core/schemas/model.schema.ts
125
+ import { z as z2 } from "zod/v4";
126
+
127
+ // src/domain/core/constants.ts
128
+ var DEFAULT_TEMPERATURE = 0;
129
+
130
+ // src/domain/core/schemas/model.schema.ts
131
+ var modelSchema = z2.object({
132
+ model: z2.custom((val) => Boolean(val), {
133
+ message: "A model is required. Configure it in scenario.config.js defaultModel or pass directly to the agent."
134
+ }).describe("The OpenAI Language Model to use for generating responses."),
135
+ temperature: z2.number().min(0).max(1).optional().describe("The temperature for the language model.").default(DEFAULT_TEMPERATURE),
136
+ maxTokens: z2.number().optional().describe("The maximum number of tokens to generate.")
137
+ });
138
+
139
+ // src/domain/core/config.ts
140
+ var headless = typeof process !== "undefined" ? process.env.SCENARIO_HEADLESS === "true" : false;
141
+ var scenarioProjectConfigSchema = z3.object({
142
+ defaultModel: modelSchema.optional(),
143
+ headless: z3.boolean().optional().default(headless)
144
+ }).strict();
145
+ function defineConfig(config2) {
146
+ return config2;
147
+ }
148
+
149
+ // src/domain/agents/index.ts
150
+ var AgentRole = /* @__PURE__ */ ((AgentRole2) => {
151
+ AgentRole2["USER"] = "User";
152
+ AgentRole2["AGENT"] = "Agent";
153
+ AgentRole2["JUDGE"] = "Judge";
154
+ return AgentRole2;
155
+ })(AgentRole || {});
156
+ var allAgentRoles = [
157
+ "User" /* USER */,
158
+ "Agent" /* AGENT */,
159
+ "Judge" /* JUDGE */
160
+ ];
161
+ var AgentAdapter = class {
162
+ name;
163
+ role = "Agent" /* AGENT */;
164
+ };
165
+ var UserSimulatorAgentAdapter = class extends AgentAdapter {
166
+ name = "UserSimulatorAgent";
167
+ role = "User" /* USER */;
168
+ };
169
+ var JudgeAgentAdapter = class extends AgentAdapter {
170
+ name = "JudgeAgent";
171
+ role = "Judge" /* JUDGE */;
172
+ };
173
+
174
+ // src/domain/scenarios/index.ts
175
+ var DEFAULT_MAX_TURNS = 10;
176
+ var DEFAULT_VERBOSE = false;
177
+
178
+ // src/config/load.ts
179
+ async function loadScenarioProjectConfig() {
180
+ const cwd = process.cwd();
181
+ const configNames = [
182
+ "scenario.config.js",
183
+ "scenario.config.mjs"
184
+ ];
185
+ for (const name of configNames) {
186
+ const fullPath = path.join(cwd, name);
187
+ try {
188
+ await fs.access(fullPath);
189
+ const configModule = await import(pathToFileURL(fullPath).href);
190
+ const config2 = configModule.default || configModule;
191
+ const parsed = scenarioProjectConfigSchema.safeParse(config2);
192
+ if (!parsed.success) {
193
+ throw new Error(
194
+ `Invalid config file ${name}: ${JSON.stringify(parsed.error.format(), null, 2)}`
195
+ );
196
+ }
197
+ return parsed.data;
198
+ } catch (error) {
199
+ if (error instanceof Error && "code" in error && error.code === "ENOENT") {
200
+ continue;
201
+ }
202
+ throw error;
203
+ }
204
+ }
205
+ return await scenarioProjectConfigSchema.parseAsync({});
206
+ }
207
+
208
+ // src/utils/logger.ts
209
+ var Logger = class _Logger {
210
+ constructor(context2) {
211
+ this.context = context2;
212
+ }
213
+ /**
214
+ * Creates a logger with context (e.g., class name)
215
+ */
216
+ static create(context2) {
217
+ return new _Logger(context2);
218
+ }
219
+ /**
220
+ * Returns the current log level from environment.
221
+ * Uses a getter for clarity and idiomatic usage.
222
+ */
223
+ get LOG_LEVEL() {
224
+ return getEnv().LOG_LEVEL;
225
+ }
226
+ /**
227
+ * Returns the index of the given log level in the LOG_LEVELS array.
228
+ * @param level - The log level to get the index for.
229
+ * @returns The index of the log level in the LOG_LEVELS array.
230
+ */
231
+ getLogLevelIndexFor(level) {
232
+ return LOG_LEVELS.indexOf(level);
233
+ }
234
+ /**
235
+ * Checks if logging should occur based on LOG_LEVEL env var
236
+ */
237
+ shouldLog(level) {
238
+ const currentLevelIndex = this.getLogLevelIndexFor(this.LOG_LEVEL);
239
+ const requestedLevelIndex = this.getLogLevelIndexFor(level);
240
+ return currentLevelIndex >= 0 && requestedLevelIndex <= currentLevelIndex;
241
+ }
242
+ formatMessage(message2) {
243
+ return this.context ? `[${this.context}] ${message2}` : message2;
244
+ }
245
+ error(message2, data) {
246
+ if (this.shouldLog("ERROR" /* ERROR */)) {
247
+ const formattedMessage = this.formatMessage(message2);
248
+ if (data) {
249
+ console.error(formattedMessage, data);
250
+ } else {
251
+ console.error(formattedMessage);
252
+ }
253
+ }
254
+ }
255
+ warn(message2, data) {
256
+ if (this.shouldLog("WARN" /* WARN */)) {
257
+ const formattedMessage = this.formatMessage(message2);
258
+ if (data) {
259
+ console.warn(formattedMessage, data);
260
+ } else {
261
+ console.warn(formattedMessage);
262
+ }
263
+ }
264
+ }
265
+ info(message2, data) {
266
+ if (this.shouldLog("INFO" /* INFO */)) {
267
+ const formattedMessage = this.formatMessage(message2);
268
+ if (data) {
269
+ console.info(formattedMessage, data);
270
+ } else {
271
+ console.info(formattedMessage);
272
+ }
273
+ }
274
+ }
275
+ debug(message2, data) {
276
+ if (this.shouldLog("DEBUG" /* DEBUG */)) {
277
+ const formattedMessage = this.formatMessage(message2);
278
+ if (data) {
279
+ console.log(formattedMessage, data);
280
+ } else {
281
+ console.log(formattedMessage);
282
+ }
283
+ }
284
+ }
285
+ };
286
+
287
+ // src/config/get-project-config.ts
288
+ var logger = new Logger("scenario.config");
289
+ var configLoaded = false;
290
+ var config = null;
291
+ var configLoadPromise = null;
292
+ async function loadProjectConfig() {
293
+ if (configLoaded) {
294
+ return;
295
+ }
296
+ if (configLoadPromise) {
297
+ return configLoadPromise;
298
+ }
299
+ configLoadPromise = (async () => {
300
+ try {
301
+ config = await loadScenarioProjectConfig();
302
+ logger.debug("loaded scenario project config", { config });
303
+ } catch (error) {
304
+ logger.error("error loading scenario project config", { error });
305
+ } finally {
306
+ configLoaded = true;
307
+ }
308
+ })();
309
+ return configLoadPromise;
310
+ }
311
+ async function getProjectConfig() {
312
+ await loadProjectConfig();
313
+ return config;
314
+ }
315
+
316
+ // src/tracing/setup.ts
317
+ var envConfig = getEnv();
318
+ var observabilityHandle = setupObservability({
319
+ langwatch: {
320
+ apiKey: envConfig.LANGWATCH_API_KEY,
321
+ endpoint: envConfig.LANGWATCH_ENDPOINT
322
+ },
323
+ spanProcessors: [judgeSpanCollector]
324
+ });
28
325
 
29
326
  // src/agents/index.ts
30
327
  var agents_exports = {};
31
328
  __export(agents_exports, {
329
+ JudgeSpanCollector: () => JudgeSpanCollector,
330
+ JudgeSpanDigestFormatter: () => JudgeSpanDigestFormatter,
331
+ RealtimeAgentAdapter: () => RealtimeAgentAdapter,
32
332
  judgeAgent: () => judgeAgent,
333
+ judgeSpanCollector: () => judgeSpanCollector,
334
+ judgeSpanDigestFormatter: () => judgeSpanDigestFormatter,
33
335
  userSimulatorAgent: () => userSimulatorAgent
34
336
  });
35
337
 
36
- // src/agents/judge-agent.ts
37
- import { generateText, tool } from "ai";
38
- import { z } from "zod";
338
+ // src/agents/judge/judge-agent.ts
339
+ import { tool } from "ai";
340
+ import { z as z4 } from "zod/v4";
341
+
342
+ // src/agents/judge/judge-utils.ts
343
+ function truncateBase64Media(value) {
344
+ var _a;
345
+ if (typeof value === "string") {
346
+ const dataUrlMatch = value.match(
347
+ /^data:((image|audio|video)\/[a-z0-9+.-]+);base64,(.+)$/i
348
+ );
349
+ if (dataUrlMatch) {
350
+ const mimeType = dataUrlMatch[1];
351
+ const mediaType = dataUrlMatch[2].toUpperCase();
352
+ const size = dataUrlMatch[3].length;
353
+ return `[${mediaType}: ${mimeType}, ~${size} bytes]`;
354
+ }
355
+ return value;
356
+ }
357
+ if (Array.isArray(value)) {
358
+ return value.map(truncateBase64Media);
359
+ }
360
+ if (value && typeof value === "object") {
361
+ const obj = value;
362
+ if (obj.type === "file" && typeof obj.mediaType === "string" && typeof obj.data === "string") {
363
+ const mediaType = obj.mediaType;
364
+ const category = ((_a = mediaType.split("/")[0]) == null ? void 0 : _a.toUpperCase()) ?? "FILE";
365
+ return {
366
+ ...obj,
367
+ data: `[${category}: ${mediaType}, ~${obj.data.length} bytes]`
368
+ };
369
+ }
370
+ if (obj.type === "image" && typeof obj.image === "string") {
371
+ const imageData = obj.image;
372
+ const dataUrlMatch = imageData.match(
373
+ /^data:((image)\/[a-z0-9+.-]+);base64,(.+)$/i
374
+ );
375
+ if (dataUrlMatch) {
376
+ return {
377
+ ...obj,
378
+ image: `[IMAGE: ${dataUrlMatch[1]}, ~${dataUrlMatch[3].length} bytes]`
379
+ };
380
+ }
381
+ if (imageData.length > 1e3 && /^[A-Za-z0-9+/=]+$/.test(imageData)) {
382
+ return {
383
+ ...obj,
384
+ image: `[IMAGE: unknown, ~${imageData.length} bytes]`
385
+ };
386
+ }
387
+ }
388
+ const result = {};
389
+ for (const [key, val] of Object.entries(obj)) {
390
+ result[key] = truncateBase64Media(val);
391
+ }
392
+ return result;
393
+ }
394
+ return value;
395
+ }
396
+ var JudgeUtils = {
397
+ /**
398
+ * Builds a minimal transcript from messages for judge evaluation.
399
+ * Truncates base64 media to reduce token usage.
400
+ * @param messages - Array of CoreMessage from conversation
401
+ * @returns Plain text transcript with one message per line
402
+ */
403
+ buildTranscriptFromMessages(messages) {
404
+ return messages.map((msg) => {
405
+ const truncatedContent = truncateBase64Media(msg.content);
406
+ return `${msg.role}: ${JSON.stringify(truncatedContent)}`;
407
+ }).join("\n");
408
+ }
409
+ };
410
+
411
+ // src/agents/llm-invoker.factory.ts
412
+ import { generateText } from "ai";
413
+ var createLLMInvoker = (logger2) => {
414
+ return async (params) => {
415
+ try {
416
+ return await generateText({
417
+ ...params,
418
+ experimental_telemetry: { isEnabled: true }
419
+ });
420
+ } catch (error) {
421
+ logger2.error("Error generating text", { error });
422
+ throw error;
423
+ }
424
+ };
425
+ };
39
426
 
40
427
  // src/agents/utils.ts
41
428
  var toolMessageRole = "tool";
@@ -92,27 +479,308 @@ var criterionToParamName = (criterion) => {
92
479
  return criterion.replace(/"/g, "").replace(/[^a-zA-Z0-9]/g, "_").replace(/ /g, "_").toLowerCase().substring(0, 70);
93
480
  };
94
481
 
95
- // src/utils/config.ts
96
- function mergeConfig(config, projectConfig) {
97
- if (!projectConfig) {
98
- return config;
482
+ // src/agents/judge/judge-span-digest-formatter.ts
483
+ import { attributes as attributes2 } from "langwatch/observability";
484
+
485
+ // src/agents/judge/deep-transform.ts
486
+ function deepTransform(value, fn) {
487
+ const result = fn(value);
488
+ if (result !== value) return result;
489
+ if (Array.isArray(value)) {
490
+ return value.map((v) => deepTransform(v, fn));
491
+ }
492
+ if (value !== null && typeof value === "object") {
493
+ const out = {};
494
+ for (const [k, v] of Object.entries(value)) {
495
+ out[k] = deepTransform(v, fn);
496
+ }
497
+ return out;
498
+ }
499
+ return value;
500
+ }
501
+
502
+ // src/agents/judge/string-deduplicator.ts
503
+ var StringDeduplicator = class {
504
+ seen = /* @__PURE__ */ new Map();
505
+ threshold;
506
+ constructor(params) {
507
+ this.threshold = params.threshold;
99
508
  }
100
- return {
101
- ...projectConfig.defaultModel,
102
- ...config
103
- };
509
+ /**
510
+ * Resets seen strings for a new digest.
511
+ */
512
+ reset() {
513
+ this.seen.clear();
514
+ }
515
+ /**
516
+ * Processes a string, returning duplicate marker if seen before.
517
+ * @param str - String to process
518
+ * @returns Original string or duplicate marker
519
+ */
520
+ process(str) {
521
+ if (str.length < this.threshold) return str;
522
+ const key = this.normalize(str);
523
+ if (this.seen.has(key)) return "[DUPLICATE - SEE ABOVE]";
524
+ this.seen.set(key, true);
525
+ return str;
526
+ }
527
+ /**
528
+ * Normalizes string for comparison (whitespace, case).
529
+ */
530
+ normalize(str) {
531
+ return str.replace(/\\[nrt]/g, " ").replace(/[\n\r\t]/g, " ").replace(/\s+/g, " ").trim().toLowerCase();
532
+ }
533
+ };
534
+
535
+ // src/agents/judge/truncate-media.ts
536
+ function truncateMediaUrl(str) {
537
+ const match = str.match(
538
+ /^data:((image|audio|video)\/[a-z0-9+.-]+);base64,(.+)$/i
539
+ );
540
+ if (!match) return str;
541
+ const [, mimeType, category, data] = match;
542
+ return `[${category.toUpperCase()}: ${mimeType}, ~${data.length} bytes]`;
104
543
  }
105
- function mergeAndValidateConfig(config, projectConfig) {
544
+ function truncateMediaPart(v) {
106
545
  var _a;
107
- const mergedConfig = mergeConfig(config, projectConfig);
108
- mergedConfig.model = mergedConfig.model ?? ((_a = projectConfig == null ? void 0 : projectConfig.defaultModel) == null ? void 0 : _a.model);
109
- if (!mergedConfig.model) {
110
- throw new Error("Model is required");
546
+ if (v === null || typeof v !== "object" || Array.isArray(v)) return null;
547
+ const obj = v;
548
+ if (obj.type === "file" && typeof obj.mediaType === "string" && typeof obj.data === "string") {
549
+ const category = ((_a = obj.mediaType.split("/")[0]) == null ? void 0 : _a.toUpperCase()) ?? "FILE";
550
+ return {
551
+ ...obj,
552
+ data: `[${category}: ${obj.mediaType}, ~${obj.data.length} bytes]`
553
+ };
554
+ }
555
+ if (obj.type === "image" && typeof obj.image === "string") {
556
+ const imageData = obj.image;
557
+ const dataUrlMatch = imageData.match(
558
+ /^data:((image)\/[a-z0-9+.-]+);base64,(.+)$/i
559
+ );
560
+ if (dataUrlMatch) {
561
+ return {
562
+ ...obj,
563
+ image: `[IMAGE: ${dataUrlMatch[1]}, ~${dataUrlMatch[3].length} bytes]`
564
+ };
565
+ }
566
+ if (imageData.length > 1e3 && /^[A-Za-z0-9+/=]+$/.test(imageData)) {
567
+ return {
568
+ ...obj,
569
+ image: `[IMAGE: unknown, ~${imageData.length} bytes]`
570
+ };
571
+ }
111
572
  }
112
- return mergedConfig;
573
+ return null;
113
574
  }
114
575
 
115
- // src/agents/judge-agent.ts
576
+ // src/agents/judge/judge-span-digest-formatter.ts
577
+ var JudgeSpanDigestFormatter = class {
578
+ logger = new Logger("JudgeSpanDigestFormatter");
579
+ deduplicator = new StringDeduplicator({ threshold: 50 });
580
+ /**
581
+ * Formats spans into a complete digest with full content and nesting.
582
+ * @param spans - All spans for a thread
583
+ * @returns Plain text digest
584
+ */
585
+ format(spans) {
586
+ this.deduplicator.reset();
587
+ this.logger.debug("format() called", {
588
+ spanCount: spans.length,
589
+ spanNames: spans.map((s) => s.name)
590
+ });
591
+ if (spans.length === 0) {
592
+ this.logger.debug("No spans to format");
593
+ return "No spans recorded.";
594
+ }
595
+ const sortedSpans = this.sortByStartTime(spans);
596
+ const tree = this.buildHierarchy(sortedSpans);
597
+ const totalDuration = this.calculateTotalDuration(sortedSpans);
598
+ this.logger.debug("Hierarchy built", {
599
+ rootCount: tree.length,
600
+ totalDuration
601
+ });
602
+ const lines = [
603
+ `Spans: ${spans.length} | Total Duration: ${this.formatDuration(
604
+ totalDuration
605
+ )}`,
606
+ ""
607
+ ];
608
+ let sequence = 1;
609
+ const rootCount = tree.length;
610
+ tree.forEach((node, idx) => {
611
+ sequence = this.renderNode(
612
+ node,
613
+ lines,
614
+ 0,
615
+ sequence,
616
+ idx === rootCount - 1
617
+ );
618
+ });
619
+ const errors = this.collectErrors(spans);
620
+ if (errors.length > 0) {
621
+ lines.push("");
622
+ lines.push("=== ERRORS ===");
623
+ errors.forEach((e) => lines.push(e));
624
+ }
625
+ return lines.join("\n");
626
+ }
627
+ sortByStartTime(spans) {
628
+ return [...spans].sort((a, b) => {
629
+ const aTime = this.hrTimeToMs(a.startTime);
630
+ const bTime = this.hrTimeToMs(b.startTime);
631
+ return aTime - bTime;
632
+ });
633
+ }
634
+ buildHierarchy(spans) {
635
+ var _a;
636
+ const spanMap = /* @__PURE__ */ new Map();
637
+ const roots = [];
638
+ for (const span of spans) {
639
+ spanMap.set(span.spanContext().spanId, { span, children: [] });
640
+ }
641
+ for (const span of spans) {
642
+ const node = spanMap.get(span.spanContext().spanId);
643
+ const parentId = (_a = span.parentSpanContext) == null ? void 0 : _a.spanId;
644
+ if (parentId && spanMap.has(parentId)) {
645
+ spanMap.get(parentId).children.push(node);
646
+ } else {
647
+ roots.push(node);
648
+ }
649
+ }
650
+ return roots;
651
+ }
652
+ renderNode(node, lines, depth, sequence, isLast = true) {
653
+ const span = node.span;
654
+ const duration = this.calculateSpanDuration(span);
655
+ const timestamp = this.formatTimestamp(span.startTime);
656
+ const status = this.getStatusIndicator(span);
657
+ const prefix = this.getTreePrefix(depth, isLast);
658
+ lines.push(
659
+ `${prefix}[${sequence}] ${new Date(timestamp).toISOString()} ${span.name} (${this.formatDuration(duration)})${status}`
660
+ );
661
+ const attrIndent = this.getAttrIndent(depth, isLast);
662
+ const attrs = this.cleanAttributes(span.attributes);
663
+ if (Object.keys(attrs).length > 0) {
664
+ for (const [key, value] of Object.entries(attrs)) {
665
+ lines.push(`${attrIndent}${key}: ${this.formatValue(value)}`);
666
+ }
667
+ }
668
+ if (span.events.length > 0) {
669
+ for (const event of span.events) {
670
+ lines.push(`${attrIndent}[event] ${event.name}`);
671
+ if (event.attributes) {
672
+ const eventAttrs = this.cleanAttributes(event.attributes);
673
+ for (const [key, value] of Object.entries(eventAttrs)) {
674
+ lines.push(`${attrIndent} ${key}: ${this.formatValue(value)}`);
675
+ }
676
+ }
677
+ }
678
+ }
679
+ lines.push("");
680
+ let nextSeq = sequence + 1;
681
+ const childCount = node.children.length;
682
+ node.children.forEach((child, idx) => {
683
+ nextSeq = this.renderNode(
684
+ child,
685
+ lines,
686
+ depth + 1,
687
+ nextSeq,
688
+ idx === childCount - 1
689
+ );
690
+ });
691
+ return nextSeq;
692
+ }
693
+ getTreePrefix(depth, isLast) {
694
+ if (depth === 0) return "";
695
+ const connector = isLast ? "\u2514\u2500\u2500 " : "\u251C\u2500\u2500 ";
696
+ return "\u2502 ".repeat(depth - 1) + connector;
697
+ }
698
+ getAttrIndent(depth, isLast) {
699
+ if (depth === 0) return " ";
700
+ const continuation = isLast ? " " : "\u2502 ";
701
+ return "\u2502 ".repeat(depth - 1) + continuation + " ";
702
+ }
703
+ cleanAttributes(attrs) {
704
+ const cleaned = {};
705
+ const seen = /* @__PURE__ */ new Set();
706
+ const excludedKeys = [
707
+ attributes2.ATTR_LANGWATCH_THREAD_ID,
708
+ "langwatch.scenario.id",
709
+ "langwatch.scenario.name"
710
+ ];
711
+ for (const [key, value] of Object.entries(attrs)) {
712
+ if (excludedKeys.includes(key)) {
713
+ continue;
714
+ }
715
+ const cleanKey = key.replace(/^(langwatch)\./, "");
716
+ if (!seen.has(cleanKey)) {
717
+ seen.add(cleanKey);
718
+ cleaned[cleanKey] = value;
719
+ }
720
+ }
721
+ return cleaned;
722
+ }
723
+ formatValue(value) {
724
+ const processed = this.transformValue(value);
725
+ return typeof processed === "string" ? processed : JSON.stringify(processed);
726
+ }
727
+ transformValue(value) {
728
+ return deepTransform(value, (v) => {
729
+ const mediaPart = truncateMediaPart(v);
730
+ if (mediaPart) return mediaPart;
731
+ if (typeof v !== "string") return v;
732
+ return this.transformString(v);
733
+ });
734
+ }
735
+ transformString(str) {
736
+ if (this.looksLikeJson(str)) {
737
+ try {
738
+ const processed = this.transformValue(JSON.parse(str));
739
+ return JSON.stringify(processed);
740
+ } catch {
741
+ }
742
+ }
743
+ const truncated = truncateMediaUrl(str);
744
+ if (truncated !== str) return truncated;
745
+ return this.deduplicator.process(str);
746
+ }
747
+ looksLikeJson(str) {
748
+ const t = str.trim();
749
+ return t.startsWith("{") && t.endsWith("}") || t.startsWith("[") && t.endsWith("]");
750
+ }
751
+ hrTimeToMs(hrTime) {
752
+ return hrTime[0] * 1e3 + hrTime[1] / 1e6;
753
+ }
754
+ calculateSpanDuration(span) {
755
+ return this.hrTimeToMs(span.endTime) - this.hrTimeToMs(span.startTime);
756
+ }
757
+ calculateTotalDuration(spans) {
758
+ if (spans.length === 0) return 0;
759
+ const first = this.hrTimeToMs(spans[0].startTime);
760
+ const last = Math.max(...spans.map((s) => this.hrTimeToMs(s.endTime)));
761
+ return last - first;
762
+ }
763
+ formatDuration(ms) {
764
+ if (ms < 1e3) return `${Math.round(ms)}ms`;
765
+ return `${(ms / 1e3).toFixed(2)}s`;
766
+ }
767
+ formatTimestamp(hrTime) {
768
+ const ms = this.hrTimeToMs(hrTime);
769
+ return new Date(ms).toISOString();
770
+ }
771
+ getStatusIndicator(span) {
772
+ if (span.status.code === 2) {
773
+ return ` \u26A0\uFE0F ERROR: ${span.status.message ?? "unknown"}`;
774
+ }
775
+ return "";
776
+ }
777
+ collectErrors(spans) {
778
+ return spans.filter((s) => s.status.code === 2).map((s) => `- ${s.name}: ${s.status.message ?? "unknown error"}`);
779
+ }
780
+ };
781
+ var judgeSpanDigestFormatter = new JudgeSpanDigestFormatter();
782
+
783
+ // src/agents/judge/judge-agent.ts
116
784
  function buildSystemPrompt(criteria, description) {
117
785
  const criteriaList = (criteria == null ? void 0 : criteria.map((criterion, idx) => `${idx + 1}. ${criterion}`).join("\n")) || "No criteria provided";
118
786
  return `
@@ -142,24 +810,24 @@ ${criteriaList}
142
810
  function buildContinueTestTool() {
143
811
  return tool({
144
812
  description: "Continue the test with the next step",
145
- parameters: z.object({})
813
+ inputSchema: z4.object({})
146
814
  });
147
815
  }
148
816
  function buildFinishTestTool(criteria) {
149
817
  const criteriaNames = criteria.map(criterionToParamName);
150
818
  return tool({
151
819
  description: "Complete the test with a final verdict",
152
- parameters: z.object({
153
- criteria: z.object(
820
+ inputSchema: z4.object({
821
+ criteria: z4.object(
154
822
  Object.fromEntries(
155
823
  criteriaNames.map((name, idx) => [
156
824
  name,
157
- z.enum(["true", "false", "inconclusive"]).describe(criteria[idx])
825
+ z4.enum(["true", "false", "inconclusive"]).describe(criteria[idx])
158
826
  ])
159
827
  )
160
828
  ).strict().describe("Strict verdict for each criterion"),
161
- reasoning: z.string().describe("Explanation of what the final verdict should be"),
162
- verdict: z.enum(["success", "failure", "inconclusive"]).describe("The final verdict of the test")
829
+ reasoning: z4.string().describe("Explanation of what the final verdict should be"),
830
+ verdict: z4.enum(["success", "failure", "inconclusive"]).describe("The final verdict of the test")
163
831
  })
164
832
  });
165
833
  }
@@ -168,25 +836,47 @@ var JudgeAgent = class extends JudgeAgentAdapter {
168
836
  super();
169
837
  this.cfg = cfg;
170
838
  this.criteria = cfg.criteria;
171
- this.role = "Judge" /* JUDGE */;
839
+ this.spanCollector = cfg.spanCollector ?? judgeSpanCollector;
172
840
  }
173
841
  logger = new Logger("JudgeAgent");
842
+ spanCollector;
174
843
  role = "Judge" /* JUDGE */;
175
844
  criteria;
845
+ /**
846
+ * LLM invocation function. Can be overridden to customize LLM behavior.
847
+ */
848
+ invokeLLM = createLLMInvoker(this.logger);
176
849
  async call(input) {
177
- var _a;
850
+ var _a, _b, _c;
851
+ this.logger.debug("call() invoked", {
852
+ threadId: input.threadId,
853
+ currentTurn: input.scenarioState.currentTurn,
854
+ maxTurns: input.scenarioConfig.maxTurns,
855
+ judgmentRequest: input.judgmentRequest
856
+ });
857
+ const digest = this.getOpenTelemetryTracesDigest(input.threadId);
858
+ this.logger.debug("OpenTelemetry traces built", { digest });
859
+ const transcript = JudgeUtils.buildTranscriptFromMessages(input.messages);
860
+ const contentForJudge = `
861
+ <transcript>
862
+ ${transcript}
863
+ </transcript>
864
+ <opentelemetry_traces>
865
+ ${digest}
866
+ </opentelemetry_traces>
867
+ `;
178
868
  const cfg = this.cfg;
179
869
  const systemPrompt = cfg.systemPrompt ?? buildSystemPrompt(cfg.criteria, input.scenarioConfig.description);
180
870
  const messages = [
181
871
  { role: "system", content: systemPrompt },
182
- ...input.messages
872
+ { role: "user", content: contentForJudge }
183
873
  ];
184
874
  const isLastMessage = input.scenarioState.currentTurn === input.scenarioConfig.maxTurns;
185
875
  const projectConfig = await getProjectConfig();
186
- const mergedConfig = mergeAndValidateConfig(cfg, projectConfig);
187
- if (!mergedConfig.model) {
188
- throw new Error("Model is required for the judge agent");
189
- }
876
+ const mergedConfig = modelSchema.parse({
877
+ ...projectConfig == null ? void 0 : projectConfig.defaultModel,
878
+ ...cfg
879
+ });
190
880
  const tools = {
191
881
  continue_test: buildContinueTestTool(),
192
882
  finish_test: buildFinishTestTool(cfg.criteria)
@@ -196,27 +886,39 @@ var JudgeAgent = class extends JudgeAgentAdapter {
196
886
  if (enforceJudgement && !hasCriteria) {
197
887
  return {
198
888
  success: false,
199
- messages: [],
200
889
  reasoning: "JudgeAgent: No criteria was provided to be judged against",
201
890
  metCriteria: [],
202
891
  unmetCriteria: []
203
892
  };
204
893
  }
205
894
  const toolChoice = (isLastMessage || enforceJudgement) && hasCriteria ? { type: "tool", toolName: "finish_test" } : "required";
206
- const completion = await this.generateText({
895
+ this.logger.debug("Calling LLM", {
896
+ model: mergedConfig.model,
897
+ toolChoice,
898
+ isLastMessage,
899
+ enforceJudgement
900
+ });
901
+ const completion = await this.invokeLLM({
207
902
  model: mergedConfig.model,
208
903
  messages,
209
904
  temperature: mergedConfig.temperature ?? 0,
210
- maxTokens: mergedConfig.maxTokens,
905
+ maxOutputTokens: mergedConfig.maxTokens,
211
906
  tools,
212
907
  toolChoice
213
908
  });
909
+ this.logger.debug("LLM response received", {
910
+ toolCallCount: ((_a = completion.toolCalls) == null ? void 0 : _a.length) ?? 0,
911
+ toolCalls: (_b = completion.toolCalls) == null ? void 0 : _b.map((tc) => ({
912
+ toolName: tc.toolName,
913
+ args: tc.input
914
+ }))
915
+ });
214
916
  let args;
215
- if ((_a = completion.toolCalls) == null ? void 0 : _a.length) {
917
+ if ((_c = completion.toolCalls) == null ? void 0 : _c.length) {
216
918
  const toolCall = completion.toolCalls[0];
217
919
  switch (toolCall.toolName) {
218
920
  case "finish_test": {
219
- args = toolCall.args;
921
+ args = toolCall.input;
220
922
  const verdict = args.verdict || "inconclusive";
221
923
  const reasoning = args.reasoning || "No reasoning provided";
222
924
  const criteria = args.criteria || {};
@@ -227,20 +929,21 @@ var JudgeAgent = class extends JudgeAgentAdapter {
227
929
  const unmetCriteria = cfg.criteria.filter(
228
930
  (_, i) => criteriaValues[i] !== "true"
229
931
  );
230
- return {
932
+ const result = {
231
933
  success: verdict === "success",
232
- messages: input.messages,
233
934
  reasoning,
234
935
  metCriteria,
235
936
  unmetCriteria
236
937
  };
938
+ this.logger.debug("finish_test result", result);
939
+ return result;
237
940
  }
238
941
  case "continue_test":
239
- return [];
942
+ this.logger.debug("continue_test - proceeding to next turn");
943
+ return null;
240
944
  default:
241
945
  return {
242
946
  success: false,
243
- messages: input.messages,
244
947
  reasoning: `JudgeAgent: Unknown tool call: ${toolCall.toolName}`,
245
948
  metCriteria: [],
246
949
  unmetCriteria: cfg.criteria
@@ -249,19 +952,15 @@ var JudgeAgent = class extends JudgeAgentAdapter {
249
952
  }
250
953
  return {
251
954
  success: false,
252
- messages: input.messages,
253
955
  reasoning: `JudgeAgent: No tool call found in LLM output`,
254
956
  metCriteria: [],
255
957
  unmetCriteria: cfg.criteria
256
958
  };
257
959
  }
258
- async generateText(input) {
259
- try {
260
- return await generateText(input);
261
- } catch (error) {
262
- this.logger.error("Error generating text", { error });
263
- throw error;
264
- }
960
+ getOpenTelemetryTracesDigest(threadId) {
961
+ const spans = this.spanCollector.getSpansForThread(threadId);
962
+ const digest = judgeSpanDigestFormatter.format(spans);
963
+ return digest;
265
964
  }
266
965
  };
267
966
  var judgeAgent = (cfg) => {
@@ -269,7 +968,6 @@ var judgeAgent = (cfg) => {
269
968
  };
270
969
 
271
970
  // src/agents/user-simulator-agent.ts
272
- import { generateText as generateText2 } from "ai";
273
971
  function buildSystemPrompt2(description) {
274
972
  return `
275
973
  <role>
@@ -285,69 +983,1203 @@ Your goal (assistant) is to interact with the Agent Under Test (user) as if you
285
983
  ${description}
286
984
  </scenario>
287
985
 
288
- <rules>
289
- - DO NOT carry over any requests yourself, YOU ARE NOT the assistant today, you are the user
290
- </rules>
291
- `.trim();
292
- }
293
- var UserSimulatorAgent = class extends UserSimulatorAgentAdapter {
294
- constructor(cfg) {
295
- super();
296
- this.cfg = cfg;
297
- }
298
- logger = new Logger(this.constructor.name);
299
- call = async (input) => {
300
- const config = this.cfg;
301
- const systemPrompt = (config == null ? void 0 : config.systemPrompt) ?? buildSystemPrompt2(input.scenarioConfig.description);
302
- const messages = [
303
- { role: "system", content: systemPrompt },
304
- { role: "assistant", content: "Hello, how can I help you today" },
305
- ...input.messages
306
- ];
307
- const projectConfig = await getProjectConfig();
308
- const mergedConfig = mergeAndValidateConfig(config ?? {}, projectConfig);
309
- if (!mergedConfig.model) {
310
- throw new Error("Model is required for the user simulator agent");
986
+ <rules>
987
+ - DO NOT carry over any requests yourself, YOU ARE NOT the assistant today, you are the user
988
+ </rules>
989
+ `.trim();
990
+ }
991
/**
 * Agent that plays the user's side of a scenario by asking an LLM for the
 * next user message.
 */
var UserSimulatorAgent = class extends UserSimulatorAgentAdapter {
  /**
   * @param cfg - Optional overrides (system prompt and model settings)
   */
  constructor(cfg) {
    super();
    this.cfg = cfg;
  }
  logger = new Logger(this.constructor.name);
  /**
   * LLM invocation function. Can be overridden to customize LLM behavior.
   */
  invokeLLM = createLLMInvoker(this.logger);
  /**
   * Generates the next simulated user message.
   *
   * @param input - Agent input; `scenarioConfig.description` and `messages` are read
   * @returns A `{ role: "user", content }` message holding the LLM text
   * @throws {Error} If the LLM returns no text
   */
  call = async (input) => {
    const overrides = this.cfg;
    const customPrompt = overrides == null ? undefined : overrides.systemPrompt;
    const conversation = [
      {
        role: "system",
        // The scenario-derived prompt is only built when no custom prompt is configured.
        content: customPrompt ?? buildSystemPrompt2(input.scenarioConfig.description)
      },
      { role: "assistant", content: "Hello, how can I help you today" },
      ...input.messages
    ];
    const projectConfig = await getProjectConfig();
    const projectDefaults = projectConfig == null ? undefined : projectConfig.defaultModel;
    // Agent-level settings win over the project-level default model settings.
    const { model, temperature, maxTokens } = modelSchema.parse({
      ...projectDefaults,
      ...overrides
    });
    const { text } = await this.invokeLLM({
      model,
      messages: messageRoleReversal(conversation),
      temperature,
      maxOutputTokens: maxTokens
    });
    if (!text) {
      throw new Error("No response content from LLM");
    }
    return { role: "user", content: text };
  };
};
1028
/**
 * Factory for {@link UserSimulatorAgent}.
 *
 * @param config2 - Optional configuration forwarded to the constructor
 * @returns A new UserSimulatorAgent instance
 */
var userSimulatorAgent = (config2) => new UserSimulatorAgent(config2);
1031
+
1032
+ // src/agents/realtime/realtime-agent.adapter.ts
1033
+ import { EventEmitter } from "events";
1034
+
1035
+ // src/agents/realtime/message-processor.ts
1036
/**
 * Helpers for pulling audio and text payloads out of message content.
 *
 * Content is either a plain string or an array of parts such as
 * `{ type: "text", text }` and `{ type: "file", mediaType, data }`.
 */
var MessageProcessor = class {
  /**
   * Processes audio message content and extracts base64 audio data.
   *
   * Returns the data of the first `file` part whose `mediaType` starts with
   * `audio/`.
   *
   * @param content - The message content to process
   * @returns Base64 audio data string or null if no audio found
   * @throws {Error} If an audio part carries non-string or empty data
   */
  processAudioMessage(content) {
    if (!Array.isArray(content)) {
      return null;
    }
    for (const part of content) {
      const isAudioPart = typeof part === "object" && part !== null && "type" in part && part.type === "file" && "mediaType" in part && typeof part.mediaType === "string" && part.mediaType.startsWith("audio/");
      if (!isAudioPart) {
        continue;
      }
      if (!("data" in part) || typeof part.data !== "string") {
        throw new Error(
          `Audio data must be base64 string, got: ${typeof part.data}`
        );
      }
      if (part.data.length === 0) {
        throw new Error(
          `Audio message has no data. Part: ${JSON.stringify(part)}`
        );
      }
      return part.data;
    }
    return null;
  }
  /**
   * Extracts text content from message content.
   *
   * A string is returned unchanged. For part arrays, the `text` of every
   * `{ type: "text" }` part is concatenated in order, so part-structured
   * text messages are no longer treated as empty.
   *
   * @param content - The message content to process
   * @returns Text string or empty string if no text found
   */
  extractTextMessage(content) {
    if (typeof content === "string") {
      return content;
    }
    if (!Array.isArray(content)) {
      return "";
    }
    let text = "";
    for (const part of content) {
      if (typeof part === "object" && part !== null && part.type === "text" && typeof part.text === "string") {
        text += part.text;
      }
    }
    return text;
  }
  /**
   * Validates that a message has either text or audio content.
   *
   * @param content - The message content to validate
   * @returns True if the message has valid content
   * @throws {Error} Propagated from processAudioMessage for malformed audio parts
   */
  hasValidContent(content) {
    const hasText = this.extractTextMessage(content).length > 0;
    const hasAudio = this.processAudioMessage(content) !== null;
    return hasText || hasAudio;
  }
};
1086
+
1087
+ // src/agents/realtime/realtime-event-handler.ts
1088
/**
 * Collects transcript and audio deltas from a realtime session's transport
 * and exposes them as a single awaited response.
 */
var RealtimeEventHandler = class {
  /**
   * Creates a new RealtimeEventHandler instance
   * @param session - The RealtimeSession to listen to events from
   */
  constructor(session) {
    this.session = session;
    this.ensureEventListeners();
  }
  currentResponse = "";
  currentAudioChunks = [];
  responseResolver = null;
  errorRejecter = null;
  listenersSetup = false;
  /**
   * Gets the transport from the session
   * @returns The session's `transport`, or null when it is not set
   */
  getTransport() {
    const sessionWithTransport = this.session;
    return sessionWithTransport.transport ?? null;
  }
  /**
   * Ensures event listeners are set up, retrying if transport not available.
   * The retry repeats every 100ms with no upper bound until a transport exists.
   */
  ensureEventListeners() {
    if (this.listenersSetup) return;
    const transport = this.getTransport();
    if (!transport) {
      setTimeout(() => this.ensureEventListeners(), 100);
      return;
    }
    this.setupEventListeners();
  }
  /**
   * Sets up event listeners for the RealtimeSession transport layer
   */
  setupEventListeners() {
    if (this.listenersSetup) return;
    const transport = this.getTransport();
    if (!transport) {
      console.error("\u274C Transport not available on session");
      return;
    }
    transport.on("response.output_audio_transcript.delta", (event) => {
      const deltaEvent = event;
      if (typeof deltaEvent.delta === "string") {
        this.currentResponse += deltaEvent.delta;
      }
    });
    transport.on("response.output_audio.delta", (event) => {
      const deltaEvent = event;
      if (typeof deltaEvent.delta === "string") {
        this.currentAudioChunks.push(deltaEvent.delta);
      }
    });
    transport.on("response.done", () => {
      const fullAudio = this.currentAudioChunks.join("");
      const audioResponse = {
        transcript: this.currentResponse,
        audio: fullAudio
      };
      if (this.responseResolver) {
        this.responseResolver(audioResponse);
        this.reset();
      }
    });
    transport.on("error", (error) => {
      console.error(`\u274C Transport error:`, error);
      if (this.errorRejecter) {
        const errorObj = error instanceof Error ? error : new Error(String(error));
        this.errorRejecter(errorObj);
        this.reset();
      }
    });
    this.listenersSetup = true;
  }
  /**
   * Waits for the agent response with timeout
   *
   * The timer is cancelled on both the success and the error path, and it
   * only resets shared state while its own request is still the pending one.
   * Otherwise a timer left over from a failed or superseded request would
   * wipe the state of the request that followed it.
   *
   * @param timeout - Maximum time to wait in milliseconds
   * @returns Promise that resolves with the audio response event
   * @throws {Error} If timeout occurs or transport error happens
   */
  waitForResponse(timeout) {
    return new Promise((resolve, reject) => {
      let timeoutId;
      const resolver = (value) => {
        clearTimeout(timeoutId);
        resolve(value);
      };
      const rejecter = (error) => {
        clearTimeout(timeoutId);
        reject(error);
      };
      timeoutId = setTimeout(() => {
        // Only clear shared state if it still belongs to this request.
        if (this.responseResolver === resolver) {
          this.reset();
        }
        reject(new Error(`Agent response timeout after ${timeout}ms`));
      }, timeout);
      this.responseResolver = resolver;
      this.errorRejecter = rejecter;
    });
  }
  /**
   * Resets the internal state for the next response
   */
  reset() {
    this.responseResolver = null;
    this.errorRejecter = null;
    this.currentResponse = "";
    this.currentAudioChunks = [];
  }
};
1198
+
1199
+ // src/agents/realtime/response-formatter.ts
1200
/**
 * Converts realtime responses into the message shapes used by the
 * Scenario framework.
 */
var ResponseFormatter = class {
  /**
   * Formats an audio response event into Scenario framework format
   *
   * @param audioEvent - Object with `transcript` and `audio` fields
   * @returns Assistant message holding a text part followed by an `audio/pcm16` file part
   */
  formatAudioResponse(audioEvent) {
    const transcriptPart = { type: "text", text: audioEvent.transcript };
    const audioPart = {
      type: "file",
      mediaType: "audio/pcm16",
      data: audioEvent.audio
    };
    return { role: "assistant", content: [transcriptPart, audioPart] };
  }
  /**
   * Formats a text response for the Scenario framework
   *
   * @param text - The text response from the agent
   * @returns The same text, unchanged
   */
  formatTextResponse(text) {
    return text;
  }
  /**
   * Creates an initial response message for when no user message exists
   *
   * @param audioEvent - Object with `transcript` and `audio` fields
   * @returns Same result as formatAudioResponse
   */
  formatInitialResponse(audioEvent) {
    return this.formatAudioResponse(audioEvent);
  }
};
1235
+
1236
+ // src/agents/realtime/realtime-agent.adapter.ts
1237
/**
 * AgentAdapter backed by a realtime session: forwards text or audio input
 * to the session and returns the response collected by RealtimeEventHandler.
 */
var RealtimeAgentAdapter = class extends AgentAdapter {
  /**
   * Creates a new RealtimeAgentAdapter instance
   *
   * The session can be either connected or unconnected.
   * If unconnected, call connect() with an API key before use.
   *
   * @param config2 - Configuration; `role`, `agentName`, `session` and
   *   (later) `responseTimeout` are read from it
   */
  constructor(config2) {
    super();
    const { role, agentName, session } = config2;
    this.config = config2;
    this.role = role;
    this.name = agentName;
    this.session = session;
    this.eventHandler = new RealtimeEventHandler(this.session);
  }
  role;
  name;
  session;
  eventHandler;
  messageProcessor = new MessageProcessor();
  responseFormatter = new ResponseFormatter();
  audioEvents = new EventEmitter();
  /**
   * Connects the underlying session.
   *
   * @param params - Optional connect options; when `apiKey` is missing the
   *   `OPENAI_API_KEY` environment variable is used instead
   */
  async connect(params) {
    const { apiKey, ...rest } = params ?? {};
    const resolvedKey = apiKey ?? process.env.OPENAI_API_KEY;
    await this.session.connect({ apiKey: resolvedKey, ...rest });
  }
  /**
   * Closes the session connection
   */
  async disconnect() {
    this.session.close();
  }
  /**
   * Process input and generate response (implements AgentAdapter interface)
   *
   * Only the last entry of `input.newMessages` is considered. Audio content
   * takes precedence over text; with no message at all an initial response
   * is requested.
   *
   * @param input - Scenario agent input with message history
   * @returns Agent response as audio message or text
   * @throws {Error} If the latest message has neither text nor audio content
   */
  async call(input) {
    console.log(`\u{1F50A} [${this.name}] being called with role: ${this.role}`);
    const pending = input.newMessages;
    const latest = pending[pending.length - 1];
    if (!latest) {
      return this.handleInitialResponse();
    }
    const { content } = latest;
    const audioData = this.messageProcessor.processAudioMessage(content);
    if (audioData) {
      return this.handleAudioInput(audioData);
    }
    const text = this.messageProcessor.extractTextMessage(content);
    if (text) {
      return this.handleTextInput(text);
    }
    throw new Error("Message has no text or audio content");
  }
  /**
   * Handles the initial response when no user message exists
   *
   * @throws {Error} If the session has no transport
   */
  async handleInitialResponse() {
    console.log(`[${this.name}] First message, creating response`);
    const { transport } = this.session;
    if (!transport) {
      throw new Error("Realtime transport not available");
    }
    transport.sendEvent({ type: "response.create" });
    const response = await this.eventHandler.waitForResponse(
      this.config.responseTimeout ?? 60000
    );
    this.audioEvents.emit("audioResponse", response);
    return this.responseFormatter.formatInitialResponse(response);
  }
  /**
   * Handles audio input from the user
   *
   * @param audioData - Audio payload sent as the `audio` field of the append event
   * @throws {Error} If the session has no transport
   */
  async handleAudioInput(audioData) {
    const { transport } = this.session;
    if (!transport) {
      throw new Error("Realtime transport not available");
    }
    // Append the audio, commit the buffer, then ask for a response, in that order.
    const outgoing = [
      { type: "input_audio_buffer.append", audio: audioData },
      { type: "input_audio_buffer.commit" },
      { type: "response.create" }
    ];
    for (const event of outgoing) {
      transport.sendEvent(event);
    }
    const response = await this.eventHandler.waitForResponse(
      this.config.responseTimeout ?? 60000
    );
    this.audioEvents.emit("audioResponse", response);
    return this.responseFormatter.formatAudioResponse(response);
  }
  /**
   * Handles text input from the user
   *
   * Uses a 30000ms default timeout (the audio paths default to 60000ms).
   *
   * @param text - Text forwarded to `session.sendMessage`
   * @returns The response transcript
   */
  async handleTextInput(text) {
    this.session.sendMessage(text);
    const response = await this.eventHandler.waitForResponse(
      this.config.responseTimeout ?? 30000
    );
    this.audioEvents.emit("audioResponse", response);
    return this.responseFormatter.formatTextResponse(response.transcript);
  }
  /**
   * Subscribe to audio response events
   *
   * @param callback - Function called when an audio response completes
   */
  onAudioResponse(callback) {
    this.audioEvents.on("audioResponse", callback);
  }
  /**
   * Remove audio response listener
   *
   * @param callback - The callback function to remove
   */
  offAudioResponse(callback) {
    this.audioEvents.off("audioResponse", callback);
  }
};
1375
+
1376
+ // src/execution/index.ts
1377
// Namespace object for src/execution: each export is installed by __export as
// a getter, so the referenced bindings are only read when accessed.
var execution_exports = {};
__export(execution_exports, {
  ScenarioExecution: function() {
    return ScenarioExecution;
  },
  ScenarioExecutionState: function() {
    return ScenarioExecutionState;
  },
  StateChangeEventType: function() {
    return StateChangeEventType;
  }
});
1383
+
1384
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/platform/node/globalThis.js
1385
+ var _globalThis = typeof globalThis === "object" ? globalThis : global;
1386
+
1387
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/version.js
1388
+ var VERSION = "1.9.0";
1389
+
1390
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/internal/semver.js
1391
// Matches "major.minor.patch" with an optional "-prerelease" suffix.
var re = /^(\d+)\.(\d+)\.(\d+)(-(.+))?$/;
/**
 * Builds a predicate telling whether a globally registered API version is
 * compatible with `ownVersion`.
 *
 * - An unparsable own version yields a predicate that is always false.
 * - An own prerelease version only matches itself exactly.
 * - Otherwise the other version must parse, have no prerelease tag and share
 *   the major number; for major 0 the minor must be equal and the patch not
 *   lower, for other majors the minor must not be lower.
 *
 * Verdicts are remembered per version string.
 *
 * @param ownVersion - Version string of this API copy
 * @returns Predicate taking the global version string
 */
function _makeCompatibilityCheck(ownVersion) {
  var parse = function(version) {
    var match = version.match(re);
    if (!match) {
      return null;
    }
    return {
      major: +match[1],
      minor: +match[2],
      patch: +match[3],
      prerelease: match[4]
    };
  };
  var own = parse(ownVersion);
  if (!own) {
    return function() {
      return false;
    };
  }
  if (own.prerelease != null) {
    return function isExactmatch(globalVersion) {
      return globalVersion === ownVersion;
    };
  }
  // Cache of decisions; the own version is accepted up front.
  var verdicts = /* @__PURE__ */ new Map([[ownVersion, true]]);
  var decide = function(globalVersion) {
    var other = parse(globalVersion);
    if (!other || other.prerelease != null || other.major !== own.major) {
      return false;
    }
    if (own.major === 0) {
      return own.minor === other.minor && own.patch <= other.patch;
    }
    return own.minor <= other.minor;
  };
  return function isCompatible2(globalVersion) {
    var cached = verdicts.get(globalVersion);
    if (cached !== void 0) {
      return cached;
    }
    var verdict = decide(globalVersion);
    verdicts.set(globalVersion, verdict);
    return verdict;
  };
}
1455
+ var isCompatible = _makeCompatibilityCheck(VERSION);
1456
+
1457
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/internal/global-utils.js
1458
+ var major = VERSION.split(".")[0];
1459
+ var GLOBAL_OPENTELEMETRY_API_KEY = Symbol.for("opentelemetry.js.api." + major);
1460
+ var _global = _globalThis;
1461
/**
 * Stores `instance` under `type` on the shared API object kept on the global
 * object at GLOBAL_OPENTELEMETRY_API_KEY, creating that object (stamped with
 * VERSION) if it does not exist yet.
 *
 * @param type - Name of the API slot
 * @param instance - Value to register
 * @param diag - Logger used for `error` and `debug` reports
 * @param allowOverride - When true an existing registration may be replaced; defaults to false
 * @returns true on success; false (after logging via `diag.error`) when the
 *   slot is already taken or the shared object's version differs from VERSION
 */
function registerGlobal(type, instance, diag, allowOverride) {
  if (allowOverride === void 0) {
    allowOverride = false;
  }
  var existing = _global[GLOBAL_OPENTELEMETRY_API_KEY];
  var api = existing !== null && existing !== void 0 ? existing : {
    version: VERSION
  };
  _global[GLOBAL_OPENTELEMETRY_API_KEY] = api;
  var failure = null;
  if (!allowOverride && api[type]) {
    failure = new Error("@opentelemetry/api: Attempted duplicate registration of API: " + type);
  } else if (api.version !== VERSION) {
    failure = new Error("@opentelemetry/api: Registration of version v" + api.version + " for " + type + " does not match previously registered API v" + VERSION);
  }
  if (failure) {
    diag.error(failure.stack || failure.message);
    return false;
  }
  api[type] = instance;
  diag.debug("@opentelemetry/api: Registered a global for " + type + " v" + VERSION + ".");
  return true;
}
1483
/**
 * Looks up a registered API instance on the shared global API object.
 *
 * @param type - Name of the API slot
 * @returns The registered value, or undefined when nothing is registered or
 *   the shared object's version fails the `isCompatible` check
 */
function getGlobal(type) {
  var api = _global[GLOBAL_OPENTELEMETRY_API_KEY];
  var globalVersion = api === null || api === void 0 ? void 0 : api.version;
  if (globalVersion && isCompatible(globalVersion)) {
    return api[type];
  }
  return;
}
1491
/**
 * Removes the `type` slot from the shared global API object, if that object exists.
 *
 * @param type - Name of the API slot
 * @param diag - Logger that receives a debug message before removal
 */
function unregisterGlobal(type, diag) {
  diag.debug("@opentelemetry/api: Unregistering a global for " + type + " v" + VERSION + ".");
  var shared = _global[GLOBAL_OPENTELEMETRY_API_KEY];
  if (!shared) {
    return;
  }
  delete shared[type];
}
1498
+
1499
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/diag/ComponentLogger.js
1500
/**
 * Reads up to `n` values (all values when `n` is undefined) from an iterable
 * into a new array. A value without `Symbol.iterator` is returned as-is.
 * If reading stops before the iterator is done, its `return` method is
 * called when present; an error raised while reading is rethrown afterwards.
 */
var __read = function(o, n) {
  var iteratorFactory = typeof Symbol === "function" && o[Symbol.iterator];
  if (!iteratorFactory) return o;
  var iterator = iteratorFactory.call(o);
  var step;
  var collected = [];
  var failure;
  try {
    for (; ; ) {
      // The post-decrement runs on every pass in which n is defined.
      if (n !== void 0 && !(n-- > 0)) break;
      step = iterator.next();
      if (step.done) break;
      collected.push(step.value);
    }
  } catch (error) {
    failure = { error };
  } finally {
    try {
      if (step && !step.done) {
        var close = iterator["return"];
        if (close) close.call(iterator);
      }
    } finally {
      if (failure) throw failure.error;
    }
  }
  return collected;
};
/**
 * Returns `to` concatenated with the elements of `from`. When `pack` is
 * truthy, or only two arguments are passed, holes in `from` are filled in
 * as explicit entries before concatenating.
 */
var __spreadArray = function(to, from, pack) {
  var dense;
  if (pack || arguments.length === 2) {
    var length = from.length;
    for (var index = 0; index < length; index++) {
      // Copying starts at the first hole; everything after it is copied too.
      if (!dense && index in from) continue;
      if (!dense) dense = Array.prototype.slice.call(from, 0, index);
      dense[index] = from[index];
    }
  }
  return to.concat(dense || Array.prototype.slice.call(from));
};
1526
/**
 * Logger bound to a namespace: every level method forwards its arguments to
 * `logProxy` together with the level name and the namespace.
 */
var DiagComponentLogger = (
  /** @class */
  (function() {
    /**
     * @param props - Options; `props.namespace` falls back to "DiagComponentLogger"
     */
    function DiagComponentLogger2(props) {
      this._namespace = props.namespace || "DiagComponentLogger";
    }
    // One forwarding method per level, defined in the original order.
    ["debug", "error", "info", "warn", "verbose"].forEach(function(level) {
      DiagComponentLogger2.prototype[level] = function() {
        var args = Array.prototype.slice.call(arguments);
        return logProxy(level, this._namespace, args);
      };
    });
    return DiagComponentLogger2;
  })()
);
1570
/**
 * Forwards a log call to the globally registered "diag" logger, if any.
 *
 * @param funcName - Name of the logger method to invoke
 * @param namespace - Namespace inserted as the first argument (mutates `args`)
 * @param args - Arguments of the original log call
 * @returns The logger method's return value, or undefined when no logger is registered
 */
function logProxy(funcName, namespace, args) {
  var sink = getGlobal("diag");
  if (sink) {
    args.unshift(namespace);
    return sink[funcName].apply(sink, __spreadArray([], __read(args), false));
  }
  return;
}
1578
+
1579
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/diag/types.js
1580
// Two-way mapping between diagnostic level names and their numeric values.
var DiagLogLevel;
(function(levels) {
  [
    ["NONE", 0],
    ["ERROR", 30],
    ["WARN", 50],
    ["INFO", 60],
    ["DEBUG", 70],
    ["VERBOSE", 80],
    ["ALL", 9999]
  ].forEach(function(entry) {
    levels[entry[0]] = entry[1];
    levels[entry[1]] = entry[0];
  });
})(DiagLogLevel || (DiagLogLevel = {}));
1590
+
1591
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/diag/internal/logLevelLogger.js
1592
/**
 * Wraps a logger so that only methods at or below `maxLevel` are forwarded.
 *
 * @param maxLevel - Highest level to let through; clamped to the range
 *   DiagLogLevel.NONE..DiagLogLevel.ALL
 * @param logger2 - Logger to wrap; a missing logger is treated as `{}`
 * @returns Object with `error`, `warn`, `info`, `debug` and `verbose`; each is
 *   the bound original method, or a no-op when that method is missing or
 *   above `maxLevel`
 */
function createLogLevelDiagLogger(maxLevel, logger2) {
  if (maxLevel < DiagLogLevel.NONE) {
    maxLevel = DiagLogLevel.NONE;
  } else if (maxLevel > DiagLogLevel.ALL) {
    maxLevel = DiagLogLevel.ALL;
  }
  logger2 = logger2 || {};
  var filtered = {};
  [
    ["error", DiagLogLevel.ERROR],
    ["warn", DiagLogLevel.WARN],
    ["info", DiagLogLevel.INFO],
    ["debug", DiagLogLevel.DEBUG],
    ["verbose", DiagLogLevel.VERBOSE]
  ].forEach(function(pair) {
    var impl = logger2[pair[0]];
    filtered[pair[0]] = typeof impl === "function" && maxLevel >= pair[1] ? impl.bind(logger2) : function() {
    };
  });
  return filtered;
}
1615
+
1616
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/api/diag.js
1617
/**
 * Reads up to `n` values (all values when `n` is undefined) from an iterable
 * into a new array. A value without `Symbol.iterator` is returned as-is.
 * If reading stops before the iterator is done, its `return` method is
 * called when present; an error raised while reading is rethrown afterwards.
 */
var __read2 = function(o, n) {
  var iteratorFactory = typeof Symbol === "function" && o[Symbol.iterator];
  if (!iteratorFactory) return o;
  var iterator = iteratorFactory.call(o);
  var step;
  var collected = [];
  var failure;
  try {
    for (; ; ) {
      // The post-decrement runs on every pass in which n is defined.
      if (n !== void 0 && !(n-- > 0)) break;
      step = iterator.next();
      if (step.done) break;
      collected.push(step.value);
    }
  } catch (error) {
    failure = { error };
  } finally {
    try {
      if (step && !step.done) {
        var close = iterator["return"];
        if (close) close.call(iterator);
      }
    } finally {
      if (failure) throw failure.error;
    }
  }
  return collected;
};
/**
 * Returns `to` concatenated with the elements of `from`. When `pack` is
 * truthy, or only two arguments are passed, holes in `from` are filled in
 * as explicit entries before concatenating.
 */
var __spreadArray2 = function(to, from, pack) {
  var dense;
  if (pack || arguments.length === 2) {
    var length = from.length;
    for (var index = 0; index < length; index++) {
      // Copying starts at the first hole; everything after it is copied too.
      if (!dense && index in from) continue;
      if (!dense) dense = Array.prototype.slice.call(from, 0, index);
      dense[index] = from[index];
    }
  }
  return to.concat(dense || Array.prototype.slice.call(from));
};
1643
+ var API_NAME = "diag";
1644
/**
 * Singleton diagnostics API. Its level methods forward to whatever logger is
 * currently registered globally under "diag".
 */
var DiagAPI = (
  /** @class */
  (function() {
    function DiagAPI2() {
      var self = this;
      // Builds a method that forwards its arguments to the registered logger, if any.
      function _logProxy(funcName) {
        return function() {
          var args = Array.prototype.slice.call(arguments);
          var logger2 = getGlobal("diag");
          if (!logger2)
            return;
          return logger2[funcName].apply(logger2, __spreadArray2([], __read2(args), false));
        };
      }
      /**
       * Registers `logger2` as the global diag logger, filtered by log level.
       *
       * @param logger2 - Logger to install; passing the DiagAPI itself is rejected
       * @param optionsOrLogLevel - A numeric level or an options object
       *   (`logLevel`, `suppressOverrideMessage`); the level defaults to INFO
       * @returns Result of registerGlobal, or false when `logger2` is this API
       */
      var setLogger = function(logger2, optionsOrLogLevel) {
        if (optionsOrLogLevel === void 0) {
          optionsOrLogLevel = { logLevel: DiagLogLevel.INFO };
        }
        if (logger2 === self) {
          var err = new Error("Cannot use diag as the logger for itself. Please use a DiagLogger implementation like ConsoleDiagLogger or a custom implementation");
          self.error(err.stack ?? err.message);
          return false;
        }
        if (typeof optionsOrLogLevel === "number") {
          optionsOrLogLevel = { logLevel: optionsOrLogLevel };
        }
        var oldLogger = getGlobal("diag");
        var newLogger = createLogLevelDiagLogger(optionsOrLogLevel.logLevel ?? DiagLogLevel.INFO, logger2);
        if (oldLogger && !optionsOrLogLevel.suppressOverrideMessage) {
          // Both the outgoing and the incoming logger are told about the replacement.
          var stack = new Error().stack ?? "<failed to generate stacktrace>";
          oldLogger.warn("Current logger will be overwritten from " + stack);
          newLogger.warn("Current logger will overwrite one already registered from " + stack);
        }
        return registerGlobal("diag", newLogger, self, true);
      };
      self.setLogger = setLogger;
      self.disable = function() {
        unregisterGlobal(API_NAME, self);
      };
      self.createComponentLogger = function(options) {
        return new DiagComponentLogger(options);
      };
      self.verbose = _logProxy("verbose");
      self.debug = _logProxy("debug");
      self.info = _logProxy("info");
      self.warn = _logProxy("warn");
      self.error = _logProxy("error");
    }
    /** Returns the shared instance, creating it on first use. */
    DiagAPI2.instance = function() {
      if (!this._instance) {
        this._instance = new DiagAPI2();
      }
      return this._instance;
    };
    return DiagAPI2;
  })()
);
1707
+
1708
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/context/context.js
1709
/**
 * Creates a context key.
 *
 * @param description - Key passed to `Symbol.for`, so equal descriptions yield the same symbol
 * @returns The symbol for `description`
 */
function createContextKey(description) {
  var key = Symbol.for(description);
  return key;
}
1712
/**
 * Key/value context whose `setValue` and `deleteValue` return a new context
 * built from a copy of the entries instead of modifying the receiver.
 */
var BaseContext = (
  /** @class */
  /* @__PURE__ */ (function() {
    /**
     * @param parentContext - Optional entries to copy into the new context
     */
    function BaseContext2(parentContext) {
      var self = this;
      if (parentContext) {
        self._currentContext = new Map(parentContext);
      } else {
        self._currentContext = /* @__PURE__ */ new Map();
      }
      // Copies the current entries into a fresh context and applies one change to the copy.
      var fork = function(mutate) {
        var next = new BaseContext2(self._currentContext);
        mutate(next._currentContext);
        return next;
      };
      self.getValue = function(key) {
        return self._currentContext.get(key);
      };
      self.setValue = function(key, value) {
        return fork(function(entries) {
          entries.set(key, value);
        });
      };
      self.deleteValue = function(key) {
        return fork(function(entries) {
          entries.delete(key);
        });
      };
    }
    return BaseContext2;
  })()
);
1735
+ var ROOT_CONTEXT = new BaseContext();
1736
+
1737
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/context/NoopContextManager.js
1738
/**
 * Reads up to `n` values (all values when `n` is undefined) from an iterable
 * into a new array. A value without `Symbol.iterator` is returned as-is.
 * If reading stops before the iterator is done, its `return` method is
 * called when present; an error raised while reading is rethrown afterwards.
 */
var __read3 = function(o, n) {
  var iteratorFactory = typeof Symbol === "function" && o[Symbol.iterator];
  if (!iteratorFactory) return o;
  var iterator = iteratorFactory.call(o);
  var step;
  var collected = [];
  var failure;
  try {
    for (; ; ) {
      // The post-decrement runs on every pass in which n is defined.
      if (n !== void 0 && !(n-- > 0)) break;
      step = iterator.next();
      if (step.done) break;
      collected.push(step.value);
    }
  } catch (error) {
    failure = { error };
  } finally {
    try {
      if (step && !step.done) {
        var close = iterator["return"];
        if (close) close.call(iterator);
      }
    } finally {
      if (failure) throw failure.error;
    }
  }
  return collected;
};
/**
 * Returns `to` concatenated with the elements of `from`. When `pack` is
 * truthy, or only two arguments are passed, holes in `from` are filled in
 * as explicit entries before concatenating.
 */
var __spreadArray3 = function(to, from, pack) {
  var dense;
  if (pack || arguments.length === 2) {
    var length = from.length;
    for (var index = 0; index < length; index++) {
      // Copying starts at the first hole; everything after it is copied too.
      if (!dense && index in from) continue;
      if (!dense) dense = Array.prototype.slice.call(from, 0, index);
      dense[index] = from[index];
    }
  }
  return to.concat(dense || Array.prototype.slice.call(from));
};
1764
/**
 * Context manager that does no context tracking: the active context is
 * always ROOT_CONTEXT and callbacks are simply invoked.
 */
var NoopContextManager = (
  /** @class */
  (function() {
    function NoopContextManager2() {
    }
    var proto = NoopContextManager2.prototype;
    /** @returns ROOT_CONTEXT */
    proto.active = function() {
      return ROOT_CONTEXT;
    };
    /**
     * Calls `fn` with `thisArg` as `this` and any extra arguments; the
     * context argument is ignored.
     */
    proto.with = function(_context, fn, thisArg) {
      var extra = Array.prototype.slice.call(arguments, 3);
      return fn.call.apply(fn, __spreadArray3([thisArg], __read3(extra), false));
    };
    /** @returns `target` unchanged */
    proto.bind = function(_context, target) {
      return target;
    };
    /** @returns This manager */
    proto.enable = function() {
      return this;
    };
    /** @returns This manager */
    proto.disable = function() {
      return this;
    };
    return NoopContextManager2;
  })()
);
1791
+
1792
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/api/context.js
1793
/**
 * Reads up to `n` values (all values when `n` is undefined) from an iterable
 * into a new array. A value without `Symbol.iterator` is returned as-is.
 * If reading stops before the iterator is done, its `return` method is
 * called when present; an error raised while reading is rethrown afterwards.
 */
var __read4 = function(o, n) {
  var iteratorFactory = typeof Symbol === "function" && o[Symbol.iterator];
  if (!iteratorFactory) return o;
  var iterator = iteratorFactory.call(o);
  var step;
  var collected = [];
  var failure;
  try {
    for (; ; ) {
      // The post-decrement runs on every pass in which n is defined.
      if (n !== void 0 && !(n-- > 0)) break;
      step = iterator.next();
      if (step.done) break;
      collected.push(step.value);
    }
  } catch (error) {
    failure = { error };
  } finally {
    try {
      if (step && !step.done) {
        var close = iterator["return"];
        if (close) close.call(iterator);
      }
    } finally {
      if (failure) throw failure.error;
    }
  }
  return collected;
};
/**
 * Returns `to` concatenated with the elements of `from`. When `pack` is
 * truthy, or only two arguments are passed, holes in `from` are filled in
 * as explicit entries before concatenating.
 */
var __spreadArray4 = function(to, from, pack) {
  var dense;
  if (pack || arguments.length === 2) {
    var length = from.length;
    for (var index = 0; index < length; index++) {
      // Copying starts at the first hole; everything after it is copied too.
      if (!dense && index in from) continue;
      if (!dense) dense = Array.prototype.slice.call(from, 0, index);
      dense[index] = from[index];
    }
  }
  return to.concat(dense || Array.prototype.slice.call(from));
};
1819
+ var API_NAME2 = "context";
1820
+ var NOOP_CONTEXT_MANAGER = new NoopContextManager();
1821
+ var ContextAPI = (
1822
+ /** @class */
1823
+ (function() {
1824
+ function ContextAPI2() {
1825
+ }
1826
+ ContextAPI2.getInstance = function() {
1827
+ if (!this._instance) {
1828
+ this._instance = new ContextAPI2();
1829
+ }
1830
+ return this._instance;
1831
+ };
1832
+ ContextAPI2.prototype.setGlobalContextManager = function(contextManager) {
1833
+ return registerGlobal(API_NAME2, contextManager, DiagAPI.instance());
1834
+ };
1835
+ ContextAPI2.prototype.active = function() {
1836
+ return this._getContextManager().active();
1837
+ };
1838
+ ContextAPI2.prototype.with = function(context2, fn, thisArg) {
1839
+ var _a;
1840
+ var args = [];
1841
+ for (var _i = 3; _i < arguments.length; _i++) {
1842
+ args[_i - 3] = arguments[_i];
1843
+ }
1844
+ return (_a = this._getContextManager()).with.apply(_a, __spreadArray4([context2, fn, thisArg], __read4(args), false));
1845
+ };
1846
+ ContextAPI2.prototype.bind = function(context2, target) {
1847
+ return this._getContextManager().bind(context2, target);
1848
+ };
1849
+ ContextAPI2.prototype._getContextManager = function() {
1850
+ return getGlobal(API_NAME2) || NOOP_CONTEXT_MANAGER;
1851
+ };
1852
+ ContextAPI2.prototype.disable = function() {
1853
+ this._getContextManager().disable();
1854
+ unregisterGlobal(API_NAME2, DiagAPI.instance());
1855
+ };
1856
+ return ContextAPI2;
1857
+ })()
1858
+ );
1859
+
1860
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/trace_flags.js
1861
+ var TraceFlags;
1862
+ (function(TraceFlags2) {
1863
+ TraceFlags2[TraceFlags2["NONE"] = 0] = "NONE";
1864
+ TraceFlags2[TraceFlags2["SAMPLED"] = 1] = "SAMPLED";
1865
+ })(TraceFlags || (TraceFlags = {}));
1866
+
1867
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/invalid-span-constants.js
1868
+ var INVALID_SPANID = "0000000000000000";
1869
+ var INVALID_TRACEID = "00000000000000000000000000000000";
1870
+ var INVALID_SPAN_CONTEXT = {
1871
+ traceId: INVALID_TRACEID,
1872
+ spanId: INVALID_SPANID,
1873
+ traceFlags: TraceFlags.NONE
1874
+ };
1875
+
1876
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/NonRecordingSpan.js
1877
+ var NonRecordingSpan = (
1878
+ /** @class */
1879
+ (function() {
1880
+ function NonRecordingSpan2(_spanContext) {
1881
+ if (_spanContext === void 0) {
1882
+ _spanContext = INVALID_SPAN_CONTEXT;
1883
+ }
1884
+ this._spanContext = _spanContext;
1885
+ }
1886
+ NonRecordingSpan2.prototype.spanContext = function() {
1887
+ return this._spanContext;
1888
+ };
1889
+ NonRecordingSpan2.prototype.setAttribute = function(_key, _value) {
1890
+ return this;
1891
+ };
1892
+ NonRecordingSpan2.prototype.setAttributes = function(_attributes) {
1893
+ return this;
1894
+ };
1895
+ NonRecordingSpan2.prototype.addEvent = function(_name, _attributes) {
1896
+ return this;
1897
+ };
1898
+ NonRecordingSpan2.prototype.addLink = function(_link) {
1899
+ return this;
1900
+ };
1901
+ NonRecordingSpan2.prototype.addLinks = function(_links) {
1902
+ return this;
1903
+ };
1904
+ NonRecordingSpan2.prototype.setStatus = function(_status) {
1905
+ return this;
1906
+ };
1907
+ NonRecordingSpan2.prototype.updateName = function(_name) {
1908
+ return this;
1909
+ };
1910
+ NonRecordingSpan2.prototype.end = function(_endTime) {
1911
+ };
1912
+ NonRecordingSpan2.prototype.isRecording = function() {
1913
+ return false;
1914
+ };
1915
+ NonRecordingSpan2.prototype.recordException = function(_exception, _time) {
1916
+ };
1917
+ return NonRecordingSpan2;
1918
+ })()
1919
+ );
1920
+
1921
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/context-utils.js
1922
// Context key under which the span is stored in a context.
var SPAN_KEY = createContextKey("OpenTelemetry Context Key SPAN");

/** Returns the span stored in `context2`, or undefined when there is none. */
function getSpan(context2) {
  var stored = context2.getValue(SPAN_KEY);
  return stored || void 0;
}

/** Returns the span stored in the currently active context, if any. */
function getActiveSpan() {
  var activeContext = ContextAPI.getInstance().active();
  return getSpan(activeContext);
}

/** Returns the result of storing `span` in `context2` under SPAN_KEY. */
function setSpan(context2, span) {
  return context2.setValue(SPAN_KEY, span);
}

/** Returns the result of removing the SPAN_KEY entry from `context2`. */
function deleteSpan(context2) {
  return context2.deleteValue(SPAN_KEY);
}

/** Wraps `spanContext` in a NonRecordingSpan and stores it in `context2`. */
function setSpanContext(context2, spanContext) {
  var carrier = new NonRecordingSpan(spanContext);
  return setSpan(context2, carrier);
}

/** Returns the span context of the span stored in `context2`, if any. */
function getSpanContext(context2) {
  return getSpan(context2)?.spanContext();
}
1942
+
1943
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/spancontext-utils.js
1944
+ var VALID_TRACEID_REGEX = /^([0-9a-f]{32})$/i;
1945
+ var VALID_SPANID_REGEX = /^[0-9a-f]{16}$/i;
1946
+ function isValidTraceId(traceId) {
1947
+ return VALID_TRACEID_REGEX.test(traceId) && traceId !== INVALID_TRACEID;
1948
+ }
1949
+ function isValidSpanId(spanId) {
1950
+ return VALID_SPANID_REGEX.test(spanId) && spanId !== INVALID_SPANID;
1951
+ }
1952
+ function isSpanContextValid(spanContext) {
1953
+ return isValidTraceId(spanContext.traceId) && isValidSpanId(spanContext.spanId);
1954
+ }
1955
+ function wrapSpanContext(spanContext) {
1956
+ return new NonRecordingSpan(spanContext);
1957
+ }
1958
+
1959
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/NoopTracer.js
1960
+ var contextApi = ContextAPI.getInstance();
1961
+ var NoopTracer = (
1962
+ /** @class */
1963
+ (function() {
1964
+ function NoopTracer2() {
1965
+ }
1966
+ NoopTracer2.prototype.startSpan = function(name, options, context2) {
1967
+ if (context2 === void 0) {
1968
+ context2 = contextApi.active();
1969
+ }
1970
+ var root = Boolean(options === null || options === void 0 ? void 0 : options.root);
1971
+ if (root) {
1972
+ return new NonRecordingSpan();
1973
+ }
1974
+ var parentFromContext = context2 && getSpanContext(context2);
1975
+ if (isSpanContext(parentFromContext) && isSpanContextValid(parentFromContext)) {
1976
+ return new NonRecordingSpan(parentFromContext);
1977
+ } else {
1978
+ return new NonRecordingSpan();
1979
+ }
1980
+ };
1981
+ NoopTracer2.prototype.startActiveSpan = function(name, arg2, arg3, arg4) {
1982
+ var opts;
1983
+ var ctx;
1984
+ var fn;
1985
+ if (arguments.length < 2) {
1986
+ return;
1987
+ } else if (arguments.length === 2) {
1988
+ fn = arg2;
1989
+ } else if (arguments.length === 3) {
1990
+ opts = arg2;
1991
+ fn = arg3;
1992
+ } else {
1993
+ opts = arg2;
1994
+ ctx = arg3;
1995
+ fn = arg4;
1996
+ }
1997
+ var parentContext = ctx !== null && ctx !== void 0 ? ctx : contextApi.active();
1998
+ var span = this.startSpan(name, opts, parentContext);
1999
+ var contextWithSpanSet = setSpan(parentContext, span);
2000
+ return contextApi.with(contextWithSpanSet, fn, void 0, span);
2001
+ };
2002
+ return NoopTracer2;
2003
+ })()
2004
+ );
2005
/**
 * Structural check for a span context: a non-null object whose `spanId` and
 * `traceId` are strings and whose `traceFlags` is a number.
 *
 * `typeof null` is "object", so null must be rejected explicitly; without
 * the guard the property reads below would throw a TypeError.
 *
 * @param spanContext - Candidate value (may be anything, including null).
 * @returns true when the value has the span-context shape, false otherwise.
 */
function isSpanContext(spanContext) {
  return typeof spanContext === "object" && spanContext !== null && typeof spanContext["spanId"] === "string" && typeof spanContext["traceId"] === "string" && typeof spanContext["traceFlags"] === "number";
}
2008
+
2009
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/ProxyTracer.js
2010
+ var NOOP_TRACER = new NoopTracer();
2011
+ var ProxyTracer = (
2012
+ /** @class */
2013
+ (function() {
2014
+ function ProxyTracer2(_provider, name, version, options) {
2015
+ this._provider = _provider;
2016
+ this.name = name;
2017
+ this.version = version;
2018
+ this.options = options;
2019
+ }
2020
+ ProxyTracer2.prototype.startSpan = function(name, options, context2) {
2021
+ return this._getTracer().startSpan(name, options, context2);
2022
+ };
2023
+ ProxyTracer2.prototype.startActiveSpan = function(_name, _options, _context, _fn) {
2024
+ var tracer = this._getTracer();
2025
+ return Reflect.apply(tracer.startActiveSpan, tracer, arguments);
2026
+ };
2027
+ ProxyTracer2.prototype._getTracer = function() {
2028
+ if (this._delegate) {
2029
+ return this._delegate;
2030
+ }
2031
+ var tracer = this._provider.getDelegateTracer(this.name, this.version, this.options);
2032
+ if (!tracer) {
2033
+ return NOOP_TRACER;
2034
+ }
2035
+ this._delegate = tracer;
2036
+ return this._delegate;
2037
+ };
2038
+ return ProxyTracer2;
2039
+ })()
2040
+ );
2041
+
2042
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/NoopTracerProvider.js
2043
+ var NoopTracerProvider = (
2044
+ /** @class */
2045
+ (function() {
2046
+ function NoopTracerProvider2() {
311
2047
  }
312
- const reversedMessages = messageRoleReversal(messages);
313
- const completion = await this.generateText({
314
- model: mergedConfig.model,
315
- messages: reversedMessages,
316
- temperature: mergedConfig.temperature ?? DEFAULT_TEMPERATURE,
317
- maxTokens: mergedConfig.maxTokens
318
- });
319
- const messageContent = completion.text;
320
- if (!messageContent) {
321
- throw new Error("No response content from LLM");
2048
+ NoopTracerProvider2.prototype.getTracer = function(_name, _version, _options) {
2049
+ return new NoopTracer();
2050
+ };
2051
+ return NoopTracerProvider2;
2052
+ })()
2053
+ );
2054
+
2055
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/ProxyTracerProvider.js
2056
+ var NOOP_TRACER_PROVIDER = new NoopTracerProvider();
2057
+ var ProxyTracerProvider = (
2058
+ /** @class */
2059
+ (function() {
2060
+ function ProxyTracerProvider2() {
322
2061
  }
323
- return { role: "user", content: messageContent };
324
- };
325
- async generateText(input) {
326
- try {
327
- return await generateText2(input);
328
- } catch (error) {
329
- this.logger.error("Error generating text", { error });
330
- throw error;
2062
+ ProxyTracerProvider2.prototype.getTracer = function(name, version, options) {
2063
+ var _a;
2064
+ return (_a = this.getDelegateTracer(name, version, options)) !== null && _a !== void 0 ? _a : new ProxyTracer(this, name, version, options);
2065
+ };
2066
+ ProxyTracerProvider2.prototype.getDelegate = function() {
2067
+ var _a;
2068
+ return (_a = this._delegate) !== null && _a !== void 0 ? _a : NOOP_TRACER_PROVIDER;
2069
+ };
2070
+ ProxyTracerProvider2.prototype.setDelegate = function(delegate) {
2071
+ this._delegate = delegate;
2072
+ };
2073
+ ProxyTracerProvider2.prototype.getDelegateTracer = function(name, version, options) {
2074
+ var _a;
2075
+ return (_a = this._delegate) === null || _a === void 0 ? void 0 : _a.getTracer(name, version, options);
2076
+ };
2077
+ return ProxyTracerProvider2;
2078
+ })()
2079
+ );
2080
+
2081
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/context-api.js
2082
+ var context = ContextAPI.getInstance();
2083
+
2084
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/api/trace.js
2085
+ var API_NAME3 = "trace";
2086
+ var TraceAPI = (
2087
+ /** @class */
2088
+ (function() {
2089
+ function TraceAPI2() {
2090
+ this._proxyTracerProvider = new ProxyTracerProvider();
2091
+ this.wrapSpanContext = wrapSpanContext;
2092
+ this.isSpanContextValid = isSpanContextValid;
2093
+ this.deleteSpan = deleteSpan;
2094
+ this.getSpan = getSpan;
2095
+ this.getActiveSpan = getActiveSpan;
2096
+ this.getSpanContext = getSpanContext;
2097
+ this.setSpan = setSpan;
2098
+ this.setSpanContext = setSpanContext;
331
2099
  }
332
- }
333
- };
334
- var userSimulatorAgent = (config) => {
335
- return new UserSimulatorAgent(config);
336
- };
2100
+ TraceAPI2.getInstance = function() {
2101
+ if (!this._instance) {
2102
+ this._instance = new TraceAPI2();
2103
+ }
2104
+ return this._instance;
2105
+ };
2106
+ TraceAPI2.prototype.setGlobalTracerProvider = function(provider) {
2107
+ var success = registerGlobal(API_NAME3, this._proxyTracerProvider, DiagAPI.instance());
2108
+ if (success) {
2109
+ this._proxyTracerProvider.setDelegate(provider);
2110
+ }
2111
+ return success;
2112
+ };
2113
+ TraceAPI2.prototype.getTracerProvider = function() {
2114
+ return getGlobal(API_NAME3) || this._proxyTracerProvider;
2115
+ };
2116
+ TraceAPI2.prototype.getTracer = function(name, version) {
2117
+ return this.getTracerProvider().getTracer(name, version);
2118
+ };
2119
+ TraceAPI2.prototype.disable = function() {
2120
+ unregisterGlobal(API_NAME3, DiagAPI.instance());
2121
+ this._proxyTracerProvider = new ProxyTracerProvider();
2122
+ };
2123
+ return TraceAPI2;
2124
+ })()
2125
+ );
337
2126
 
338
- // src/execution/index.ts
339
- var execution_exports = {};
340
- __export(execution_exports, {
341
- ScenarioExecution: () => ScenarioExecution,
342
- ScenarioExecutionState: () => ScenarioExecutionState,
343
- StateChangeEventType: () => StateChangeEventType
344
- });
2127
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace-api.js
2128
+ var trace = TraceAPI.getInstance();
345
2129
 
346
2130
  // src/execution/scenario-execution.ts
2131
+ import { getLangWatchTracer } from "langwatch";
2132
+ import { attributes as attributes3 } from "langwatch/observability";
347
2133
  import { filter, Subject as Subject2 } from "rxjs";
348
2134
 
349
2135
  // src/execution/scenario-execution-state.ts
350
2136
  import { Subject } from "rxjs";
2137
+
2138
+ // src/utils/ids.ts
2139
+ import crypto from "crypto";
2140
+ import process2 from "process";
2141
+ import { generate, parse } from "xksuid";
2142
+ var batchRunId;
2143
+ function generateThreadId() {
2144
+ return `scenariothread_${generate()}`;
2145
+ }
2146
+ function generateScenarioRunId() {
2147
+ return `scenariorun_${generate()}`;
2148
+ }
2149
+ function generateScenarioId() {
2150
+ return `scenario_${generate()}`;
2151
+ }
2152
/**
 * Returns the batch run id, computing and caching it in the module-level
 * `batchRunId` on first use. Resolution order:
 *   1. the SCENARIO_BATCH_RUN_ID environment variable, verbatim;
 *   2. when VITEST_WORKER_ID or JEST_WORKER_ID is set, `scenariobatch_` plus
 *      the first 12 hex characters of a SHA-256 over the parent process id,
 *      the UTC year and the zero-padded ISO week number;
 *   3. otherwise `scenariobatch_` plus a freshly generated suffix.
 *
 * NOTE(review): the seed pairs the calendar UTC year with the ISO week
 * number, so it changes at New Year even inside a single ISO week - confirm
 * that this is acceptable.
 */
function getBatchRunId() {
  if (batchRunId) {
    return batchRunId;
  }
  const { SCENARIO_BATCH_RUN_ID, VITEST_WORKER_ID, JEST_WORKER_ID } = process2.env;
  if (SCENARIO_BATCH_RUN_ID) {
    batchRunId = SCENARIO_BATCH_RUN_ID;
  } else if (VITEST_WORKER_ID || JEST_WORKER_ID) {
    const parentProcessId = process2.ppid;
    const now = /* @__PURE__ */ new Date();
    const isoWeek = String(getISOWeekNumber(now)).padStart(2, "0");
    const seed = `${parentProcessId}_${now.getUTCFullYear()}_w${isoWeek}`;
    const digest = crypto.createHash("sha256").update(seed).digest("hex");
    batchRunId = `scenariobatch_${digest.slice(0, 12)}`;
  } else {
    batchRunId = `scenariobatch_${generate()}`;
  }
  return batchRunId;
}
2170
/**
 * Computes the ISO week number (1-53) of `date`, using its UTC calendar day.
 *
 * The date is moved to the Thursday of its Monday-based week; the week
 * number is then derived from that Thursday's day-of-year.
 */
function getISOWeekNumber(date) {
  const MS_PER_DAY = 864e5;
  const thursday = new Date(Date.UTC(date.getUTCFullYear(), date.getUTCMonth(), date.getUTCDate()));
  // Monday = 1 ... Sunday = 7 (getUTCDay reports Sunday as 0).
  const rawWeekday = thursday.getUTCDay();
  const isoWeekday = rawWeekday === 0 ? 7 : rawWeekday;
  thursday.setUTCDate(thursday.getUTCDate() + 4 - isoWeekday);
  const startOfYear = Date.UTC(thursday.getUTCFullYear(), 0, 1);
  const dayOfYear = (thursday.getTime() - startOfYear) / MS_PER_DAY + 1;
  return Math.ceil(dayOfYear / 7);
}
2178
+ function generateMessageId() {
2179
+ return `scenariomsg_${generate()}`;
2180
+ }
2181
+
2182
+ // src/execution/scenario-execution-state.ts
351
2183
  var StateChangeEventType = /* @__PURE__ */ ((StateChangeEventType2) => {
352
2184
  StateChangeEventType2["MESSAGE_ADDED"] = "MESSAGE_ADDED";
353
2185
  return StateChangeEventType2;
@@ -361,9 +2193,9 @@ var ScenarioExecutionState = class {
361
2193
  events$ = this.eventSubject.asObservable();
362
2194
  description;
363
2195
  config;
364
- constructor(config) {
365
- this.config = config;
366
- this.description = config.description;
2196
+ constructor(config2) {
2197
+ this.config = config2;
2198
+ this.description = config2.description;
367
2199
  }
368
2200
  get messages() {
369
2201
  return this._messages;
@@ -384,9 +2216,13 @@ var ScenarioExecutionState = class {
384
2216
  * Adds a message to the conversation history.
385
2217
  *
386
2218
  * @param message - The message to add.
2219
+ * @param traceId - Optional trace ID to associate with the message.
387
2220
  */
388
2221
  addMessage(message2) {
389
- const messageWithId = { ...message2, id: generateMessageId() };
2222
+ const messageWithId = {
2223
+ ...message2,
2224
+ id: generateMessageId()
2225
+ };
390
2226
  this._messages.push(messageWithId);
391
2227
  this.eventSubject.next({ type: "MESSAGE_ADDED" /* MESSAGE_ADDED */ });
392
2228
  }
@@ -440,14 +2276,87 @@ var ScenarioExecutionState = class {
440
2276
  }
441
2277
  };
442
2278
 
2279
+ // src/events/schema.ts
2280
+ import { EventType, MessagesSnapshotEventSchema } from "@ag-ui/core";
2281
+ import { z as z5 } from "zod";
2282
+ var Verdict = /* @__PURE__ */ ((Verdict2) => {
2283
+ Verdict2["SUCCESS"] = "success";
2284
+ Verdict2["FAILURE"] = "failure";
2285
+ Verdict2["INCONCLUSIVE"] = "inconclusive";
2286
+ return Verdict2;
2287
+ })(Verdict || {});
2288
+ var ScenarioRunStatus = /* @__PURE__ */ ((ScenarioRunStatus2) => {
2289
+ ScenarioRunStatus2["SUCCESS"] = "SUCCESS";
2290
+ ScenarioRunStatus2["ERROR"] = "ERROR";
2291
+ ScenarioRunStatus2["CANCELLED"] = "CANCELLED";
2292
+ ScenarioRunStatus2["IN_PROGRESS"] = "IN_PROGRESS";
2293
+ ScenarioRunStatus2["PENDING"] = "PENDING";
2294
+ ScenarioRunStatus2["FAILED"] = "FAILED";
2295
+ return ScenarioRunStatus2;
2296
+ })(ScenarioRunStatus || {});
2297
+ var baseEventSchema = z5.object({
2298
+ type: z5.nativeEnum(EventType),
2299
+ timestamp: z5.number(),
2300
+ rawEvent: z5.any().optional()
2301
+ });
2302
+ var batchRunIdSchema = z5.string();
2303
+ var scenarioRunIdSchema = z5.string();
2304
+ var scenarioIdSchema = z5.string();
2305
+ var baseScenarioEventSchema = baseEventSchema.extend({
2306
+ batchRunId: batchRunIdSchema,
2307
+ scenarioId: scenarioIdSchema,
2308
+ scenarioRunId: scenarioRunIdSchema,
2309
+ scenarioSetId: z5.string().optional().default("default")
2310
+ });
2311
+ var scenarioRunStartedSchema = baseScenarioEventSchema.extend({
2312
+ type: z5.literal("SCENARIO_RUN_STARTED" /* RUN_STARTED */),
2313
+ metadata: z5.object({
2314
+ name: z5.string().optional(),
2315
+ description: z5.string().optional()
2316
+ })
2317
+ });
2318
+ var scenarioResultsSchema = z5.object({
2319
+ verdict: z5.nativeEnum(Verdict),
2320
+ reasoning: z5.string().optional(),
2321
+ metCriteria: z5.array(z5.string()),
2322
+ unmetCriteria: z5.array(z5.string()),
2323
+ error: z5.string().optional()
2324
+ });
2325
+ var scenarioRunFinishedSchema = baseScenarioEventSchema.extend({
2326
+ type: z5.literal("SCENARIO_RUN_FINISHED" /* RUN_FINISHED */),
2327
+ status: z5.nativeEnum(ScenarioRunStatus),
2328
+ results: scenarioResultsSchema.optional().nullable()
2329
+ });
2330
+ var scenarioMessageSnapshotSchema = MessagesSnapshotEventSchema.merge(
2331
+ baseScenarioEventSchema.extend({
2332
+ type: z5.literal("SCENARIO_MESSAGE_SNAPSHOT" /* MESSAGE_SNAPSHOT */)
2333
+ })
2334
+ );
2335
+ var scenarioEventSchema = z5.discriminatedUnion("type", [
2336
+ scenarioRunStartedSchema,
2337
+ scenarioRunFinishedSchema,
2338
+ scenarioMessageSnapshotSchema
2339
+ ]);
2340
+ var successSchema = z5.object({ success: z5.boolean() });
2341
+ var errorSchema = z5.object({ error: z5.string() });
2342
+ var stateSchema = z5.object({
2343
+ state: z5.object({
2344
+ messages: z5.array(z5.any()),
2345
+ status: z5.string()
2346
+ })
2347
+ });
2348
+ var runsSchema = z5.object({ runs: z5.array(z5.string()) });
2349
+ var eventsSchema = z5.object({ events: z5.array(scenarioEventSchema) });
2350
+
443
2351
  // src/utils/convert-core-messages-to-agui-messages.ts
444
- function convertCoreMessagesToAguiMessages(coreMessages) {
2352
+ function convertModelMessagesToAguiMessages(modelMessages) {
445
2353
  const aguiMessages = [];
446
- for (const msg of coreMessages) {
2354
+ for (const msg of modelMessages) {
447
2355
  const id = "id" in msg && typeof msg.id === "string" ? msg.id : generateMessageId();
448
2356
  switch (true) {
449
2357
  case msg.role === "system":
450
2358
  aguiMessages.push({
2359
+ trace_id: msg.traceId,
451
2360
  id,
452
2361
  role: "system",
453
2362
  content: msg.content
@@ -455,6 +2364,7 @@ function convertCoreMessagesToAguiMessages(coreMessages) {
455
2364
  break;
456
2365
  case (msg.role === "user" && typeof msg.content === "string"):
457
2366
  aguiMessages.push({
2367
+ trace_id: msg.traceId,
458
2368
  id,
459
2369
  role: "user",
460
2370
  content: msg.content
@@ -463,6 +2373,7 @@ function convertCoreMessagesToAguiMessages(coreMessages) {
463
2373
  // Handle any other user message content format
464
2374
  case (msg.role === "user" && Array.isArray(msg.content)):
465
2375
  aguiMessages.push({
2376
+ trace_id: msg.traceId,
466
2377
  id,
467
2378
  role: "user",
468
2379
  content: JSON.stringify(msg.content)
@@ -470,6 +2381,7 @@ function convertCoreMessagesToAguiMessages(coreMessages) {
470
2381
  break;
471
2382
  case (msg.role === "assistant" && typeof msg.content === "string"):
472
2383
  aguiMessages.push({
2384
+ trace_id: msg.traceId,
473
2385
  id,
474
2386
  role: "assistant",
475
2387
  content: msg.content
@@ -479,6 +2391,7 @@ function convertCoreMessagesToAguiMessages(coreMessages) {
479
2391
  const toolCalls = msg.content.filter((p) => p.type === "tool-call");
480
2392
  const nonToolCalls = msg.content.filter((p) => p.type !== "tool-call");
481
2393
  aguiMessages.push({
2394
+ trace_id: msg.traceId,
482
2395
  id,
483
2396
  role: "assistant",
484
2397
  content: JSON.stringify(nonToolCalls),
@@ -487,7 +2400,7 @@ function convertCoreMessagesToAguiMessages(coreMessages) {
487
2400
  type: "function",
488
2401
  function: {
489
2402
  name: c.toolName,
490
- arguments: JSON.stringify(c.args)
2403
+ arguments: JSON.stringify(c.input)
491
2404
  }
492
2405
  }))
493
2406
  });
@@ -495,11 +2408,13 @@ function convertCoreMessagesToAguiMessages(coreMessages) {
495
2408
  }
496
2409
  case msg.role === "tool":
497
2410
  msg.content.map((p, i) => {
2411
+ var _a;
498
2412
  aguiMessages.push({
2413
+ trace_id: msg.traceId,
499
2414
  id: `${id}-${i}`,
500
2415
  role: "tool",
501
2416
  toolCallId: p.toolCallId,
502
- content: JSON.stringify(p.result)
2417
+ content: JSON.stringify((_a = p.output) == null ? void 0 : _a.value)
503
2418
  });
504
2419
  });
505
2420
  break;
@@ -509,12 +2424,16 @@ function convertCoreMessagesToAguiMessages(coreMessages) {
509
2424
  }
510
2425
  return aguiMessages;
511
2426
  }
512
- var convert_core_messages_to_agui_messages_default = convertCoreMessagesToAguiMessages;
2427
+ var convert_core_messages_to_agui_messages_default = convertModelMessagesToAguiMessages;
513
2428
 
514
2429
  // src/execution/scenario-execution.ts
515
2430
  var ScenarioExecution = class {
2431
+ /** LangWatch tracer for scenario execution */
2432
+ tracer = getLangWatchTracer("@langwatch/scenario");
516
2433
  /** The current state of the scenario execution */
517
2434
  state;
2435
+ /** The final result of the scenario execution, set when a conclusion is reached */
2436
+ _result;
518
2437
  /** Logger for debugging and monitoring */
519
2438
  logger = new Logger("scenario.execution.ScenarioExecution");
520
2439
  /** Finalized configuration with all defaults applied */
@@ -533,10 +2452,10 @@ var ScenarioExecution = class {
533
2452
  * Key: agent index, Value: array of pending messages for that agent
534
2453
  */
535
2454
  pendingMessages = /* @__PURE__ */ new Map();
536
- /** Intermediate result set by agents that make final decisions */
537
- partialResult = null;
538
2455
  /** Accumulated execution time for each agent (for performance tracking) */
539
2456
  agentTimes = /* @__PURE__ */ new Map();
2457
+ /** Current turn span for trace context management */
2458
+ currentTurnSpan;
540
2459
  /** Timestamp when execution started (for total time calculation) */
541
2460
  totalStartTime = 0;
542
2461
  /** Event stream for monitoring scenario progress */
@@ -557,17 +2476,17 @@ var ScenarioExecution = class {
557
2476
  * @param config - The scenario configuration containing agents, settings, and metadata
558
2477
  * @param script - The ordered sequence of script steps that define the test flow
559
2478
  */
560
- constructor(config, script) {
2479
+ constructor(config2, script) {
561
2480
  this.config = {
562
- id: config.id ?? generateScenarioId(),
563
- name: config.name,
564
- description: config.description,
565
- agents: config.agents,
2481
+ id: config2.id ?? generateScenarioId(),
2482
+ name: config2.name,
2483
+ description: config2.description,
2484
+ agents: config2.agents,
566
2485
  script,
567
- verbose: config.verbose ?? DEFAULT_VERBOSE,
568
- maxTurns: config.maxTurns ?? DEFAULT_MAX_TURNS,
569
- threadId: config.threadId ?? generateThreadId(),
570
- setId: config.setId
2486
+ verbose: config2.verbose ?? DEFAULT_VERBOSE,
2487
+ maxTurns: config2.maxTurns ?? DEFAULT_MAX_TURNS,
2488
+ threadId: config2.threadId ?? generateThreadId(),
2489
+ setId: config2.setId
571
2490
  };
572
2491
  this.state = new ScenarioExecutionState(this.config);
573
2492
  this.reset();
@@ -575,7 +2494,7 @@ var ScenarioExecution = class {
575
2494
  /**
576
2495
  * Gets the complete conversation history as an array of messages.
577
2496
  *
578
- * @returns Array of CoreMessage objects representing the full conversation
2497
+ * @returns Array of ModelMessage objects representing the full conversation
579
2498
  */
580
2499
  get messages() {
581
2500
  return this.state.messages;
@@ -589,6 +2508,41 @@ var ScenarioExecution = class {
589
2508
  get threadId() {
590
2509
  return this.state.threadId;
591
2510
  }
2511
+ /**
2512
+ * Gets the result of the scenario execution if it has been set.
2513
+ *
2514
+ * @returns The scenario result or undefined if not yet set
2515
+ */
2516
+ get result() {
2517
+ return this._result;
2518
+ }
2519
+ /**
2520
+ * Sets the result of the scenario execution.
2521
+ * This is called when the scenario reaches a conclusion (success or failure).
2522
+ * Automatically includes messages, totalTime, and agentTime from the current execution context.
2523
+ *
2524
+ * @param result - The final scenario result (without messages/timing, which will be added automatically)
2525
+ */
2526
+ setResult(result) {
2527
+ const agentRoleAgentsIdx = this.agents.map((agent2, i) => ({ agent: agent2, idx: i })).filter(({ agent: agent2 }) => agent2.role === "Agent" /* AGENT */).map(({ idx }) => idx);
2528
+ const agentTimes = agentRoleAgentsIdx.map(
2529
+ (i) => this.agentTimes.get(i) || 0
2530
+ );
2531
+ const totalAgentTime = agentTimes.reduce((sum, time) => sum + time, 0);
2532
+ this._result = {
2533
+ ...result,
2534
+ messages: this.state.messages,
2535
+ totalTime: this.totalTime,
2536
+ agentTime: totalAgentTime
2537
+ };
2538
+ this.logger.debug(`[${this.config.id}] Result set`, {
2539
+ success: result.success,
2540
+ reasoning: result.reasoning,
2541
+ totalTime: this.totalTime,
2542
+ agentTime: totalAgentTime,
2543
+ messageCount: this.state.messages.length
2544
+ });
2545
+ }
592
2546
  /**
593
2547
  * The total elapsed time for the scenario execution.
594
2548
  */
@@ -622,8 +2576,14 @@ var ScenarioExecution = class {
622
2576
  * ```
623
2577
  */
624
2578
  async execute() {
2579
+ this.logger.debug(`[${this.config.id}] Starting scenario execution`, {
2580
+ name: this.config.name,
2581
+ maxTurns: this.config.maxTurns,
2582
+ scriptLength: this.config.script.length
2583
+ });
625
2584
  this.reset();
626
2585
  const scenarioRunId = generateScenarioRunId();
2586
+ this.logger.debug(`[${this.config.id}] Generated run ID: ${scenarioRunId}`);
627
2587
  this.emitRunStarted({ scenarioRunId });
628
2588
  const subscription = this.state.events$.pipe(
629
2589
  filter((event) => event.type === "MESSAGE_ADDED" /* MESSAGE_ADDED */)
@@ -633,18 +2593,17 @@ var ScenarioExecution = class {
633
2593
  try {
634
2594
  for (let i = 0; i < this.config.script.length; i++) {
635
2595
  const scriptStep = this.config.script[i];
636
- const result = await this.executeScriptStep(scriptStep, i);
637
- if (result && typeof result === "object" && "success" in result) {
2596
+ await this.executeScriptStep(scriptStep, i);
2597
+ if (this.result) {
638
2598
  this.emitRunFinished({
639
2599
  scenarioRunId,
640
- status: result.success ? "SUCCESS" /* SUCCESS */ : "FAILED" /* FAILED */,
641
- result
2600
+ status: this.result.success ? "SUCCESS" /* SUCCESS */ : "FAILED" /* FAILED */,
2601
+ result: this.result
642
2602
  });
643
- return result;
2603
+ return this.result;
644
2604
  }
645
2605
  }
646
- this.emitRunFinished({ scenarioRunId, status: "FAILED" /* FAILED */ });
647
- return this.reachedMaxTurns(
2606
+ this.reachedMaxTurns(
648
2607
  [
649
2608
  "Reached end of script without conclusion, add one of the following to the end of the script:",
650
2609
  "- `Scenario.proceed()` to let the simulation continue to play out",
@@ -652,20 +2611,21 @@ var ScenarioExecution = class {
652
2611
  "- `Scenario.succeed()` or `Scenario.fail()` to end the test with an explicit result"
653
2612
  ].join("\n")
654
2613
  );
2614
+ this.emitRunFinished({ scenarioRunId, status: "FAILED" /* FAILED */ });
2615
+ return this.result;
655
2616
  } catch (error) {
656
2617
  const errorInfo = extractErrorInfo(error);
657
- const errorResult = {
2618
+ this.setResult({
658
2619
  success: false,
659
- messages: this.state.messages,
660
2620
  reasoning: `Scenario failed with error: ${errorInfo.message}`,
661
2621
  metCriteria: [],
662
2622
  unmetCriteria: [],
663
2623
  error: JSON.stringify(errorInfo)
664
- };
2624
+ });
665
2625
  this.emitRunFinished({
666
2626
  scenarioRunId,
667
2627
  status: "ERROR" /* ERROR */,
668
- result: errorResult
2628
+ result: this.result
669
2629
  });
670
2630
  throw error;
671
2631
  } finally {
@@ -683,50 +2643,66 @@ var ScenarioExecution = class {
683
2643
  * - Progress to the next turn if needed
684
2644
  * - Find the next agent that should act
685
2645
  * - Execute that agent's response
686
- * - Return either new messages or a final scenario result
2646
+ * - Set the result if the scenario concludes
687
2647
  *
688
2648
  * Note: This method is primarily for debugging or custom execution flows. Most users
689
2649
  * will use `execute()` to run the entire scenario automatically.
690
2650
  *
691
- * @returns A promise that resolves with either:
692
- * - Array of new messages added during the agent interaction, or
693
- * - A final ScenarioResult if the interaction concludes the scenario
694
- * @throws Error if no result is returned from the step
2651
+ * After calling this method, check `this.result` to see if the scenario has concluded.
695
2652
  *
696
2653
  * @example
697
2654
  * ```typescript
698
2655
  * const execution = new ScenarioExecution(config, script);
699
2656
  *
700
2657
  * // Execute one agent interaction at a time
701
- * const messages = await execution.step();
702
- * if (Array.isArray(messages)) {
703
- * console.log('New messages:', messages);
704
- * } else {
705
- * console.log('Scenario finished:', messages.success);
2658
+ * await execution.step();
2659
+ * if (execution.result) {
2660
+ * console.log('Scenario finished:', execution.result.success);
706
2661
  * }
707
2662
  * ```
708
2663
  */
709
2664
  async step() {
710
- const result = await this._step();
711
- if (result === null) throw new Error("No result from step");
712
- return result;
2665
+ await this._step();
713
2666
  }
714
2667
  async _step(goToNextTurn = true, onTurn) {
2668
+ this.logger.debug(`[${this.config.id}] _step called`, {
2669
+ goToNextTurn,
2670
+ pendingRoles: this.pendingRolesOnTurn,
2671
+ currentTurn: this.state.currentTurn
2672
+ });
715
2673
  if (this.pendingRolesOnTurn.length === 0) {
716
- if (!goToNextTurn) return null;
2674
+ if (!goToNextTurn) {
2675
+ this.logger.debug(
2676
+ `[${this.config.id}] No pending roles, not advancing turn`
2677
+ );
2678
+ return;
2679
+ }
717
2680
  this.newTurn();
718
2681
  if (onTurn) await onTurn(this.state);
719
- if (this.state.currentTurn >= this.config.maxTurns)
720
- return this.reachedMaxTurns();
2682
+ if (this.state.currentTurn >= this.config.maxTurns) {
2683
+ this.logger.debug(
2684
+ `[${this.config.id}] Reached max turns: ${this.state.currentTurn}`
2685
+ );
2686
+ this.reachedMaxTurns();
2687
+ return;
2688
+ }
721
2689
  }
722
2690
  const currentRole = this.pendingRolesOnTurn[0];
723
2691
  const { idx, agent: nextAgent } = this.nextAgentForRole(currentRole);
724
2692
  if (!nextAgent) {
2693
+ this.logger.debug(
2694
+ `[${this.config.id}] No agent for role ${currentRole}, removing role`
2695
+ );
725
2696
  this.removePendingRole(currentRole);
726
2697
  return this._step(goToNextTurn, onTurn);
727
2698
  }
2699
+ this.logger.debug(`[${this.config.id}] Calling agent`, {
2700
+ role: currentRole,
2701
+ agentIdx: idx,
2702
+ agentName: nextAgent.name ?? nextAgent.constructor.name
2703
+ });
728
2704
  this.removePendingAgent(nextAgent);
729
- return await this.callAgent(idx, currentRole);
2705
+ await this.callAgent(idx, currentRole);
730
2706
  }
731
2707
  /**
732
2708
  * Calls a specific agent to generate a response or make a decision.
@@ -745,19 +2721,25 @@ var ScenarioExecution = class {
745
2721
  * After the agent responds:
746
2722
  * - Performance timing is recorded
747
2723
  * - Pending messages for this agent are cleared (they've been processed)
748
- * - If the agent returns a ScenarioResult, it's returned immediately
2724
+ * - If the agent returns a ScenarioResult, it's set on this.result
749
2725
  * - Otherwise, the agent's messages are added to the conversation and broadcast
750
2726
  *
751
2727
  * @param idx - The index of the agent in the agents array
752
2728
  * @param role - The role the agent is being asked to play (USER, AGENT, or JUDGE)
753
2729
  * @param judgmentRequest - Whether this is a judgment request (for judge agents)
754
- * @returns A promise that resolves with either:
755
- * - Array of messages if the agent generated a response, or
756
- * - ScenarioResult if the agent made a final decision
757
2730
  * @throws Error if the agent call fails
758
2731
  */
759
2732
  async callAgent(idx, role, judgmentRequest = false) {
2733
+ var _a;
760
2734
  const agent2 = this.agents[idx];
2735
+ const agentName = agent2.name ?? agent2.constructor.name;
2736
+ this.logger.debug(`[${this.config.id}] callAgent started`, {
2737
+ agentIdx: idx,
2738
+ role,
2739
+ judgmentRequest,
2740
+ agentName,
2741
+ pendingMessagesCount: ((_a = this.pendingMessages.get(idx)) == null ? void 0 : _a.length) ?? 0
2742
+ });
761
2743
  const startTime = Date.now();
762
2744
  const agentInput = {
763
2745
  threadId: this.state.threadId,
@@ -768,35 +2750,75 @@ var ScenarioExecution = class {
768
2750
  scenarioState: this.state,
769
2751
  scenarioConfig: this.config
770
2752
  };
2753
+ const agentContext = this.currentTurnSpan ? trace.setSpan(context.active(), this.currentTurnSpan) : context.active();
2754
+ const agentSpanName = `${agentName !== Object.prototype.constructor.name ? agent2.constructor.name : "Agent"}.call`;
771
2755
  try {
772
- const agentResponse = await agent2.call(agentInput);
773
- const endTime = Date.now();
774
- this.addAgentTime(idx, endTime - startTime);
775
- this.pendingMessages.delete(idx);
776
- if (agentResponse && typeof agentResponse === "object" && "success" in agentResponse) {
777
- return agentResponse;
778
- }
779
- const currentAgentTime = this.agentTimes.get(idx) ?? 0;
780
- this.agentTimes.set(idx, currentAgentTime + (Date.now() - startTime));
781
- const messages = convertAgentReturnTypesToMessages(
782
- agentResponse,
783
- role === "User" /* USER */ ? "user" : "assistant"
784
- );
785
- for (const message2 of messages) {
786
- this.state.addMessage(message2);
787
- this.broadcastMessage(message2, idx);
788
- }
789
- return messages;
790
- } catch (error) {
791
- this.logger.error(
792
- `[${this.config.id}] Error calling agent ${agent2.constructor.name}`,
2756
+ await this.tracer.withActiveSpan(
2757
+ agentSpanName,
793
2758
  {
794
- error: error instanceof Error ? error.message : String(error),
795
- agent: agent2.constructor.name,
796
- agentInput
2759
+ attributes: {
2760
+ [attributes3.ATTR_LANGWATCH_THREAD_ID]: this.state.threadId
2761
+ }
2762
+ },
2763
+ agentContext,
2764
+ async (agentSpan) => {
2765
+ agentSpan.setType("agent");
2766
+ agentSpan.setInput("chat_messages", this.state.messages);
2767
+ const agentResponse = await agent2.call(agentInput);
2768
+ const endTime = Date.now();
2769
+ const duration = endTime - startTime;
2770
+ this.logger.debug(`[${this.config.id}] Agent responded`, {
2771
+ agentIdx: idx,
2772
+ duration,
2773
+ responseType: typeof agentResponse,
2774
+ isScenarioResult: agentResponse && typeof agentResponse === "object" && "success" in agentResponse
2775
+ });
2776
+ this.addAgentTime(idx, duration);
2777
+ this.pendingMessages.delete(idx);
2778
+ if (agentResponse && typeof agentResponse === "object" && "success" in agentResponse) {
2779
+ this.logger.debug(
2780
+ `[${this.config.id}] Agent returned ScenarioResult`,
2781
+ {
2782
+ success: agentResponse.success
2783
+ }
2784
+ );
2785
+ this.setResult(agentResponse);
2786
+ return;
2787
+ }
2788
+ const messages = convertAgentReturnTypesToMessages(
2789
+ agentResponse,
2790
+ role === "User" /* USER */ ? "user" : "assistant"
2791
+ );
2792
+ if (messages.length > 0) {
2793
+ agentSpan.setOutput("chat_messages", messages);
2794
+ }
2795
+ const metrics = {
2796
+ duration: endTime - startTime
2797
+ };
2798
+ if (agentResponse && typeof agentResponse === "object") {
2799
+ const usage = agentResponse.usage;
2800
+ if (usage) {
2801
+ if (usage.prompt_tokens !== void 0)
2802
+ metrics.promptTokens = usage.prompt_tokens;
2803
+ if (usage.completion_tokens !== void 0)
2804
+ metrics.completionTokens = usage.completion_tokens;
2805
+ if (usage.total_tokens !== void 0)
2806
+ metrics.totalTokens = usage.total_tokens;
2807
+ }
2808
+ }
2809
+ agentSpan.setMetrics(metrics);
2810
+ const traceId = agentSpan.spanContext().traceId.toString();
2811
+ for (const message2 of messages) {
2812
+ this.state.addMessage({
2813
+ ...message2,
2814
+ traceId
2815
+ });
2816
+ this.broadcastMessage(message2, idx);
2817
+ }
797
2818
  }
798
2819
  );
799
- throw error;
2820
+ } catch (error) {
2821
+ throw new Error(`[${agentName}] ${error}`, { cause: error });
800
2822
  }
801
2823
  }
802
2824
  /**
@@ -808,7 +2830,7 @@ var ScenarioExecution = class {
808
2830
  * - "assistant" messages are routed to AGENT role agents
809
2831
  * - Other message types are added directly to the conversation
810
2832
  *
811
- * @param message - The CoreMessage to add to the conversation
2833
+ * @param message - The ModelMessage to add to the conversation
812
2834
  *
813
2835
  * @example
814
2836
  * ```typescript
@@ -837,7 +2859,7 @@ var ScenarioExecution = class {
837
2859
  *
838
2860
  * This method is part of the ScenarioExecutionLike interface used by script steps.
839
2861
  *
840
- * @param content - Optional content for the user's message. Can be a string or CoreMessage.
2862
+ * @param content - Optional content for the user's message. Can be a string or ModelMessage.
841
2863
  * If not provided, the user simulator agent will generate the content.
842
2864
  *
843
2865
  * @example
@@ -848,7 +2870,7 @@ var ScenarioExecution = class {
848
2870
  * // Let user simulator generate content
849
2871
  * await execution.user();
850
2872
  *
851
- * // Use a CoreMessage object
2873
+ * // Use a ModelMessage object
852
2874
  * await execution.user({
853
2875
  * role: "user",
854
2876
  * content: "Tell me a joke"
@@ -867,7 +2889,7 @@ var ScenarioExecution = class {
867
2889
  *
868
2890
  * This method is part of the ScenarioExecutionLike interface used by script steps.
869
2891
  *
870
- * @param content - Optional content for the agent's response. Can be a string or CoreMessage.
2892
+ * @param content - Optional content for the agent's response. Can be a string or ModelMessage.
871
2893
  * If not provided, the agent under test will generate the response.
872
2894
  *
873
2895
  * @example
@@ -878,7 +2900,7 @@ var ScenarioExecution = class {
878
2900
  * // Use provided content
879
2901
  * await execution.agent("The weather is sunny today!");
880
2902
  *
881
- * // Use a CoreMessage object
2903
+ * // Use a ModelMessage object
882
2904
  * await execution.agent({
883
2905
  * role: "assistant",
884
2906
  * content: "I'm here to help you with weather information."
@@ -959,17 +2981,22 @@ var ScenarioExecution = class {
959
2981
  * ```
960
2982
  */
961
2983
  async proceed(turns, onTurn, onStep) {
2984
+ this.logger.debug(`[${this.config.id}] proceed called`, {
2985
+ turns,
2986
+ currentTurn: this.state.currentTurn
2987
+ });
962
2988
  let initialTurn = this.state.currentTurn;
963
2989
  while (true) {
964
2990
  const goToNextTurn = turns === void 0 || initialTurn === null || this.state.currentTurn != null && this.state.currentTurn + 1 < initialTurn + turns;
965
- const nextMessage = await this._step(goToNextTurn, onTurn);
2991
+ await this._step(goToNextTurn, onTurn);
966
2992
  if (initialTurn === null) initialTurn = this.state.currentTurn;
967
- if (nextMessage === null) {
968
- return null;
2993
+ if (this.result) {
2994
+ return this.result;
969
2995
  }
970
2996
  if (onStep) await onStep(this.state);
971
- if (nextMessage !== null && typeof nextMessage === "object" && "success" in nextMessage)
972
- return nextMessage;
2997
+ if (!goToNextTurn) {
2998
+ return null;
2999
+ }
973
3000
  }
974
3001
  }
975
3002
  /**
@@ -996,13 +3023,13 @@ var ScenarioExecution = class {
996
3023
  * ```
997
3024
  */
998
3025
  async succeed(reasoning) {
999
- return {
3026
+ this.setResult({
1000
3027
  success: true,
1001
- messages: this.state.messages,
1002
3028
  reasoning: reasoning || "Scenario marked as successful with Scenario.succeed()",
1003
3029
  metCriteria: [],
1004
3030
  unmetCriteria: []
1005
- };
3031
+ });
3032
+ return this.result;
1006
3033
  }
1007
3034
  /**
1008
3035
  * Immediately ends the scenario with a failure verdict.
@@ -1028,13 +3055,13 @@ var ScenarioExecution = class {
1028
3055
  * ```
1029
3056
  */
1030
3057
  async fail(reasoning) {
1031
- return {
3058
+ this.setResult({
1032
3059
  success: false,
1033
- messages: this.state.messages,
1034
3060
  reasoning: reasoning || "Scenario marked as failed with Scenario.fail()",
1035
3061
  metCriteria: [],
1036
3062
  unmetCriteria: []
1037
- };
3063
+ });
3064
+ return this.result;
1038
3065
  }
1039
3066
  /**
1040
3067
  * Adds execution time for a specific agent to the performance tracking.
@@ -1057,53 +3084,6 @@ var ScenarioExecution = class {
1057
3084
  const currentTime = this.agentTimes.get(agentIdx) || 0;
1058
3085
  this.agentTimes.set(agentIdx, currentTime + time);
1059
3086
  }
1060
- /**
1061
- * Checks if a partial result has been set for the scenario.
1062
- *
1063
- * This method is used internally to determine if a scenario has already reached
1064
- * a conclusion (success or failure) but hasn't been finalized yet. Partial results
1065
- * are typically set by agents that make final decisions (like judge agents) and
1066
- * are later finalized with the complete message history.
1067
- *
1068
- * @returns True if a partial result exists, false otherwise
1069
- *
1070
- * @example
1071
- * ```typescript
1072
- * // This is typically used internally by the execution engine
1073
- * if (execution.hasResult()) {
1074
- * console.log('Scenario has reached a conclusion');
1075
- * }
1076
- * ```
1077
- */
1078
- hasResult() {
1079
- return this.partialResult !== null;
1080
- }
1081
- /**
1082
- * Sets a partial result for the scenario.
1083
- *
1084
- * This method is used internally to store intermediate results that may be
1085
- * finalized later with the complete message history. Partial results are typically
1086
- * created by agents that make final decisions (like judge agents) and contain
1087
- * the success/failure status, reasoning, and criteria evaluation, but not the
1088
- * complete message history.
1089
- *
1090
- * @param result - The partial result without the messages field. Should include
1091
- * success status, reasoning, and criteria evaluation.
1092
- *
1093
- * @example
1094
- * ```typescript
1095
- * // This is typically called internally by agents that make final decisions
1096
- * execution.setResult({
1097
- * success: true,
1098
- * reasoning: "Agent provided accurate weather information",
1099
- * metCriteria: ["Provides accurate weather data"],
1100
- * unmetCriteria: []
1101
- * });
1102
- * ```
1103
- */
1104
- setResult(result) {
1105
- this.partialResult = result;
1106
- }
1107
3087
  /**
1108
3088
  * Internal method to handle script step calls to agents.
1109
3089
  *
@@ -1116,7 +3096,7 @@ var ScenarioExecution = class {
1116
3096
  * - Progress to a new turn if no agent is available
1117
3097
  * - Execute the agent with the provided content or let it generate content
1118
3098
  * - Handle judgment requests for judge agents
1119
- * - Return a final result if the agent makes a decision
3099
+ * - Set the result if the agent makes a decision
1120
3100
  *
1121
3101
  * @param role - The role of the agent to call (USER, AGENT, or JUDGE)
1122
3102
  * @param content - Optional content to use instead of letting the agent generate it
@@ -1126,6 +3106,11 @@ var ScenarioExecution = class {
1126
3106
  * @throws Error if no agent is found for the specified role
1127
3107
  */
1128
3108
  async scriptCallAgent(role, content, judgmentRequest = false) {
3109
+ this.logger.debug(`[${this.config.id}] scriptCallAgent`, {
3110
+ role,
3111
+ hasContent: content !== void 0,
3112
+ judgmentRequest
3113
+ });
1129
3114
  this.consumeUntilRole(role);
1130
3115
  let index = -1;
1131
3116
  let agent2 = null;
@@ -1170,11 +3155,8 @@ var ScenarioExecution = class {
1170
3155
  this.broadcastMessage(message2, index);
1171
3156
  return null;
1172
3157
  }
1173
- const result = await this.callAgent(index, role, judgmentRequest);
1174
- if (result && typeof result === "object" && "success" in result) {
1175
- return result;
1176
- }
1177
- return null;
3158
+ await this.callAgent(index, role, judgmentRequest);
3159
+ return this.result ?? null;
1178
3160
  }
1179
3161
  /**
1180
3162
  * Resets the scenario execution to its initial state.
@@ -1190,8 +3172,14 @@ var ScenarioExecution = class {
1190
3172
  * - Starts the first turn
1191
3173
  * - Records the start time for performance tracking
1192
3174
  * - Clears any pending messages
3175
+ * - Clears the result from any previous execution
1193
3176
  */
1194
3177
  reset() {
3178
+ this.logger.debug(`[${this.config.id}] Resetting scenario execution`);
3179
+ if (this.currentTurnSpan) {
3180
+ this.currentTurnSpan.end();
3181
+ this.currentTurnSpan = void 0;
3182
+ }
1195
3183
  this.state = new ScenarioExecutionState(this.config);
1196
3184
  this.state.threadId = this.config.threadId || generateThreadId();
1197
3185
  this.setAgents(this.config.agents);
@@ -1199,6 +3187,11 @@ var ScenarioExecution = class {
1199
3187
  this.state.currentTurn = 0;
1200
3188
  this.totalStartTime = Date.now();
1201
3189
  this.pendingMessages.clear();
3190
+ this._result = void 0;
3191
+ this.logger.debug(`[${this.config.id}] Reset complete`, {
3192
+ threadId: this.state.threadId,
3193
+ agentCount: this.agents.length
3194
+ });
1202
3195
  }
1203
3196
  nextAgentForRole(role) {
1204
3197
  for (const agent2 of this.agents) {
@@ -1219,6 +3212,11 @@ var ScenarioExecution = class {
1219
3212
  * multiple agent interactions as agents respond to each other's messages.
1220
3213
  */
1221
3214
  newTurn() {
3215
+ const previousTurn = this.state.currentTurn;
3216
+ if (this.currentTurnSpan) {
3217
+ this.currentTurnSpan.end();
3218
+ this.currentTurnSpan = void 0;
3219
+ }
1222
3220
  this.pendingAgentsOnTurn = new Set(this.agents);
1223
3221
  this.pendingRolesOnTurn = [
1224
3222
  "User" /* USER */,
@@ -1230,6 +3228,19 @@ var ScenarioExecution = class {
1230
3228
  } else {
1231
3229
  this.state.currentTurn++;
1232
3230
  }
3231
+ this.logger.debug(`[${this.config.id}] New turn started`, {
3232
+ previousTurn,
3233
+ currentTurn: this.state.currentTurn,
3234
+ agentCount: this.agents.length
3235
+ });
3236
+ this.currentTurnSpan = this.tracer.startSpan("Scenario Turn", {
3237
+ attributes: {
3238
+ "scenario.name": this.config.name,
3239
+ "scenario.id": this.config.id,
3240
+ [attributes3.ATTR_LANGWATCH_THREAD_ID]: this.state.threadId,
3241
+ "scenario.turn": this.state.currentTurn
3242
+ }
3243
+ });
1233
3244
  }
1234
3245
  removePendingRole(role) {
1235
3246
  const index = this.pendingRolesOnTurn.indexOf(role);
@@ -1265,7 +3276,7 @@ var ScenarioExecution = class {
1265
3276
  *
1266
3277
  * This method is called when the scenario execution reaches the maximum number
1267
3278
  * of turns without reaching a conclusion. It creates a failure result with
1268
- * appropriate reasoning and includes performance metrics.
3279
+ * appropriate reasoning and includes performance metrics, then sets it on this.result.
1269
3280
  *
1270
3281
  * The result includes:
1271
3282
  * - All messages from the conversation
@@ -1275,24 +3286,15 @@ var ScenarioExecution = class {
1275
3286
  * - Total execution time and agent response times
1276
3287
  *
1277
3288
  * @param errorMessage - Optional custom error message to use instead of the default
1278
- * @returns A ScenarioResult indicating failure due to reaching max turns
1279
3289
  */
1280
3290
  reachedMaxTurns(errorMessage) {
1281
3291
  var _a;
1282
- const agentRoleAgentsIdx = this.agents.map((agent2, i) => ({ agent: agent2, idx: i })).filter(({ agent: agent2 }) => agent2.role === "Agent" /* AGENT */).map(({ idx }) => idx);
1283
- const agentTimes = agentRoleAgentsIdx.map(
1284
- (i) => this.agentTimes.get(i) || 0
1285
- );
1286
- const totalAgentTime = agentTimes.reduce((sum, time) => sum + time, 0);
1287
- return {
3292
+ this.setResult({
1288
3293
  success: false,
1289
- messages: this.state.messages,
1290
3294
  reasoning: errorMessage || `Reached maximum turns (${this.config.maxTurns || 10}) without conclusion`,
1291
3295
  metCriteria: [],
1292
- unmetCriteria: ((_a = this.getJudgeAgent()) == null ? void 0 : _a.criteria) ?? [],
1293
- totalTime: this.totalTime,
1294
- agentTime: totalAgentTime
1295
- };
3296
+ unmetCriteria: ((_a = this.getJudgeAgent()) == null ? void 0 : _a.criteria) ?? []
3297
+ });
1296
3298
  }
1297
3299
  getJudgeAgent() {
1298
3300
  return this.agents.find((agent2) => agent2 instanceof JudgeAgentAdapter) ?? null;
@@ -1364,6 +3366,10 @@ var ScenarioExecution = class {
1364
3366
  };
1365
3367
  this.emitEvent(event);
1366
3368
  this.eventSubject.complete();
3369
+ if (this.currentTurnSpan) {
3370
+ this.currentTurnSpan.end();
3371
+ this.currentTurnSpan = void 0;
3372
+ }
1367
3373
  }
1368
3374
  /**
1369
3375
  * Distributes a message to all other agents in the scenario.
@@ -1395,13 +3401,20 @@ var ScenarioExecution = class {
1395
3401
  * ```
1396
3402
  */
1397
3403
  broadcastMessage(message2, fromAgentIdx) {
3404
+ const recipients = [];
1398
3405
  for (let idx = 0; idx < this.agents.length; idx++) {
1399
3406
  if (idx === fromAgentIdx) continue;
1400
3407
  if (!this.pendingMessages.has(idx)) {
1401
3408
  this.pendingMessages.set(idx, []);
1402
3409
  }
1403
3410
  this.pendingMessages.get(idx).push(message2);
3411
+ recipients.push(idx);
1404
3412
  }
3413
+ this.logger.debug(`[${this.config.id}] Broadcast message`, {
3414
+ role: message2.role,
3415
+ fromAgentIdx,
3416
+ recipients
3417
+ });
1405
3418
  }
1406
3419
  /**
1407
3420
  * Executes a single script step with proper error handling and logging.
@@ -1460,7 +3473,8 @@ function convertAgentReturnTypesToMessages(response, role) {
1460
3473
  if (typeof response === "string")
1461
3474
  return [{ role, content: response }];
1462
3475
  if (Array.isArray(response)) return response;
1463
- if (typeof response === "object" && "role" in response) return [response];
3476
+ if (response && typeof response === "object" && "role" in response)
3477
+ return [response];
1464
3478
  return [];
1465
3479
  }
1466
3480
  function extractErrorInfo(error) {
@@ -1483,6 +3497,289 @@ __export(runner_exports, {
1483
3497
  run: () => run
1484
3498
  });
1485
3499
 
3500
+ // src/events/event-bus.ts
3501
+ import {
3502
+ concatMap,
3503
+ EMPTY,
3504
+ catchError,
3505
+ Subject as Subject3,
3506
+ tap,
3507
+ map
3508
+ } from "rxjs";
3509
+
3510
+ // src/events/event-alert-message-logger.ts
3511
+ import * as fs2 from "fs";
3512
+ import * as os from "os";
3513
+ import * as path2 from "path";
3514
+ import open from "open";
3515
+ var EventAlertMessageLogger = class {
3516
+ /**
3517
+ * Creates a coordination file to prevent duplicate messages across processes.
3518
+ * Returns true if this process should show the message (first one to create the file).
3519
+ */
3520
+ createCoordinationFile(type) {
3521
+ try {
3522
+ const batchId = getBatchRunId();
3523
+ const tmpDir = os.tmpdir();
3524
+ const fileName = `scenario-${type}-${batchId}`;
3525
+ const filePath = path2.join(tmpDir, fileName);
3526
+ fs2.writeFileSync(filePath, process.pid.toString(), { flag: "wx" });
3527
+ return true;
3528
+ } catch {
3529
+ return false;
3530
+ }
3531
+ }
3532
+ /**
3533
+ * Shows a fancy greeting message about simulation reporting status.
3534
+ * Only shows once per batch run to avoid spam.
3535
+ */
3536
+ handleGreeting() {
3537
+ if (this.isGreetingDisabled()) {
3538
+ return;
3539
+ }
3540
+ if (!this.createCoordinationFile("greeting")) {
3541
+ return;
3542
+ }
3543
+ this.displayGreeting();
3544
+ }
3545
+ /**
3546
+ * Shows a fancy message about how to watch the simulation.
3547
+ * Called when a run started event is received with a session ID.
3548
+ */
3549
+ async handleWatchMessage(params) {
3550
+ if (this.isGreetingDisabled()) {
3551
+ return;
3552
+ }
3553
+ if (!this.createCoordinationFile(`watch-${params.scenarioSetId}`)) {
3554
+ return;
3555
+ }
3556
+ await this.displayWatchMessage(params);
3557
+ }
3558
+ isGreetingDisabled() {
3559
+ return getEnv().SCENARIO_DISABLE_SIMULATION_REPORT_INFO === true;
3560
+ }
3561
+ displayGreeting() {
3562
+ const separator = "\u2500".repeat(60);
3563
+ const env = getEnv();
3564
+ if (!env.LANGWATCH_API_KEY) {
3565
+ console.log(`
3566
+ ${separator}`);
3567
+ console.log("\u{1F3AD} Running Scenario Tests");
3568
+ console.log(`${separator}`);
3569
+ console.log("\u27A1\uFE0F LangWatch API key not configured");
3570
+ console.log(" Simulations will only output final results");
3571
+ console.log("");
3572
+ console.log("\u{1F4A1} To visualize conversations in real time:");
3573
+ console.log(" \u2022 Set LANGWATCH_API_KEY environment variable");
3574
+ console.log(" \u2022 Or configure apiKey in scenario.config.js");
3575
+ console.log("");
3576
+ console.log(`${separator}
3577
+ `);
3578
+ }
3579
+ }
3580
+ async displayWatchMessage(params) {
3581
+ const separator = "\u2500".repeat(60);
3582
+ const setUrl = params.setUrl;
3583
+ const batchUrl = `${setUrl}/${getBatchRunId()}`;
3584
+ console.log(`
3585
+ ${separator}`);
3586
+ console.log("\u{1F3AD} Running Scenario Tests");
3587
+ console.log(`${separator}`);
3588
+ console.log(`Follow it live: ${batchUrl}`);
3589
+ console.log(`${separator}
3590
+ `);
3591
+ const projectConfig = await getProjectConfig();
3592
+ if (!(projectConfig == null ? void 0 : projectConfig.headless)) {
3593
+ try {
3594
+ open(batchUrl);
3595
+ } catch (_) {
3596
+ }
3597
+ }
3598
+ }
3599
+ };
3600
+
3601
+ // src/events/event-reporter.ts
3602
+ var EventReporter = class {
3603
+ apiKey;
3604
+ eventsEndpoint;
3605
+ eventAlertMessageLogger;
3606
+ logger = new Logger("scenario.events.EventReporter");
3607
+ isEnabled;
3608
+ constructor(config2) {
3609
+ this.apiKey = config2.apiKey ?? "";
3610
+ this.eventsEndpoint = new URL("/api/scenario-events", config2.endpoint);
3611
+ this.eventAlertMessageLogger = new EventAlertMessageLogger();
3612
+ this.eventAlertMessageLogger.handleGreeting();
3613
+ this.isEnabled = this.apiKey.length > 0 && this.eventsEndpoint.href.length > 0;
3614
+ }
3615
+ /**
3616
+ * Posts an event to the configured endpoint.
3617
+ * Logs success/failure but doesn't throw - event posting shouldn't break scenario execution.
3618
+ */
3619
+ async postEvent(event) {
3620
+ if (!this.isEnabled) return {};
3621
+ const result = {};
3622
+ this.logger.debug(`[${event.type}] Posting event`, { event });
3623
+ const processedEvent = this.processEventForApi(event);
3624
+ try {
3625
+ const response = await fetch(this.eventsEndpoint.href, {
3626
+ method: "POST",
3627
+ body: JSON.stringify(processedEvent),
3628
+ headers: {
3629
+ "Content-Type": "application/json",
3630
+ "X-Auth-Token": this.apiKey
3631
+ }
3632
+ });
3633
+ this.logger.debug(
3634
+ `[${event.type}] Event POST response status: ${response.status}`
3635
+ );
3636
+ if (response.ok) {
3637
+ const data = await response.json();
3638
+ this.logger.debug(`[${event.type}] Event POST response:`, data);
3639
+ result.setUrl = data.url;
3640
+ } else {
3641
+ const errorText = await response.text();
3642
+ this.logger.error(`[${event.type}] Event POST failed:`, {
3643
+ endpoint: this.eventsEndpoint.href,
3644
+ status: response.status,
3645
+ statusText: response.statusText,
3646
+ error: errorText,
3647
+ event: JSON.stringify(processedEvent)
3648
+ });
3649
+ }
3650
+ } catch (error) {
3651
+ this.logger.error(`[${event.type}] Event POST error:`, {
3652
+ error,
3653
+ event: JSON.stringify(processedEvent),
3654
+ endpoint: this.eventsEndpoint.href
3655
+ });
3656
+ }
3657
+ return result;
3658
+ }
3659
+ /**
3660
+ * Processes event data to ensure API compatibility.
3661
+ * Converts message content objects to strings when needed.
3662
+ */
3663
+ processEventForApi(event) {
3664
+ if (event.type === "SCENARIO_MESSAGE_SNAPSHOT" /* MESSAGE_SNAPSHOT */) {
3665
+ return {
3666
+ ...event,
3667
+ messages: event.messages.map((message2) => ({
3668
+ ...message2,
3669
+ content: typeof message2.content !== "string" ? JSON.stringify(message2.content) : message2.content
3670
+ }))
3671
+ };
3672
+ }
3673
+ return event;
3674
+ }
3675
+ };
3676
+
3677
+ // src/events/event-bus.ts
3678
+ var EventBus = class _EventBus {
3679
+ static registry = /* @__PURE__ */ new Set();
3680
+ events$ = new Subject3();
3681
+ eventReporter;
3682
+ eventAlertMessageLogger;
3683
+ processingPromise = null;
3684
+ logger = new Logger("scenario.events.EventBus");
3685
+ static globalListeners = [];
3686
+ constructor(config2) {
3687
+ this.eventReporter = new EventReporter(config2);
3688
+ this.eventAlertMessageLogger = new EventAlertMessageLogger();
3689
+ _EventBus.registry.add(this);
3690
+ for (const listener of _EventBus.globalListeners) {
3691
+ listener(this);
3692
+ }
3693
+ }
3694
+ static getAllBuses() {
3695
+ return _EventBus.registry;
3696
+ }
3697
+ static addGlobalListener(listener) {
3698
+ _EventBus.globalListeners.push(listener);
3699
+ }
3700
+ /**
3701
+ * Publishes an event into the processing pipeline.
3702
+ */
3703
+ publish(event) {
3704
+ this.logger.debug(`[${event.type}] Publishing event`, {
3705
+ event
3706
+ });
3707
+ this.events$.next(event);
3708
+ }
3709
+ /**
3710
+ * Begins listening for and processing events.
3711
+ * Returns a promise that resolves when a RUN_FINISHED event is fully processed.
3712
+ */
3713
+ listen() {
3714
+ this.logger.debug("Listening for events");
3715
+ if (this.processingPromise) {
3716
+ return this.processingPromise;
3717
+ }
3718
+ this.processingPromise = new Promise((resolve, reject) => {
3719
+ this.events$.pipe(
3720
+ // Post events and get results
3721
+ concatMap(async (event) => {
3722
+ this.logger.debug(`[${event.type}] Processing event`, { event });
3723
+ const result = await this.eventReporter.postEvent(event);
3724
+ return { event, result };
3725
+ }),
3726
+ // Handle watch messages reactively
3727
+ tap(async ({ event, result }) => {
3728
+ if (event.type === "SCENARIO_RUN_STARTED" /* RUN_STARTED */ && result.setUrl) {
3729
+ await this.eventAlertMessageLogger.handleWatchMessage({
3730
+ scenarioSetId: event.scenarioSetId,
3731
+ scenarioRunId: event.scenarioRunId,
3732
+ setUrl: result.setUrl
3733
+ });
3734
+ }
3735
+ }),
3736
+ // Extract just the event for downstream processing
3737
+ map(({ event }) => event),
3738
+ catchError((error) => {
3739
+ this.logger.error("Error in event stream:", error);
3740
+ return EMPTY;
3741
+ })
3742
+ ).subscribe({
3743
+ next: (event) => {
3744
+ this.logger.debug(`[${event.type}] Event processed`, { event });
3745
+ if (event.type === "SCENARIO_RUN_FINISHED" /* RUN_FINISHED */) {
3746
+ resolve();
3747
+ }
3748
+ },
3749
+ error: (error) => {
3750
+ this.logger.error("Error in event stream:", error);
3751
+ reject(error);
3752
+ }
3753
+ });
3754
+ });
3755
+ return this.processingPromise;
3756
+ }
3757
+ /**
3758
+ * Stops accepting new events and drains the processing queue.
3759
+ */
3760
+ async drain() {
3761
+ this.logger.debug("Draining event stream");
3762
+ this.events$.complete();
3763
+ if (this.processingPromise) {
3764
+ await this.processingPromise;
3765
+ }
3766
+ }
3767
+ /**
3768
+ * Subscribes to an event stream.
3769
+ * @param source$ - The event stream to subscribe to.
3770
+ */
3771
+ subscribeTo(source$) {
3772
+ this.logger.debug("Subscribing to event stream");
3773
+ return source$.subscribe(this.events$);
3774
+ }
3775
+ /**
3776
+ * Expose the events$ observable for external subscription (read-only).
3777
+ */
3778
+ get eventsObservable() {
3779
+ return this.events$.asObservable();
3780
+ }
3781
+ };
3782
+
1486
3783
  // src/script/index.ts
1487
3784
  var script_exports = {};
1488
3785
  __export(script_exports, {
@@ -1501,19 +3798,27 @@ var agent = (content) => {
1501
3798
  return (_state, executor) => executor.agent(content);
1502
3799
  };
1503
3800
  var judge = (content) => {
1504
- return (_state, executor) => executor.judge(content);
3801
+ return async (_state, executor) => {
3802
+ await executor.judge(content);
3803
+ };
1505
3804
  };
1506
3805
  var user = (content) => {
1507
3806
  return (_state, executor) => executor.user(content);
1508
3807
  };
1509
3808
  var proceed = (turns, onTurn, onStep) => {
1510
- return (_state, executor) => executor.proceed(turns, onTurn, onStep);
3809
+ return async (_state, executor) => {
3810
+ await executor.proceed(turns, onTurn, onStep);
3811
+ };
1511
3812
  };
1512
3813
  var succeed = (reasoning) => {
1513
- return (_state, executor) => executor.succeed(reasoning);
3814
+ return async (_state, executor) => {
3815
+ await executor.succeed(reasoning);
3816
+ };
1514
3817
  };
1515
3818
  var fail = (reasoning) => {
1516
- return (_state, executor) => executor.fail(reasoning);
3819
+ return async (_state, executor) => {
3820
+ await executor.fail(reasoning);
3821
+ };
1517
3822
  };
1518
3823
 
1519
3824
  // src/runner/run.ts
@@ -1524,7 +3829,7 @@ async function run(cfg) {
1524
3829
  if (!cfg.description) {
1525
3830
  throw new Error("Scenario description is required");
1526
3831
  }
1527
- if ((cfg.maxTurns || 10) < 1) {
3832
+ if (cfg.maxTurns && cfg.maxTurns < 1) {
1528
3833
  throw new Error("Max turns must be at least 1");
1529
3834
  }
1530
3835
  if (cfg.agents.length === 0) {
@@ -1546,10 +3851,10 @@ async function run(cfg) {
1546
3851
  let eventBus = null;
1547
3852
  let subscription = null;
1548
3853
  try {
1549
- const envConfig = getEnv();
3854
+ const envConfig2 = getEnv();
1550
3855
  eventBus = new EventBus({
1551
- endpoint: envConfig.LANGWATCH_ENDPOINT,
1552
- apiKey: envConfig.LANGWATCH_API_KEY
3856
+ endpoint: envConfig2.LANGWATCH_ENDPOINT,
3857
+ apiKey: envConfig2.LANGWATCH_API_KEY
1553
3858
  });
1554
3859
  eventBus.listen();
1555
3860
  subscription = eventBus.subscribeTo(execution.events$);
@@ -1600,14 +3905,13 @@ function formatPart(part) {
1600
3905
  case "file":
1601
3906
  return `(file): ${part.filename} ${typeof part.data === "string" ? `url:${part.data}` : "base64:omitted"}`;
1602
3907
  case "tool-call":
1603
- return `(tool call): ${part.toolName} id:${part.toolCallId} args:(${JSON.stringify(part.args)})`;
3908
+ return `(tool call): ${part.toolName} id:${part.toolCallId} args:(${JSON.stringify(part.input)})`;
1604
3909
  case "tool-result":
1605
- return `(tool result): ${part.toolName} id:${part.toolCallId} result:(${JSON.stringify(part.result)})`;
3910
+ return `(tool result): ${part.toolName} id:${part.toolCallId} result:(${JSON.stringify(part.output)})`;
1606
3911
  case "reasoning":
1607
3912
  return `(reasoning): ${part.text}`;
1608
- case "redacted-reasoning":
1609
- return `(redacted reasoning): ${part.data}`;
1610
3913
  default:
3914
+ part;
1611
3915
  return `Unknown content: ${JSON.stringify(part)}`;
1612
3916
  }
1613
3917
  }
@@ -1625,9 +3929,11 @@ export {
1625
3929
  AgentAdapter,
1626
3930
  AgentRole,
1627
3931
  DEFAULT_MAX_TURNS,
1628
- DEFAULT_TEMPERATURE,
1629
3932
  DEFAULT_VERBOSE,
1630
3933
  JudgeAgentAdapter,
3934
+ JudgeSpanCollector,
3935
+ JudgeSpanDigestFormatter,
3936
+ RealtimeAgentAdapter,
1631
3937
  ScenarioExecution,
1632
3938
  ScenarioExecutionState,
1633
3939
  StateChangeEventType,
@@ -1639,6 +3945,8 @@ export {
1639
3945
  fail,
1640
3946
  judge,
1641
3947
  judgeAgent,
3948
+ judgeSpanCollector,
3949
+ judgeSpanDigestFormatter,
1642
3950
  message,
1643
3951
  proceed,
1644
3952
  run,