@langwatch/scenario 0.2.13 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -33,9 +33,11 @@ __export(index_exports, {
33
33
  AgentAdapter: () => AgentAdapter,
34
34
  AgentRole: () => AgentRole,
35
35
  DEFAULT_MAX_TURNS: () => DEFAULT_MAX_TURNS,
36
- DEFAULT_TEMPERATURE: () => DEFAULT_TEMPERATURE,
37
36
  DEFAULT_VERBOSE: () => DEFAULT_VERBOSE,
38
37
  JudgeAgentAdapter: () => JudgeAgentAdapter,
38
+ JudgeSpanCollector: () => JudgeSpanCollector,
39
+ JudgeSpanDigestFormatter: () => JudgeSpanDigestFormatter,
40
+ RealtimeAgentAdapter: () => RealtimeAgentAdapter,
39
41
  ScenarioExecution: () => ScenarioExecution,
40
42
  ScenarioExecutionState: () => ScenarioExecutionState,
41
43
  StateChangeEventType: () => StateChangeEventType,
@@ -47,6 +49,8 @@ __export(index_exports, {
47
49
  fail: () => fail,
48
50
  judge: () => judge,
49
51
  judgeAgent: () => judgeAgent,
52
+ judgeSpanCollector: () => judgeSpanCollector,
53
+ judgeSpanDigestFormatter: () => judgeSpanDigestFormatter,
50
54
  message: () => message,
51
55
  proceed: () => proceed,
52
56
  run: () => run,
@@ -58,132 +62,53 @@ __export(index_exports, {
58
62
  });
59
63
  module.exports = __toCommonJS(index_exports);
60
64
 
61
- // src/agents/index.ts
62
- var agents_exports = {};
63
- __export(agents_exports, {
64
- judgeAgent: () => judgeAgent,
65
- userSimulatorAgent: () => userSimulatorAgent
66
- });
67
-
68
- // src/agents/judge-agent.ts
69
- var import_ai = require("ai");
70
- var import_zod3 = require("zod");
71
-
72
- // src/domain/index.ts
73
- var domain_exports = {};
74
- __export(domain_exports, {
75
- AgentAdapter: () => AgentAdapter,
76
- AgentRole: () => AgentRole,
77
- DEFAULT_MAX_TURNS: () => DEFAULT_MAX_TURNS,
78
- DEFAULT_TEMPERATURE: () => DEFAULT_TEMPERATURE,
79
- DEFAULT_VERBOSE: () => DEFAULT_VERBOSE,
80
- JudgeAgentAdapter: () => JudgeAgentAdapter,
81
- UserSimulatorAgentAdapter: () => UserSimulatorAgentAdapter,
82
- allAgentRoles: () => allAgentRoles,
83
- defineConfig: () => defineConfig,
84
- scenarioProjectConfigSchema: () => scenarioProjectConfigSchema
85
- });
86
-
87
- // src/domain/core/config.ts
88
- var import_zod = require("zod");
89
- var DEFAULT_TEMPERATURE = 0;
90
- var scenarioProjectConfigSchema = import_zod.z.object({
91
- defaultModel: import_zod.z.object({
92
- model: import_zod.z.custom(),
93
- temperature: import_zod.z.number().min(0).max(1).optional().default(DEFAULT_TEMPERATURE),
94
- maxTokens: import_zod.z.number().optional()
95
- }).optional(),
96
- headless: import_zod.z.boolean().optional().default(
97
- typeof process !== "undefined" ? !["false", "0"].includes(process.env.SCENARIO_HEADLESS || "false") : false
98
- )
99
- }).strict();
100
- function defineConfig(config2) {
101
- return config2;
102
- }
103
-
104
- // src/domain/agents/index.ts
105
- var AgentRole = /* @__PURE__ */ ((AgentRole2) => {
106
- AgentRole2["USER"] = "User";
107
- AgentRole2["AGENT"] = "Agent";
108
- AgentRole2["JUDGE"] = "Judge";
109
- return AgentRole2;
110
- })(AgentRole || {});
111
- var allAgentRoles = [
112
- "User" /* USER */,
113
- "Agent" /* AGENT */,
114
- "Judge" /* JUDGE */
115
- ];
116
- var AgentAdapter = class {
117
- role = "Agent" /* AGENT */;
118
- };
119
- var UserSimulatorAgentAdapter = class {
120
- role = "User" /* USER */;
121
- };
122
- var JudgeAgentAdapter = class {
123
- role = "Judge" /* JUDGE */;
124
- };
125
-
126
- // src/domain/scenarios/index.ts
127
- var DEFAULT_MAX_TURNS = 10;
128
- var DEFAULT_VERBOSE = false;
65
+ // src/tracing/setup.ts
66
+ var import_node = require("langwatch/observability/node");
129
67
 
130
- // src/agents/utils.ts
131
- var toolMessageRole = "tool";
132
- var assistantMessageRole = "assistant";
133
- var userMessageRole = "user";
134
- var groupMessagesByToolBoundaries = (messages) => {
135
- const segments = [];
136
- let currentSegment = [];
137
- for (const message2 of messages) {
138
- currentSegment.push(message2);
139
- if (message2.role === toolMessageRole) {
140
- segments.push(currentSegment);
141
- currentSegment = [];
142
- }
68
+ // src/agents/judge/judge-span-collector.ts
69
+ var import_observability = require("langwatch/observability");
70
+ var JudgeSpanCollector = class {
71
+ spans = [];
72
+ onStart() {
143
73
  }
144
- if (currentSegment.length > 0) {
145
- segments.push(currentSegment);
74
+ onEnd(span) {
75
+ this.spans.push(span);
146
76
  }
147
- return segments;
148
- };
149
- var segmentHasToolMessages = (segment) => {
150
- return segment.some((message2) => {
151
- if (message2.role === toolMessageRole) return true;
152
- if (message2.role === assistantMessageRole && Array.isArray(message2.content)) {
153
- return message2.content.some((part) => part.type === "tool-call");
77
+ forceFlush() {
78
+ return Promise.resolve();
79
+ }
80
+ shutdown() {
81
+ this.spans = [];
82
+ return Promise.resolve();
83
+ }
84
+ /**
85
+ * Retrieves all spans associated with a specific thread.
86
+ * @param threadId - The thread identifier to filter spans by
87
+ * @returns Array of spans for the given thread
88
+ */
89
+ getSpansForThread(threadId) {
90
+ const spanMap = /* @__PURE__ */ new Map();
91
+ for (const span of this.spans) {
92
+ spanMap.set(span.spanContext().spanId, span);
154
93
  }
155
- return false;
156
- });
157
- };
158
- var reverseSegmentRoles = (segment) => {
159
- return segment.map((message2) => {
160
- const hasStringContent = typeof message2.content === "string";
161
- if (!hasStringContent) return message2;
162
- const roleMap = {
163
- [userMessageRole]: assistantMessageRole,
164
- [assistantMessageRole]: userMessageRole
165
- };
166
- const newRole = roleMap[message2.role];
167
- if (!newRole) return message2;
168
- return {
169
- role: newRole,
170
- content: message2.content
94
+ const belongsToThread = (span) => {
95
+ var _a;
96
+ if (span.attributes[import_observability.attributes.ATTR_LANGWATCH_THREAD_ID] === threadId) {
97
+ return true;
98
+ }
99
+ const parentId = (_a = span.parentSpanContext) == null ? void 0 : _a.spanId;
100
+ if (parentId && spanMap.has(parentId)) {
101
+ return belongsToThread(spanMap.get(parentId));
102
+ }
103
+ return false;
171
104
  };
172
- });
173
- };
174
- var messageRoleReversal = (messages) => {
175
- const segments = groupMessagesByToolBoundaries(messages);
176
- const processedSegments = segments.map(
177
- (segment) => segmentHasToolMessages(segment) ? segment : reverseSegmentRoles(segment)
178
- );
179
- return processedSegments.flat();
180
- };
181
- var criterionToParamName = (criterion) => {
182
- return criterion.replace(/"/g, "").replace(/[^a-zA-Z0-9]/g, "_").replace(/ /g, "_").toLowerCase().substring(0, 70);
105
+ return this.spans.filter(belongsToThread);
106
+ }
183
107
  };
108
+ var judgeSpanCollector = new JudgeSpanCollector();
184
109
 
185
110
  // src/config/env.ts
186
- var import_zod2 = require("zod");
111
+ var import_v4 = require("zod/v4");
187
112
 
188
113
  // src/config/log-levels.ts
189
114
  var LogLevel = /* @__PURE__ */ ((LogLevel2) => {
@@ -196,37 +121,37 @@ var LogLevel = /* @__PURE__ */ ((LogLevel2) => {
196
121
  var LOG_LEVELS = Object.values(LogLevel);
197
122
 
198
123
  // src/config/env.ts
199
- var envSchema = import_zod2.z.object({
124
+ var envSchema = import_v4.z.object({
200
125
  /**
201
126
  * LangWatch API key for event reporting.
202
127
  * If not provided, events will not be sent to LangWatch.
203
128
  */
204
- LANGWATCH_API_KEY: import_zod2.z.string().optional(),
129
+ LANGWATCH_API_KEY: import_v4.z.string().optional(),
205
130
  /**
206
131
  * LangWatch endpoint URL for event reporting.
207
132
  * Defaults to the production LangWatch endpoint.
208
133
  */
209
- LANGWATCH_ENDPOINT: import_zod2.z.string().url().optional().default("https://app.langwatch.ai"),
134
+ LANGWATCH_ENDPOINT: import_v4.z.string().url().optional().default("https://app.langwatch.ai"),
210
135
  /**
211
136
  * Disables simulation report info messages when set to any truthy value.
212
137
  * Useful for CI/CD environments or when you want cleaner output.
213
138
  */
214
- SCENARIO_DISABLE_SIMULATION_REPORT_INFO: import_zod2.z.string().optional().transform((val) => Boolean(val)),
139
+ SCENARIO_DISABLE_SIMULATION_REPORT_INFO: import_v4.z.string().optional().transform((val) => Boolean(val)),
215
140
  /**
216
141
  * Node environment - affects logging and behavior.
217
142
  * Defaults to 'development' if not specified.
218
143
  */
219
- NODE_ENV: import_zod2.z.enum(["development", "production", "test"]).default("development"),
144
+ NODE_ENV: import_v4.z.enum(["development", "production", "test"]).default("development"),
220
145
  /**
221
146
  * Case-insensitive log level for the scenario package.
222
147
  * Defaults to 'info' if not specified.
223
148
  */
224
- LOG_LEVEL: import_zod2.z.string().toUpperCase().pipe(import_zod2.z.nativeEnum(LogLevel)).optional().default("INFO" /* INFO */),
149
+ LOG_LEVEL: import_v4.z.string().toUpperCase().pipe(import_v4.z.nativeEnum(LogLevel)).optional().default("INFO" /* INFO */),
225
150
  /**
226
151
  * Scenario batch run ID.
227
152
  * If not provided, a random ID will be generated.
228
153
  */
229
- SCENARIO_BATCH_RUN_ID: import_zod2.z.string().optional()
154
+ SCENARIO_BATCH_RUN_ID: import_v4.z.string().optional()
230
155
  });
231
156
  function getEnv() {
232
157
  return envSchema.parse(process.env);
@@ -236,6 +161,79 @@ function getEnv() {
236
161
  var import_promises = __toESM(require("fs/promises"));
237
162
  var import_node_path = __toESM(require("path"));
238
163
  var import_node_url = require("url");
164
+
165
+ // src/domain/index.ts
166
+ var domain_exports = {};
167
+ __export(domain_exports, {
168
+ AgentAdapter: () => AgentAdapter,
169
+ AgentRole: () => AgentRole,
170
+ DEFAULT_MAX_TURNS: () => DEFAULT_MAX_TURNS,
171
+ DEFAULT_VERBOSE: () => DEFAULT_VERBOSE,
172
+ JudgeAgentAdapter: () => JudgeAgentAdapter,
173
+ UserSimulatorAgentAdapter: () => UserSimulatorAgentAdapter,
174
+ allAgentRoles: () => allAgentRoles,
175
+ defineConfig: () => defineConfig,
176
+ scenarioProjectConfigSchema: () => scenarioProjectConfigSchema
177
+ });
178
+
179
+ // src/domain/core/config.ts
180
+ var import_v43 = require("zod/v4");
181
+
182
+ // src/domain/core/schemas/model.schema.ts
183
+ var import_v42 = require("zod/v4");
184
+
185
+ // src/domain/core/constants.ts
186
+ var DEFAULT_TEMPERATURE = 0;
187
+
188
+ // src/domain/core/schemas/model.schema.ts
189
+ var modelSchema = import_v42.z.object({
190
+ model: import_v42.z.custom((val) => Boolean(val), {
191
+ message: "A model is required. Configure it in scenario.config.js defaultModel or pass directly to the agent."
192
+ }).describe("The OpenAI Language Model to use for generating responses."),
193
+ temperature: import_v42.z.number().min(0).max(1).optional().describe("The temperature for the language model.").default(DEFAULT_TEMPERATURE),
194
+ maxTokens: import_v42.z.number().optional().describe("The maximum number of tokens to generate.")
195
+ });
196
+
197
+ // src/domain/core/config.ts
198
+ var headless = typeof process !== "undefined" ? process.env.SCENARIO_HEADLESS === "true" : false;
199
+ var scenarioProjectConfigSchema = import_v43.z.object({
200
+ defaultModel: modelSchema.optional(),
201
+ headless: import_v43.z.boolean().optional().default(headless)
202
+ }).strict();
203
+ function defineConfig(config2) {
204
+ return config2;
205
+ }
206
+
207
+ // src/domain/agents/index.ts
208
+ var AgentRole = /* @__PURE__ */ ((AgentRole2) => {
209
+ AgentRole2["USER"] = "User";
210
+ AgentRole2["AGENT"] = "Agent";
211
+ AgentRole2["JUDGE"] = "Judge";
212
+ return AgentRole2;
213
+ })(AgentRole || {});
214
+ var allAgentRoles = [
215
+ "User" /* USER */,
216
+ "Agent" /* AGENT */,
217
+ "Judge" /* JUDGE */
218
+ ];
219
+ var AgentAdapter = class {
220
+ name;
221
+ role = "Agent" /* AGENT */;
222
+ };
223
+ var UserSimulatorAgentAdapter = class extends AgentAdapter {
224
+ name = "UserSimulatorAgent";
225
+ role = "User" /* USER */;
226
+ };
227
+ var JudgeAgentAdapter = class extends AgentAdapter {
228
+ name = "JudgeAgent";
229
+ role = "Judge" /* JUDGE */;
230
+ };
231
+
232
+ // src/domain/scenarios/index.ts
233
+ var DEFAULT_MAX_TURNS = 10;
234
+ var DEFAULT_VERBOSE = false;
235
+
236
+ // src/config/load.ts
239
237
  async function loadScenarioProjectConfig() {
240
238
  const cwd = process.cwd();
241
239
  const configNames = [
@@ -267,14 +265,14 @@ async function loadScenarioProjectConfig() {
267
265
 
268
266
  // src/utils/logger.ts
269
267
  var Logger = class _Logger {
270
- constructor(context) {
271
- this.context = context;
268
+ constructor(context2) {
269
+ this.context = context2;
272
270
  }
273
271
  /**
274
272
  * Creates a logger with context (e.g., class name)
275
273
  */
276
- static create(context) {
277
- return new _Logger(context);
274
+ static create(context2) {
275
+ return new _Logger(context2);
278
276
  }
279
277
  /**
280
278
  * Returns the current log level from environment.
@@ -373,131 +371,612 @@ async function getProjectConfig() {
373
371
  return config;
374
372
  }
375
373
 
376
- // src/utils/config.ts
377
- function mergeConfig(config2, projectConfig) {
378
- if (!projectConfig) {
379
- return config2;
380
- }
381
- return {
382
- ...projectConfig.defaultModel,
383
- ...config2
384
- };
385
- }
386
- function mergeAndValidateConfig(config2, projectConfig) {
387
- var _a;
388
- const mergedConfig = mergeConfig(config2, projectConfig);
389
- mergedConfig.model = mergedConfig.model ?? ((_a = projectConfig == null ? void 0 : projectConfig.defaultModel) == null ? void 0 : _a.model);
390
- if (!mergedConfig.model) {
391
- throw new Error("Model is required");
392
- }
393
- return mergedConfig;
394
- }
395
-
396
- // src/agents/judge-agent.ts
397
- function buildSystemPrompt(criteria, description) {
398
- const criteriaList = (criteria == null ? void 0 : criteria.map((criterion, idx) => `${idx + 1}. ${criterion}`).join("\n")) || "No criteria provided";
399
- return `
400
- <role>
401
- You are an LLM as a judge watching a simulated conversation as it plays out live to determine if the agent under test meets the criteria or not.
402
- </role>
403
-
404
- <goal>
405
- Your goal is to determine if you already have enough information to make a verdict of the scenario below, or if the conversation should continue for longer.
406
- If you do have enough information, use the finish_test tool to determine if all the criteria have been met, if not, use the continue_test tool to let the next step play out.
407
- </goal>
374
+ // src/tracing/setup.ts
375
+ var envConfig = getEnv();
376
+ var observabilityHandle = (0, import_node.setupObservability)({
377
+ langwatch: {
378
+ apiKey: envConfig.LANGWATCH_API_KEY,
379
+ endpoint: envConfig.LANGWATCH_ENDPOINT
380
+ },
381
+ spanProcessors: [judgeSpanCollector]
382
+ });
408
383
 
409
- <scenario>
410
- ${description}
411
- </scenario>
384
+ // src/agents/index.ts
385
+ var agents_exports = {};
386
+ __export(agents_exports, {
387
+ JudgeSpanCollector: () => JudgeSpanCollector,
388
+ JudgeSpanDigestFormatter: () => JudgeSpanDigestFormatter,
389
+ RealtimeAgentAdapter: () => RealtimeAgentAdapter,
390
+ judgeAgent: () => judgeAgent,
391
+ judgeSpanCollector: () => judgeSpanCollector,
392
+ judgeSpanDigestFormatter: () => judgeSpanDigestFormatter,
393
+ userSimulatorAgent: () => userSimulatorAgent
394
+ });
412
395
 
413
- <criteria>
414
- ${criteriaList}
415
- </criteria>
396
+ // src/agents/judge/judge-agent.ts
397
+ var import_ai2 = require("ai");
398
+ var import_v44 = require("zod/v4");
416
399
 
417
- <rules>
418
- - Be strict, do not let the conversation continue if the agent already broke one of the "do not" or "should not" criteria.
419
- - DO NOT make any judgment calls that are not explicitly listed in the success or failure criteria, withhold judgement if necessary
420
- </rules>
421
- `.trim();
422
- }
423
- function buildContinueTestTool() {
424
- return (0, import_ai.tool)({
425
- description: "Continue the test with the next step",
426
- parameters: import_zod3.z.object({})
427
- });
428
- }
429
- function buildFinishTestTool(criteria) {
430
- const criteriaNames = criteria.map(criterionToParamName);
431
- return (0, import_ai.tool)({
432
- description: "Complete the test with a final verdict",
433
- parameters: import_zod3.z.object({
434
- criteria: import_zod3.z.object(
435
- Object.fromEntries(
436
- criteriaNames.map((name, idx) => [
437
- name,
438
- import_zod3.z.enum(["true", "false", "inconclusive"]).describe(criteria[idx])
439
- ])
440
- )
441
- ).strict().describe("Strict verdict for each criterion"),
442
- reasoning: import_zod3.z.string().describe("Explanation of what the final verdict should be"),
443
- verdict: import_zod3.z.enum(["success", "failure", "inconclusive"]).describe("The final verdict of the test")
444
- })
445
- });
446
- }
447
- var JudgeAgent = class extends JudgeAgentAdapter {
448
- constructor(cfg) {
449
- super();
450
- this.cfg = cfg;
451
- this.criteria = cfg.criteria;
452
- this.role = "Judge" /* JUDGE */;
453
- }
454
- logger = new Logger("JudgeAgent");
455
- role = "Judge" /* JUDGE */;
456
- criteria;
457
- async call(input) {
458
- var _a;
459
- const cfg = this.cfg;
460
- const systemPrompt = cfg.systemPrompt ?? buildSystemPrompt(cfg.criteria, input.scenarioConfig.description);
461
- const messages = [
462
- { role: "system", content: systemPrompt },
463
- ...input.messages
464
- ];
465
- const isLastMessage = input.scenarioState.currentTurn === input.scenarioConfig.maxTurns;
466
- const projectConfig = await getProjectConfig();
467
- const mergedConfig = mergeAndValidateConfig(cfg, projectConfig);
468
- if (!mergedConfig.model) {
469
- throw new Error("Model is required for the judge agent");
400
+ // src/agents/judge/judge-utils.ts
401
+ function truncateBase64Media(value) {
402
+ var _a;
403
+ if (typeof value === "string") {
404
+ const dataUrlMatch = value.match(
405
+ /^data:((image|audio|video)\/[a-z0-9+.-]+);base64,(.+)$/i
406
+ );
407
+ if (dataUrlMatch) {
408
+ const mimeType = dataUrlMatch[1];
409
+ const mediaType = dataUrlMatch[2].toUpperCase();
410
+ const size = dataUrlMatch[3].length;
411
+ return `[${mediaType}: ${mimeType}, ~${size} bytes]`;
470
412
  }
471
- const tools = {
472
- continue_test: buildContinueTestTool(),
473
- finish_test: buildFinishTestTool(cfg.criteria)
474
- };
475
- const enforceJudgement = input.judgmentRequest;
476
- const hasCriteria = cfg.criteria.length && cfg.criteria.length > 0;
477
- if (enforceJudgement && !hasCriteria) {
413
+ return value;
414
+ }
415
+ if (Array.isArray(value)) {
416
+ return value.map(truncateBase64Media);
417
+ }
418
+ if (value && typeof value === "object") {
419
+ const obj = value;
420
+ if (obj.type === "file" && typeof obj.mediaType === "string" && typeof obj.data === "string") {
421
+ const mediaType = obj.mediaType;
422
+ const category = ((_a = mediaType.split("/")[0]) == null ? void 0 : _a.toUpperCase()) ?? "FILE";
478
423
  return {
479
- success: false,
480
- messages: [],
481
- reasoning: "JudgeAgent: No criteria was provided to be judged against",
482
- metCriteria: [],
483
- unmetCriteria: []
424
+ ...obj,
425
+ data: `[${category}: ${mediaType}, ~${obj.data.length} bytes]`
484
426
  };
485
427
  }
486
- const toolChoice = (isLastMessage || enforceJudgement) && hasCriteria ? { type: "tool", toolName: "finish_test" } : "required";
487
- const completion = await this.generateText({
488
- model: mergedConfig.model,
489
- messages,
490
- temperature: mergedConfig.temperature ?? 0,
491
- maxTokens: mergedConfig.maxTokens,
428
+ if (obj.type === "image" && typeof obj.image === "string") {
429
+ const imageData = obj.image;
430
+ const dataUrlMatch = imageData.match(
431
+ /^data:((image)\/[a-z0-9+.-]+);base64,(.+)$/i
432
+ );
433
+ if (dataUrlMatch) {
434
+ return {
435
+ ...obj,
436
+ image: `[IMAGE: ${dataUrlMatch[1]}, ~${dataUrlMatch[3].length} bytes]`
437
+ };
438
+ }
439
+ if (imageData.length > 1e3 && /^[A-Za-z0-9+/=]+$/.test(imageData)) {
440
+ return {
441
+ ...obj,
442
+ image: `[IMAGE: unknown, ~${imageData.length} bytes]`
443
+ };
444
+ }
445
+ }
446
+ const result = {};
447
+ for (const [key, val] of Object.entries(obj)) {
448
+ result[key] = truncateBase64Media(val);
449
+ }
450
+ return result;
451
+ }
452
+ return value;
453
+ }
454
+ var JudgeUtils = {
455
+ /**
456
+ * Builds a minimal transcript from messages for judge evaluation.
457
+ * Truncates base64 media to reduce token usage.
458
+ * @param messages - Array of CoreMessage from conversation
459
+ * @returns Plain text transcript with one message per line
460
+ */
461
+ buildTranscriptFromMessages(messages) {
462
+ return messages.map((msg) => {
463
+ const truncatedContent = truncateBase64Media(msg.content);
464
+ return `${msg.role}: ${JSON.stringify(truncatedContent)}`;
465
+ }).join("\n");
466
+ }
467
+ };
468
+
469
+ // src/agents/llm-invoker.factory.ts
470
+ var import_ai = require("ai");
471
+ var createLLMInvoker = (logger2) => {
472
+ return async (params) => {
473
+ try {
474
+ return await (0, import_ai.generateText)({
475
+ ...params,
476
+ experimental_telemetry: { isEnabled: true }
477
+ });
478
+ } catch (error) {
479
+ logger2.error("Error generating text", { error });
480
+ throw error;
481
+ }
482
+ };
483
+ };
484
+
485
+ // src/agents/utils.ts
486
+ var toolMessageRole = "tool";
487
+ var assistantMessageRole = "assistant";
488
+ var userMessageRole = "user";
489
+ var groupMessagesByToolBoundaries = (messages) => {
490
+ const segments = [];
491
+ let currentSegment = [];
492
+ for (const message2 of messages) {
493
+ currentSegment.push(message2);
494
+ if (message2.role === toolMessageRole) {
495
+ segments.push(currentSegment);
496
+ currentSegment = [];
497
+ }
498
+ }
499
+ if (currentSegment.length > 0) {
500
+ segments.push(currentSegment);
501
+ }
502
+ return segments;
503
+ };
504
+ var segmentHasToolMessages = (segment) => {
505
+ return segment.some((message2) => {
506
+ if (message2.role === toolMessageRole) return true;
507
+ if (message2.role === assistantMessageRole && Array.isArray(message2.content)) {
508
+ return message2.content.some((part) => part.type === "tool-call");
509
+ }
510
+ return false;
511
+ });
512
+ };
513
+ var reverseSegmentRoles = (segment) => {
514
+ return segment.map((message2) => {
515
+ const hasStringContent = typeof message2.content === "string";
516
+ if (!hasStringContent) return message2;
517
+ const roleMap = {
518
+ [userMessageRole]: assistantMessageRole,
519
+ [assistantMessageRole]: userMessageRole
520
+ };
521
+ const newRole = roleMap[message2.role];
522
+ if (!newRole) return message2;
523
+ return {
524
+ role: newRole,
525
+ content: message2.content
526
+ };
527
+ });
528
+ };
529
+ var messageRoleReversal = (messages) => {
530
+ const segments = groupMessagesByToolBoundaries(messages);
531
+ const processedSegments = segments.map(
532
+ (segment) => segmentHasToolMessages(segment) ? segment : reverseSegmentRoles(segment)
533
+ );
534
+ return processedSegments.flat();
535
+ };
536
+ var criterionToParamName = (criterion) => {
537
+ return criterion.replace(/"/g, "").replace(/[^a-zA-Z0-9]/g, "_").replace(/ /g, "_").toLowerCase().substring(0, 70);
538
+ };
539
+
540
+ // src/agents/judge/judge-span-digest-formatter.ts
541
+ var import_observability2 = require("langwatch/observability");
542
+
543
+ // src/agents/judge/deep-transform.ts
544
+ function deepTransform(value, fn) {
545
+ const result = fn(value);
546
+ if (result !== value) return result;
547
+ if (Array.isArray(value)) {
548
+ return value.map((v) => deepTransform(v, fn));
549
+ }
550
+ if (value !== null && typeof value === "object") {
551
+ const out = {};
552
+ for (const [k, v] of Object.entries(value)) {
553
+ out[k] = deepTransform(v, fn);
554
+ }
555
+ return out;
556
+ }
557
+ return value;
558
+ }
559
+
560
+ // src/agents/judge/string-deduplicator.ts
561
+ var StringDeduplicator = class {
562
+ seen = /* @__PURE__ */ new Map();
563
+ threshold;
564
+ constructor(params) {
565
+ this.threshold = params.threshold;
566
+ }
567
+ /**
568
+ * Resets seen strings for a new digest.
569
+ */
570
+ reset() {
571
+ this.seen.clear();
572
+ }
573
+ /**
574
+ * Processes a string, returning duplicate marker if seen before.
575
+ * @param str - String to process
576
+ * @returns Original string or duplicate marker
577
+ */
578
+ process(str) {
579
+ if (str.length < this.threshold) return str;
580
+ const key = this.normalize(str);
581
+ if (this.seen.has(key)) return "[DUPLICATE - SEE ABOVE]";
582
+ this.seen.set(key, true);
583
+ return str;
584
+ }
585
+ /**
586
+ * Normalizes string for comparison (whitespace, case).
587
+ */
588
+ normalize(str) {
589
+ return str.replace(/\\[nrt]/g, " ").replace(/[\n\r\t]/g, " ").replace(/\s+/g, " ").trim().toLowerCase();
590
+ }
591
+ };
592
+
593
+ // src/agents/judge/truncate-media.ts
594
+ function truncateMediaUrl(str) {
595
+ const match = str.match(
596
+ /^data:((image|audio|video)\/[a-z0-9+.-]+);base64,(.+)$/i
597
+ );
598
+ if (!match) return str;
599
+ const [, mimeType, category, data] = match;
600
+ return `[${category.toUpperCase()}: ${mimeType}, ~${data.length} bytes]`;
601
+ }
602
+ function truncateMediaPart(v) {
603
+ var _a;
604
+ if (v === null || typeof v !== "object" || Array.isArray(v)) return null;
605
+ const obj = v;
606
+ if (obj.type === "file" && typeof obj.mediaType === "string" && typeof obj.data === "string") {
607
+ const category = ((_a = obj.mediaType.split("/")[0]) == null ? void 0 : _a.toUpperCase()) ?? "FILE";
608
+ return {
609
+ ...obj,
610
+ data: `[${category}: ${obj.mediaType}, ~${obj.data.length} bytes]`
611
+ };
612
+ }
613
+ if (obj.type === "image" && typeof obj.image === "string") {
614
+ const imageData = obj.image;
615
+ const dataUrlMatch = imageData.match(
616
+ /^data:((image)\/[a-z0-9+.-]+);base64,(.+)$/i
617
+ );
618
+ if (dataUrlMatch) {
619
+ return {
620
+ ...obj,
621
+ image: `[IMAGE: ${dataUrlMatch[1]}, ~${dataUrlMatch[3].length} bytes]`
622
+ };
623
+ }
624
+ if (imageData.length > 1e3 && /^[A-Za-z0-9+/=]+$/.test(imageData)) {
625
+ return {
626
+ ...obj,
627
+ image: `[IMAGE: unknown, ~${imageData.length} bytes]`
628
+ };
629
+ }
630
+ }
631
+ return null;
632
+ }
633
+
634
+ // src/agents/judge/judge-span-digest-formatter.ts
635
+ var JudgeSpanDigestFormatter = class {
636
+ logger = new Logger("JudgeSpanDigestFormatter");
637
+ deduplicator = new StringDeduplicator({ threshold: 50 });
638
+ /**
639
+ * Formats spans into a complete digest with full content and nesting.
640
+ * @param spans - All spans for a thread
641
+ * @returns Plain text digest
642
+ */
643
+ format(spans) {
644
+ this.deduplicator.reset();
645
+ this.logger.debug("format() called", {
646
+ spanCount: spans.length,
647
+ spanNames: spans.map((s) => s.name)
648
+ });
649
+ if (spans.length === 0) {
650
+ this.logger.debug("No spans to format");
651
+ return "No spans recorded.";
652
+ }
653
+ const sortedSpans = this.sortByStartTime(spans);
654
+ const tree = this.buildHierarchy(sortedSpans);
655
+ const totalDuration = this.calculateTotalDuration(sortedSpans);
656
+ this.logger.debug("Hierarchy built", {
657
+ rootCount: tree.length,
658
+ totalDuration
659
+ });
660
+ const lines = [
661
+ `Spans: ${spans.length} | Total Duration: ${this.formatDuration(
662
+ totalDuration
663
+ )}`,
664
+ ""
665
+ ];
666
+ let sequence = 1;
667
+ const rootCount = tree.length;
668
+ tree.forEach((node, idx) => {
669
+ sequence = this.renderNode(
670
+ node,
671
+ lines,
672
+ 0,
673
+ sequence,
674
+ idx === rootCount - 1
675
+ );
676
+ });
677
+ const errors = this.collectErrors(spans);
678
+ if (errors.length > 0) {
679
+ lines.push("");
680
+ lines.push("=== ERRORS ===");
681
+ errors.forEach((e) => lines.push(e));
682
+ }
683
+ return lines.join("\n");
684
+ }
685
+ sortByStartTime(spans) {
686
+ return [...spans].sort((a, b) => {
687
+ const aTime = this.hrTimeToMs(a.startTime);
688
+ const bTime = this.hrTimeToMs(b.startTime);
689
+ return aTime - bTime;
690
+ });
691
+ }
692
+ buildHierarchy(spans) {
693
+ var _a;
694
+ const spanMap = /* @__PURE__ */ new Map();
695
+ const roots = [];
696
+ for (const span of spans) {
697
+ spanMap.set(span.spanContext().spanId, { span, children: [] });
698
+ }
699
+ for (const span of spans) {
700
+ const node = spanMap.get(span.spanContext().spanId);
701
+ const parentId = (_a = span.parentSpanContext) == null ? void 0 : _a.spanId;
702
+ if (parentId && spanMap.has(parentId)) {
703
+ spanMap.get(parentId).children.push(node);
704
+ } else {
705
+ roots.push(node);
706
+ }
707
+ }
708
+ return roots;
709
+ }
710
+ renderNode(node, lines, depth, sequence, isLast = true) {
711
+ const span = node.span;
712
+ const duration = this.calculateSpanDuration(span);
713
+ const timestamp = this.formatTimestamp(span.startTime);
714
+ const status = this.getStatusIndicator(span);
715
+ const prefix = this.getTreePrefix(depth, isLast);
716
+ lines.push(
717
+ `${prefix}[${sequence}] ${new Date(timestamp).toISOString()} ${span.name} (${this.formatDuration(duration)})${status}`
718
+ );
719
+ const attrIndent = this.getAttrIndent(depth, isLast);
720
+ const attrs = this.cleanAttributes(span.attributes);
721
+ if (Object.keys(attrs).length > 0) {
722
+ for (const [key, value] of Object.entries(attrs)) {
723
+ lines.push(`${attrIndent}${key}: ${this.formatValue(value)}`);
724
+ }
725
+ }
726
+ if (span.events.length > 0) {
727
+ for (const event of span.events) {
728
+ lines.push(`${attrIndent}[event] ${event.name}`);
729
+ if (event.attributes) {
730
+ const eventAttrs = this.cleanAttributes(event.attributes);
731
+ for (const [key, value] of Object.entries(eventAttrs)) {
732
+ lines.push(`${attrIndent} ${key}: ${this.formatValue(value)}`);
733
+ }
734
+ }
735
+ }
736
+ }
737
+ lines.push("");
738
+ let nextSeq = sequence + 1;
739
+ const childCount = node.children.length;
740
+ node.children.forEach((child, idx) => {
741
+ nextSeq = this.renderNode(
742
+ child,
743
+ lines,
744
+ depth + 1,
745
+ nextSeq,
746
+ idx === childCount - 1
747
+ );
748
+ });
749
+ return nextSeq;
750
+ }
751
+ getTreePrefix(depth, isLast) {
752
+ if (depth === 0) return "";
753
+ const connector = isLast ? "\u2514\u2500\u2500 " : "\u251C\u2500\u2500 ";
754
+ return "\u2502 ".repeat(depth - 1) + connector;
755
+ }
756
+ getAttrIndent(depth, isLast) {
757
+ if (depth === 0) return " ";
758
+ const continuation = isLast ? " " : "\u2502 ";
759
+ return "\u2502 ".repeat(depth - 1) + continuation + " ";
760
+ }
761
+ cleanAttributes(attrs) {
762
+ const cleaned = {};
763
+ const seen = /* @__PURE__ */ new Set();
764
+ const excludedKeys = [
765
+ import_observability2.attributes.ATTR_LANGWATCH_THREAD_ID,
766
+ "langwatch.scenario.id",
767
+ "langwatch.scenario.name"
768
+ ];
769
+ for (const [key, value] of Object.entries(attrs)) {
770
+ if (excludedKeys.includes(key)) {
771
+ continue;
772
+ }
773
+ const cleanKey = key.replace(/^(langwatch)\./, "");
774
+ if (!seen.has(cleanKey)) {
775
+ seen.add(cleanKey);
776
+ cleaned[cleanKey] = value;
777
+ }
778
+ }
779
+ return cleaned;
780
+ }
781
+ formatValue(value) {
782
+ const processed = this.transformValue(value);
783
+ return typeof processed === "string" ? processed : JSON.stringify(processed);
784
+ }
785
+ transformValue(value) {
786
+ return deepTransform(value, (v) => {
787
+ const mediaPart = truncateMediaPart(v);
788
+ if (mediaPart) return mediaPart;
789
+ if (typeof v !== "string") return v;
790
+ return this.transformString(v);
791
+ });
792
+ }
793
+ transformString(str) {
794
+ if (this.looksLikeJson(str)) {
795
+ try {
796
+ const processed = this.transformValue(JSON.parse(str));
797
+ return JSON.stringify(processed);
798
+ } catch {
799
+ }
800
+ }
801
+ const truncated = truncateMediaUrl(str);
802
+ if (truncated !== str) return truncated;
803
+ return this.deduplicator.process(str);
804
+ }
805
+ looksLikeJson(str) {
806
+ const t = str.trim();
807
+ return t.startsWith("{") && t.endsWith("}") || t.startsWith("[") && t.endsWith("]");
808
+ }
809
+ hrTimeToMs(hrTime) {
810
+ return hrTime[0] * 1e3 + hrTime[1] / 1e6;
811
+ }
812
+ calculateSpanDuration(span) {
813
+ return this.hrTimeToMs(span.endTime) - this.hrTimeToMs(span.startTime);
814
+ }
815
+ calculateTotalDuration(spans) {
816
+ if (spans.length === 0) return 0;
817
+ const first = this.hrTimeToMs(spans[0].startTime);
818
+ const last = Math.max(...spans.map((s) => this.hrTimeToMs(s.endTime)));
819
+ return last - first;
820
+ }
821
+ formatDuration(ms) {
822
+ if (ms < 1e3) return `${Math.round(ms)}ms`;
823
+ return `${(ms / 1e3).toFixed(2)}s`;
824
+ }
825
+ formatTimestamp(hrTime) {
826
+ const ms = this.hrTimeToMs(hrTime);
827
+ return new Date(ms).toISOString();
828
+ }
829
+ getStatusIndicator(span) {
830
+ if (span.status.code === 2) {
831
+ return ` \u26A0\uFE0F ERROR: ${span.status.message ?? "unknown"}`;
832
+ }
833
+ return "";
834
+ }
835
+ collectErrors(spans) {
836
+ return spans.filter((s) => s.status.code === 2).map((s) => `- ${s.name}: ${s.status.message ?? "unknown error"}`);
837
+ }
838
+ };
839
+ var judgeSpanDigestFormatter = new JudgeSpanDigestFormatter();
840
+
841
+ // src/agents/judge/judge-agent.ts
842
/**
 * Builds the default system prompt for the judge agent.
 *
 * @param criteria - Optional list of criteria; rendered as a numbered list, or
 *                   "No criteria provided" when missing or empty.
 * @param description - Scenario description interpolated into the prompt.
 * @returns The trimmed prompt text.
 */
function buildSystemPrompt(criteria, description) {
  let criteriaList = "No criteria provided";
  if (criteria != null) {
    const numbered = criteria.map((criterion, idx) => `${idx + 1}. ${criterion}`).join("\n");
    if (numbered) {
      criteriaList = numbered;
    }
  }
  const prompt = `
<role>
You are an LLM as a judge watching a simulated conversation as it plays out live to determine if the agent under test meets the criteria or not.
</role>

<goal>
Your goal is to determine if you already have enough information to make a verdict of the scenario below, or if the conversation should continue for longer.
If you do have enough information, use the finish_test tool to determine if all the criteria have been met, if not, use the continue_test tool to let the next step play out.
</goal>

<scenario>
${description}
</scenario>

<criteria>
${criteriaList}
</criteria>

<rules>
- Be strict, do not let the conversation continue if the agent already broke one of the "do not" or "should not" criteria.
- DO NOT make any judgment calls that are not explicitly listed in the success or failure criteria, withhold judgement if necessary
</rules>
`;
  return prompt.trim();
}
868
/**
 * Creates the argument-less `continue_test` tool definition offered to the
 * judge model so it can let the conversation proceed.
 *
 * @returns The tool object produced by the AI SDK `tool` helper.
 */
function buildContinueTestTool() {
  const emptyInput = import_v44.z.object({});
  const definition = {
    description: "Continue the test with the next step",
    inputSchema: emptyInput
  };
  return (0, import_ai2.tool)(definition);
}
874
/**
 * Creates the `finish_test` tool definition used by the judge model to deliver
 * a verdict.
 *
 * The input schema has one "true"/"false"/"inconclusive" field per criterion
 * (keyed by `criterionToParamName`, described by the criterion text), plus a
 * free-text `reasoning` and an overall `verdict`.
 *
 * @param criteria - Criterion texts to be judged.
 * @returns The tool object produced by the AI SDK `tool` helper.
 */
function buildFinishTestTool(criteria) {
  const paramNames = criteria.map(criterionToParamName);
  const perCriterionEntries = paramNames.map((paramName, position) => {
    const verdictField = import_v44.z.enum(["true", "false", "inconclusive"]).describe(criteria[position]);
    return [paramName, verdictField];
  });
  const criteriaSchema = import_v44.z.object(Object.fromEntries(perCriterionEntries)).strict().describe("Strict verdict for each criterion");
  const reasoningSchema = import_v44.z.string().describe("Explanation of what the final verdict should be");
  const verdictSchema = import_v44.z.enum(["success", "failure", "inconclusive"]).describe("The final verdict of the test");
  return (0, import_ai2.tool)({
    description: "Complete the test with a final verdict",
    inputSchema: import_v44.z.object({
      criteria: criteriaSchema,
      reasoning: reasoningSchema,
      verdict: verdictSchema
    })
  });
}
892
+ var JudgeAgent = class extends JudgeAgentAdapter {
893
+ constructor(cfg) {
894
+ super();
895
+ this.cfg = cfg;
896
+ this.criteria = cfg.criteria;
897
+ this.spanCollector = cfg.spanCollector ?? judgeSpanCollector;
898
+ }
899
+ logger = new Logger("JudgeAgent");
900
+ spanCollector;
901
+ role = "Judge" /* JUDGE */;
902
+ criteria;
903
+ /**
904
+ * LLM invocation function. Can be overridden to customize LLM behavior.
905
+ */
906
+ invokeLLM = createLLMInvoker(this.logger);
907
+ async call(input) {
908
+ var _a, _b, _c;
909
+ this.logger.debug("call() invoked", {
910
+ threadId: input.threadId,
911
+ currentTurn: input.scenarioState.currentTurn,
912
+ maxTurns: input.scenarioConfig.maxTurns,
913
+ judgmentRequest: input.judgmentRequest
914
+ });
915
+ const digest = this.getOpenTelemetryTracesDigest(input.threadId);
916
+ this.logger.debug("OpenTelemetry traces built", { digest });
917
+ const transcript = JudgeUtils.buildTranscriptFromMessages(input.messages);
918
+ const contentForJudge = `
919
+ <transcript>
920
+ ${transcript}
921
+ </transcript>
922
+ <opentelemetry_traces>
923
+ ${digest}
924
+ </opentelemetry_traces>
925
+ `;
926
+ const cfg = this.cfg;
927
+ const systemPrompt = cfg.systemPrompt ?? buildSystemPrompt(cfg.criteria, input.scenarioConfig.description);
928
+ const messages = [
929
+ { role: "system", content: systemPrompt },
930
+ { role: "user", content: contentForJudge }
931
+ ];
932
+ const isLastMessage = input.scenarioState.currentTurn === input.scenarioConfig.maxTurns;
933
+ const projectConfig = await getProjectConfig();
934
+ const mergedConfig = modelSchema.parse({
935
+ ...projectConfig == null ? void 0 : projectConfig.defaultModel,
936
+ ...cfg
937
+ });
938
+ const tools = {
939
+ continue_test: buildContinueTestTool(),
940
+ finish_test: buildFinishTestTool(cfg.criteria)
941
+ };
942
+ const enforceJudgement = input.judgmentRequest;
943
+ const hasCriteria = cfg.criteria.length && cfg.criteria.length > 0;
944
+ if (enforceJudgement && !hasCriteria) {
945
+ return {
946
+ success: false,
947
+ reasoning: "JudgeAgent: No criteria was provided to be judged against",
948
+ metCriteria: [],
949
+ unmetCriteria: []
950
+ };
951
+ }
952
+ const toolChoice = (isLastMessage || enforceJudgement) && hasCriteria ? { type: "tool", toolName: "finish_test" } : "required";
953
+ this.logger.debug("Calling LLM", {
954
+ model: mergedConfig.model,
955
+ toolChoice,
956
+ isLastMessage,
957
+ enforceJudgement
958
+ });
959
+ const completion = await this.invokeLLM({
960
+ model: mergedConfig.model,
961
+ messages,
962
+ temperature: mergedConfig.temperature ?? 0,
963
+ maxOutputTokens: mergedConfig.maxTokens,
492
964
  tools,
493
965
  toolChoice
494
966
  });
967
+ this.logger.debug("LLM response received", {
968
+ toolCallCount: ((_a = completion.toolCalls) == null ? void 0 : _a.length) ?? 0,
969
+ toolCalls: (_b = completion.toolCalls) == null ? void 0 : _b.map((tc) => ({
970
+ toolName: tc.toolName,
971
+ args: tc.input
972
+ }))
973
+ });
495
974
  let args;
496
- if ((_a = completion.toolCalls) == null ? void 0 : _a.length) {
975
+ if ((_c = completion.toolCalls) == null ? void 0 : _c.length) {
497
976
  const toolCall = completion.toolCalls[0];
498
977
  switch (toolCall.toolName) {
499
978
  case "finish_test": {
500
- args = toolCall.args;
979
+ args = toolCall.input;
501
980
  const verdict = args.verdict || "inconclusive";
502
981
  const reasoning = args.reasoning || "No reasoning provided";
503
982
  const criteria = args.criteria || {};
@@ -508,20 +987,21 @@ var JudgeAgent = class extends JudgeAgentAdapter {
508
987
  const unmetCriteria = cfg.criteria.filter(
509
988
  (_, i) => criteriaValues[i] !== "true"
510
989
  );
511
- return {
990
+ const result = {
512
991
  success: verdict === "success",
513
- messages: input.messages,
514
992
  reasoning,
515
993
  metCriteria,
516
994
  unmetCriteria
517
995
  };
996
+ this.logger.debug("finish_test result", result);
997
+ return result;
518
998
  }
519
999
  case "continue_test":
520
- return [];
1000
+ this.logger.debug("continue_test - proceeding to next turn");
1001
+ return null;
521
1002
  default:
522
1003
  return {
523
1004
  success: false,
524
- messages: input.messages,
525
1005
  reasoning: `JudgeAgent: Unknown tool call: ${toolCall.toolName}`,
526
1006
  metCriteria: [],
527
1007
  unmetCriteria: cfg.criteria
@@ -530,101 +1010,1184 @@ var JudgeAgent = class extends JudgeAgentAdapter {
530
1010
  }
531
1011
  return {
532
1012
  success: false,
533
- messages: input.messages,
534
1013
  reasoning: `JudgeAgent: No tool call found in LLM output`,
535
1014
  metCriteria: [],
536
1015
  unmetCriteria: cfg.criteria
537
1016
  };
538
1017
  }
539
- async generateText(input) {
540
- try {
541
- return await (0, import_ai.generateText)(input);
542
- } catch (error) {
543
- this.logger.error("Error generating text", { error });
544
- throw error;
545
- }
1018
+ getOpenTelemetryTracesDigest(threadId) {
1019
+ const spans = this.spanCollector.getSpansForThread(threadId);
1020
+ const digest = judgeSpanDigestFormatter.format(spans);
1021
+ return digest;
546
1022
  }
547
1023
  };
548
1024
  var judgeAgent = (cfg) => {
549
1025
  return new JudgeAgent(cfg);
550
1026
  };
551
1027
 
552
- // src/agents/user-simulator-agent.ts
553
- var import_ai2 = require("ai");
554
- function buildSystemPrompt2(description) {
555
- return `
556
- <role>
557
- You are pretending to be a user, you are testing an AI Agent (shown as the user role) based on a scenario.
558
- Approach this naturally, as a human user would, with very short inputs, few words, all lowercase, imperative, not periods, like when they google or talk to chatgpt.
559
- </role>
1028
+ // src/agents/user-simulator-agent.ts
1029
/**
 * Builds the default system prompt for the user-simulator agent.
 *
 * @param description - Scenario description interpolated into the prompt.
 * @returns The prompt text, sections separated by blank lines.
 */
function buildSystemPrompt2(description) {
  const promptLines = [
    "<role>",
    "You are pretending to be a user, you are testing an AI Agent (shown as the user role) based on a scenario.",
    "Approach this naturally, as a human user would, with very short inputs, few words, all lowercase, imperative, not periods, like when they google or talk to chatgpt.",
    "</role>",
    "",
    "<goal>",
    "Your goal (assistant) is to interact with the Agent Under Test (user) as if you were a human user to see if it can complete the scenario successfully.",
    "</goal>",
    "",
    "<scenario>",
    // Template literal keeps the same string coercion as direct interpolation.
    `${description}`,
    "</scenario>",
    "",
    "<rules>",
    "- DO NOT carry over any requests yourself, YOU ARE NOT the assistant today, you are the user",
    "</rules>"
  ];
  return promptLines.join("\n");
}
1049
/**
 * Agent that plays the human user in a scenario by asking an LLM for the next
 * user message.
 */
var UserSimulatorAgent = class extends UserSimulatorAgentAdapter {
  /**
   * @param cfg - Optional configuration (system prompt override and model
   *              settings merged over the project defaults).
   */
  constructor(cfg) {
    super();
    this.cfg = cfg;
  }
  logger = new Logger(this.constructor.name);
  /**
   * LLM invocation function. Can be overridden to customize LLM behavior.
   */
  invokeLLM = createLLMInvoker(this.logger);
  /**
   * Produces the next simulated user message.
   *
   * The conversation is prefixed with the system prompt and a canned assistant
   * greeting, then passed through `messageRoleReversal` before the LLM call.
   *
   * @param input - Agent input carrying `messages` and `scenarioConfig`.
   * @returns A `{ role: "user", content }` message with the LLM's text.
   * @throws {Error} When the LLM returns no text.
   */
  call = async (input) => {
    const agentConfig = this.cfg;
    const overridePrompt = agentConfig == null ? void 0 : agentConfig.systemPrompt;
    const systemPrompt = overridePrompt ?? buildSystemPrompt2(input.scenarioConfig.description);
    const conversation = [
      { role: "system", content: systemPrompt },
      { role: "assistant", content: "Hello, how can I help you today" },
      ...input.messages
    ];
    const projectConfig = await getProjectConfig();
    const projectDefaults = projectConfig == null ? void 0 : projectConfig.defaultModel;
    // Explicit agent config wins over project-level defaults.
    const mergedConfig = modelSchema.parse({ ...projectDefaults, ...agentConfig });
    const completion = await this.invokeLLM({
      model: mergedConfig.model,
      messages: messageRoleReversal(conversation),
      temperature: mergedConfig.temperature,
      maxOutputTokens: mergedConfig.maxTokens
    });
    const replyText = completion.text;
    if (!replyText) {
      throw new Error("No response content from LLM");
    }
    return { role: "user", content: replyText };
  };
};
1086
/**
 * Factory for `UserSimulatorAgent`.
 *
 * @param config2 - Optional configuration forwarded to the constructor.
 * @returns A new `UserSimulatorAgent` instance.
 */
var userSimulatorAgent = (config2) => new UserSimulatorAgent(config2);
1089
+
1090
+ // src/agents/realtime/realtime-agent.adapter.ts
1091
+ var import_events = require("events");
1092
+
1093
+ // src/agents/realtime/message-processor.ts
1094
/**
 * Extracts audio and text payloads from message content that is either a
 * plain string or an array of content parts.
 */
var MessageProcessor = class {
  /**
   * Processes audio message content and extracts base64 audio data
   *
   * Only the first part with `type: "file"` and an `audio/*` media type is
   * considered.
   *
   * @param content - The message content to process
   * @returns Base64 audio data string or null if no audio found
   * @throws {Error} If the audio part's data is not a non-empty string
   */
  processAudioMessage(content) {
    if (!Array.isArray(content)) {
      return null;
    }
    for (const part of content) {
      const isAudioFile = typeof part === "object" && part !== null && part.type === "file" && typeof part.mediaType === "string" && part.mediaType.startsWith("audio/");
      if (!isAudioFile) {
        continue;
      }
      if (typeof part.data !== "string") {
        throw new Error(
          `Audio data must be base64 string, got: ${typeof part.data}`
        );
      }
      if (part.data.length === 0) {
        throw new Error(
          `Audio message has no data. Part: ${JSON.stringify(part)}`
        );
      }
      return part.data;
    }
    return null;
  }
  /**
   * Extracts text content from message content
   *
   * String content is returned as-is. For array content, the `text` of every
   * `{ type: "text" }` part is concatenated, so part-based text messages are
   * not mistaken for empty ones.
   *
   * @param content - The message content to process
   * @returns Text string or empty string if no text found
   */
  extractTextMessage(content) {
    if (typeof content === "string") {
      return content;
    }
    if (!Array.isArray(content)) {
      return "";
    }
    return content.filter(
      (part) => typeof part === "object" && part !== null && part.type === "text" && typeof part.text === "string"
    ).map((part) => part.text).join("");
  }
  /**
   * Validates that a message has either text or audio content
   *
   * @param content - The message content to validate
   * @returns True if the message has valid content
   * @throws {Error} Propagated from processAudioMessage for malformed audio parts
   */
  hasValidContent(content) {
    const hasText = this.extractTextMessage(content).length > 0;
    const hasAudio = this.processAudioMessage(content) !== null;
    return hasText || hasAudio;
  }
};
1144
+
1145
+ // src/agents/realtime/realtime-event-handler.ts
1146
/**
 * Collects streamed transcript/audio deltas from a realtime session transport
 * and exposes them as a single awaited response.
 */
var RealtimeEventHandler = class {
  /**
   * Creates a new RealtimeEventHandler instance
   * @param session - The RealtimeSession to listen to events from
   */
  constructor(session) {
    this.session = session;
    this.ensureEventListeners();
  }
  currentResponse = "";
  currentAudioChunks = [];
  responseResolver = null;
  errorRejecter = null;
  listenersSetup = false;
  /**
   * Gets the transport from the session
   */
  getTransport() {
    const sessionWithTransport = this.session;
    return sessionWithTransport.transport ?? null;
  }
  /**
   * Ensures event listeners are set up, retrying if transport not available
   *
   * NOTE(review): the retry polls every 100ms with no upper bound for as long
   * as the session has no transport.
   */
  ensureEventListeners() {
    if (this.listenersSetup) return;
    const transport = this.getTransport();
    if (!transport) {
      setTimeout(() => this.ensureEventListeners(), 100);
      return;
    }
    this.setupEventListeners();
  }
  /**
   * Sets up event listeners for the RealtimeSession transport layer
   */
  setupEventListeners() {
    if (this.listenersSetup) return;
    const transport = this.getTransport();
    if (!transport) {
      console.error("\u274C Transport not available on session");
      return;
    }
    transport.on("response.output_audio_transcript.delta", (event) => {
      const deltaEvent = event;
      if (typeof deltaEvent.delta === "string") {
        this.currentResponse += deltaEvent.delta;
      }
    });
    transport.on("response.output_audio.delta", (event) => {
      const deltaEvent = event;
      if (typeof deltaEvent.delta === "string") {
        this.currentAudioChunks.push(deltaEvent.delta);
      }
    });
    transport.on("response.done", () => {
      const audioResponse = {
        transcript: this.currentResponse,
        audio: this.currentAudioChunks.join("")
      };
      const resolver = this.responseResolver;
      // Always clear the buffers, even when nobody is waiting, so a response
      // that completes unobserved cannot leak into the next one.
      this.reset();
      if (resolver) {
        resolver(audioResponse);
      }
    });
    transport.on("error", (error) => {
      console.error(`\u274C Transport error:`, error);
      const rejecter = this.errorRejecter;
      if (rejecter) {
        const errorObj = error instanceof Error ? error : new Error(String(error));
        this.reset();
        rejecter(errorObj);
      }
    });
    this.listenersSetup = true;
  }
  /**
   * Waits for the agent response with timeout
   *
   * The timeout timer is cancelled on both resolution and rejection, and a
   * firing timer only resets handler state if its own wait is still the
   * active one, so it can never tear down a later wait.
   *
   * @param timeout - Maximum time to wait in milliseconds
   * @returns Promise that resolves with the audio response event
   * @throws {Error} If timeout occurs or transport error happens
   */
  waitForResponse(timeout) {
    return new Promise((resolve, reject) => {
      let timeoutId;
      const settleWithValue = (value) => {
        clearTimeout(timeoutId);
        resolve(value);
      };
      const settleWithError = (error) => {
        clearTimeout(timeoutId);
        reject(error);
      };
      timeoutId = setTimeout(() => {
        if (this.responseResolver === settleWithValue) {
          this.reset();
        }
        reject(new Error(`Agent response timeout after ${timeout}ms`));
      }, timeout);
      this.responseResolver = settleWithValue;
      this.errorRejecter = settleWithError;
    });
  }
  /**
   * Resets the internal state for the next response
   */
  reset() {
    this.responseResolver = null;
    this.errorRejecter = null;
    this.currentResponse = "";
    this.currentAudioChunks = [];
  }
};
1256
+
1257
+ // src/agents/realtime/response-formatter.ts
1258
/**
 * Converts realtime responses into the message shapes returned by the agent
 * adapter.
 */
var ResponseFormatter = class {
  /**
   * Formats an audio response event into Scenario framework format
   *
   * @param audioEvent - Object with `transcript` and `audio` fields
   * @returns Assistant message whose content is a text part (the transcript)
   *          followed by an `audio/pcm16` file part (the audio data)
   */
  formatAudioResponse(audioEvent) {
    const textPart = { type: "text", text: audioEvent.transcript };
    const audioPart = { type: "file", mediaType: "audio/pcm16", data: audioEvent.audio };
    return { role: "assistant", content: [textPart, audioPart] };
  }
  /**
   * Formats a text response for the Scenario framework
   *
   * @param text - The text response from the agent
   * @returns The same text, unchanged
   */
  formatTextResponse(text) {
    return text;
  }
  /**
   * Creates an initial response message for when no user message exists
   *
   * @param audioEvent - Object with `transcript` and `audio` fields
   * @returns Same shape as `formatAudioResponse`
   */
  formatInitialResponse(audioEvent) {
    return this.formatAudioResponse(audioEvent);
  }
};
1293
+
1294
+ // src/agents/realtime/realtime-agent.adapter.ts
1295
/**
 * Agent adapter that drives a realtime session: it forwards the latest
 * message (audio or text) to the session and returns the session's reply.
 */
var RealtimeAgentAdapter = class extends AgentAdapter {
  /**
   * Creates a new RealtimeAgentAdapter instance
   *
   * The session can be either connected or unconnected.
   * If unconnected, call connect() with an API key before use.
   *
   * @param config2 - Configuration for the realtime agent adapter
   */
  constructor(config2) {
    super();
    this.config = config2;
    this.role = this.config.role;
    this.name = this.config.agentName;
    this.session = config2.session;
    this.eventHandler = new RealtimeEventHandler(this.session);
  }
  role;
  name;
  session;
  eventHandler;
  messageProcessor = new MessageProcessor();
  responseFormatter = new ResponseFormatter();
  audioEvents = new import_events.EventEmitter();
  /**
   * Connects the underlying session.
   *
   * @param params - Optional connect options; `apiKey` falls back to the
   *                 `OPENAI_API_KEY` environment variable when not given.
   */
  async connect(params) {
    const options = params ?? {};
    const { apiKey: explicitKey, ...passthrough } = options;
    const connectOptions = {
      apiKey: explicitKey ?? process.env.OPENAI_API_KEY,
      ...passthrough
    };
    await this.session.connect(connectOptions);
  }
  /**
   * Closes the session connection
   */
  async disconnect() {
    this.session.close();
  }
  /**
   * Process input and generate response (implements AgentAdapter interface)
   *
   * Dispatch order: no new message -> initial response; audio part present ->
   * audio handling; otherwise text handling.
   *
   * @param input - Scenario agent input with `newMessages`
   * @returns Agent response as audio message or text
   * @throws {Error} If the latest message has neither text nor audio content
   */
  async call(input) {
    console.log(`\u{1F50A} [${this.name}] being called with role: ${this.role}`);
    const latestMessage = input.newMessages[input.newMessages.length - 1];
    if (!latestMessage) {
      return this.handleInitialResponse();
    }
    const { content } = latestMessage;
    const audioData = this.messageProcessor.processAudioMessage(content);
    if (audioData) {
      return this.handleAudioInput(audioData);
    }
    const text = this.messageProcessor.extractTextMessage(content);
    if (text) {
      return this.handleTextInput(text);
    }
    throw new Error("Message has no text or audio content");
  }
  /**
   * Handles the initial response when no user message exists
   */
  async handleInitialResponse() {
    console.log(`[${this.name}] First message, creating response`);
    const { transport } = this.session;
    if (!transport) {
      throw new Error("Realtime transport not available");
    }
    transport.sendEvent({ type: "response.create" });
    const waitMs = this.config.responseTimeout ?? 6e4;
    const response = await this.eventHandler.waitForResponse(waitMs);
    this.audioEvents.emit("audioResponse", response);
    return this.responseFormatter.formatInitialResponse(response);
  }
  /**
   * Handles audio input from the user
   *
   * Appends the audio to the input buffer, commits it and requests a response.
   */
  async handleAudioInput(audioData) {
    const { transport } = this.session;
    if (!transport) {
      throw new Error("Realtime transport not available");
    }
    const outgoingEvents = [
      { type: "input_audio_buffer.append", audio: audioData },
      { type: "input_audio_buffer.commit" },
      { type: "response.create" }
    ];
    for (const outgoing of outgoingEvents) {
      transport.sendEvent(outgoing);
    }
    const waitMs = this.config.responseTimeout ?? 6e4;
    const response = await this.eventHandler.waitForResponse(waitMs);
    this.audioEvents.emit("audioResponse", response);
    return this.responseFormatter.formatAudioResponse(response);
  }
  /**
   * Handles text input from the user
   *
   * Returns only the transcript of the reply (as plain text).
   */
  async handleTextInput(text) {
    this.session.sendMessage(text);
    const waitMs = this.config.responseTimeout ?? 3e4;
    const response = await this.eventHandler.waitForResponse(waitMs);
    this.audioEvents.emit("audioResponse", response);
    return this.responseFormatter.formatTextResponse(response.transcript);
  }
  /**
   * Subscribe to audio response events
   *
   * @param callback - Function called when an audio response completes
   */
  onAudioResponse(callback) {
    this.audioEvents.on("audioResponse", callback);
  }
  /**
   * Remove audio response listener
   *
   * @param callback - The callback function to remove
   */
  offAudioResponse(callback) {
    this.audioEvents.off("audioResponse", callback);
  }
};
1433
+
1434
+ // src/execution/index.ts
1435
+ var execution_exports = {};
1436
+ __export(execution_exports, {
1437
+ ScenarioExecution: () => ScenarioExecution,
1438
+ ScenarioExecutionState: () => ScenarioExecutionState,
1439
+ StateChangeEventType: () => StateChangeEventType
1440
+ });
1441
+
1442
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/platform/node/globalThis.js
1443
+ var _globalThis = typeof globalThis === "object" ? globalThis : global;
1444
+
1445
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/version.js
1446
+ var VERSION = "1.9.0";
1447
+
1448
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/internal/semver.js
1449
+ var re = /^(\d+)\.(\d+)\.(\d+)(-(.+))?$/;
1450
/**
 * Builds a predicate that decides whether a globally registered API version
 * is compatible with `ownVersion`.
 *
 * Rules implemented below:
 * - an unparsable own version accepts nothing;
 * - an own prerelease version accepts only the exact same version string;
 * - otherwise the other version must parse, must not be a prerelease and must
 *   share the major version; for major 0 the minor must match and the patch
 *   must be >= own patch, for other majors the minor must be >= own minor.
 * Verdicts are memoized per version string.
 *
 * @param ownVersion - Version string of this API copy.
 * @returns Function mapping a version string to a boolean.
 */
function _makeCompatibilityCheck(ownVersion) {
  var acceptedVersions = /* @__PURE__ */ new Set([ownVersion]);
  var rejectedVersions = /* @__PURE__ */ new Set();
  var parseVersion = function(version) {
    var match = version.match(re);
    if (!match) {
      return null;
    }
    return {
      major: +match[1],
      minor: +match[2],
      patch: +match[3],
      prerelease: match[4]
    };
  };
  var own = parseVersion(ownVersion);
  if (!own) {
    return function() {
      return false;
    };
  }
  if (own.prerelease != null) {
    return function isExactmatch(globalVersion) {
      return globalVersion === ownVersion;
    };
  }
  return function isCompatible2(globalVersion) {
    if (acceptedVersions.has(globalVersion)) {
      return true;
    }
    if (rejectedVersions.has(globalVersion)) {
      return false;
    }
    var other = parseVersion(globalVersion);
    var compatible = false;
    if (other && other.prerelease == null && own.major === other.major) {
      compatible = own.major === 0 ? own.minor === other.minor && own.patch <= other.patch : own.minor <= other.minor;
    }
    // Remember the verdict so the same string is never re-parsed.
    (compatible ? acceptedVersions : rejectedVersions).add(globalVersion);
    return compatible;
  };
}
1513
+ var isCompatible = _makeCompatibilityCheck(VERSION);
1514
+
1515
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/internal/global-utils.js
1516
+ var major = VERSION.split(".")[0];
1517
+ var GLOBAL_OPENTELEMETRY_API_KEY = Symbol.for("opentelemetry.js.api." + major);
1518
+ var _global = _globalThis;
1519
/**
 * Registers `instance` under `type` in the shared global API registry.
 *
 * The registry object is created (stamped with this copy's VERSION) if it
 * does not exist yet. Registration is refused, with the error logged through
 * `diag.error`, when the slot is already taken and `allowOverride` is false,
 * or when the registry's version differs from VERSION.
 *
 * @param type - Registry slot name.
 * @param instance - Value to store.
 * @param diag - Logger exposing `error` and `debug`.
 * @param allowOverride - Permit replacing an existing entry (default false).
 * @returns True when registered, false when refused.
 */
function registerGlobal(type, instance, diag, allowOverride) {
  if (allowOverride === void 0) {
    allowOverride = false;
  }
  var api = _global[GLOBAL_OPENTELEMETRY_API_KEY] ?? {
    version: VERSION
  };
  _global[GLOBAL_OPENTELEMETRY_API_KEY] = api;
  var problem = null;
  if (!allowOverride && api[type]) {
    problem = new Error("@opentelemetry/api: Attempted duplicate registration of API: " + type);
  } else if (api.version !== VERSION) {
    problem = new Error("@opentelemetry/api: Registration of version v" + api.version + " for " + type + " does not match previously registered API v" + VERSION);
  }
  if (problem) {
    diag.error(problem.stack || problem.message);
    return false;
  }
  api[type] = instance;
  diag.debug("@opentelemetry/api: Registered a global for " + type + " v" + VERSION + ".");
  return true;
}
1541
/**
 * Looks up an entry in the shared global API registry.
 *
 * @param type - Registry slot name.
 * @returns The stored value, or undefined when there is no registry, it has
 *          no version, or its version fails `isCompatible`.
 */
function getGlobal(type) {
  var registry = _global[GLOBAL_OPENTELEMETRY_API_KEY];
  var globalVersion = registry == null ? void 0 : registry.version;
  if (globalVersion && isCompatible(globalVersion)) {
    return registry[type];
  }
  return;
}
1549
/**
 * Removes an entry from the shared global API registry, if the registry
 * exists. A debug message is logged before the removal is attempted.
 *
 * @param type - Registry slot name.
 * @param diag - Logger exposing `debug`.
 */
function unregisterGlobal(type, diag) {
  diag.debug(`@opentelemetry/api: Unregistering a global for ${type} v${VERSION}.`);
  var registry = _global[GLOBAL_OPENTELEMETRY_API_KEY];
  if (!registry) {
    return;
  }
  delete registry[type];
}
1556
+
1557
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/diag/ComponentLogger.js
1558
/**
 * Compiler helper: reads up to `n` values (all when `n` is undefined) from an
 * iterable into an array.
 *
 * Non-iterable inputs are returned unchanged. If iteration stops early, the
 * iterator's `return` method is called when present; an error thrown by
 * `next()` is rethrown after that cleanup.
 */
var __read = function(o, n) {
  var getIterator = typeof Symbol === "function" && o[Symbol.iterator];
  if (!getIterator) return o;
  var iterator = getIterator.call(o);
  var collected = [];
  var step;
  var failure;
  try {
    while (n === void 0 || n-- > 0) {
      step = iterator.next();
      if (step.done) break;
      collected.push(step.value);
    }
  } catch (error) {
    failure = { error };
  } finally {
    try {
      if (step && !step.done) {
        var close = iterator["return"];
        if (close) close.call(iterator);
      }
    } finally {
      if (failure) throw failure.error;
    }
  }
  return collected;
};
1575
/**
 * Compiler helper: returns `to` concatenated with the elements of `from`.
 *
 * When `pack` is truthy (or only two arguments are passed), holes in `from`
 * are filled with explicit `undefined` entries; the copy is only made once
 * the first hole is found.
 */
var __spreadArray = function(to, from, pack) {
  var densified;
  if (pack || arguments.length === 2) {
    var length = from.length;
    for (var index = 0; index < length; index++) {
      if (!densified && index in from) continue;
      if (!densified) densified = Array.prototype.slice.call(from, 0, index);
      densified[index] = from[index];
    }
  }
  return to.concat(densified || Array.prototype.slice.call(from));
};
1584
/**
 * Logger facade bound to a namespace. Each level method (debug, error, info,
 * warn, verbose) forwards its arguments to `logProxy` together with the level
 * name and the namespace.
 */
var DiagComponentLogger = (
  /** @class */
  (function() {
    /**
     * @param props - Options; `props.namespace` defaults to "DiagComponentLogger".
     */
    function DiagComponentLogger2(props) {
      this._namespace = props.namespace || "DiagComponentLogger";
    }
    ["debug", "error", "info", "warn", "verbose"].forEach(function(level) {
      DiagComponentLogger2.prototype[level] = function() {
        var args = Array.prototype.slice.call(arguments);
        return logProxy(level, this._namespace, args);
      };
    });
    return DiagComponentLogger2;
  })()
);
1628
/**
 * Forwards a log call to the globally registered "diag" logger, if any.
 *
 * The namespace is prepended to `args` (mutating the array) before the call.
 *
 * @param funcName - Logger method to invoke.
 * @param namespace - Namespace inserted as the first argument.
 * @param args - Array of log arguments.
 * @returns The logger method's return value, or undefined when no logger is
 *          registered.
 */
function logProxy(funcName, namespace, args) {
  var globalLogger = getGlobal("diag");
  if (globalLogger) {
    args.unshift(namespace);
    return globalLogger[funcName].apply(globalLogger, Array.from(args));
  }
  return;
}
1636
+
1637
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/diag/types.js
1638
/**
 * Numeric log levels with a reverse (number -> name) mapping.
 */
var DiagLogLevel;
(function(levels) {
  var table = [
    ["NONE", 0],
    ["ERROR", 30],
    ["WARN", 50],
    ["INFO", 60],
    ["DEBUG", 70],
    ["VERBOSE", 80],
    ["ALL", 9999]
  ];
  table.forEach(function(entry) {
    var label = entry[0];
    var numeric = entry[1];
    levels[label] = numeric;
    levels[numeric] = label;
  });
})(DiagLogLevel || (DiagLogLevel = {}));
1648
+
1649
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/diag/internal/logLevelLogger.js
1650
/**
 * Wraps a logger so that only methods at or below `maxLevel` are active.
 *
 * `maxLevel` is clamped to [DiagLogLevel.NONE, DiagLogLevel.ALL]. Each of
 * error/warn/info/debug/verbose is the wrapped logger's bound method when that
 * method exists and its level is enabled, otherwise a no-op.
 *
 * @param maxLevel - Highest level to let through.
 * @param logger2 - Logger to wrap; a missing logger yields all no-ops.
 * @returns Object with error, warn, info, debug and verbose functions.
 */
function createLogLevelDiagLogger(maxLevel, logger2) {
  if (maxLevel < DiagLogLevel.NONE) {
    maxLevel = DiagLogLevel.NONE;
  } else if (maxLevel > DiagLogLevel.ALL) {
    maxLevel = DiagLogLevel.ALL;
  }
  var target = logger2 || {};
  var pick = function(methodName, requiredLevel) {
    var candidate = target[methodName];
    var enabled = typeof candidate === "function" && maxLevel >= requiredLevel;
    return enabled ? candidate.bind(target) : function() {
    };
  };
  var levelByMethod = [
    ["error", DiagLogLevel.ERROR],
    ["warn", DiagLogLevel.WARN],
    ["info", DiagLogLevel.INFO],
    ["debug", DiagLogLevel.DEBUG],
    ["verbose", DiagLogLevel.VERBOSE]
  ];
  var filtered = {};
  levelByMethod.forEach(function(pair) {
    filtered[pair[0]] = pick(pair[0], pair[1]);
  });
  return filtered;
}
1673
+
1674
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/api/diag.js
1675
/**
 * Collects values from an iterable into a new array.
 *
 * Returns `o` itself when it exposes no `Symbol.iterator`. Otherwise reads at
 * most `n` values (all of them when `n` is undefined). If the most recent step
 * was not `done`, the iterator's `return` method is invoked when present. An
 * error raised while iterating is rethrown after that clean-up attempt.
 *
 * @param o - Iterable (or any other value) to read from.
 * @param n - Optional maximum number of values to read.
 * @returns The collected values, or `o` unchanged when it is not iterable.
 */
var __read2 = function(o, n) {
  var iteratorFactory = typeof Symbol === "function" && o[Symbol.iterator];
  if (!iteratorFactory) {
    return o;
  }
  var iterator = iteratorFactory.call(o);
  var collected = [];
  var step;
  var failure;
  try {
    for (; ; ) {
      if (n !== void 0 && !(n-- > 0)) {
        break;
      }
      step = iterator.next();
      if (step.done) {
        break;
      }
      collected.push(step.value);
    }
  } catch (error) {
    failure = { error: error };
  } finally {
    try {
      if (step && !step.done) {
        iteratorFactory = iterator["return"];
        if (iteratorFactory) {
          iteratorFactory.call(iterator);
        }
      }
    } finally {
      if (failure) throw failure.error;
    }
  }
  return collected;
};
1692
/**
 * Returns `to` concatenated with the elements of the array-like `from`.
 *
 * When `pack` is truthy, or when only two arguments are supplied, missing
 * indices in `from` are written out explicitly (as undefined) before
 * concatenating; otherwise `from` is copied with `Array.prototype.slice`.
 *
 * @param to - Array that receives the elements.
 * @param from - Array or array-like source.
 * @param pack - Whether missing indices should be filled in.
 * @returns A new array; neither input is modified.
 */
var __spreadArray2 = function(to, from, pack) {
  var filled;
  if (pack || arguments.length === 2) {
    var total = from.length;
    var index = 0;
    while (index < total) {
      if (filled || !(index in from)) {
        if (!filled) {
          filled = Array.prototype.slice.call(from, 0, index);
        }
        filled[index] = from[index];
      }
      index++;
    }
  }
  return to.concat(filled || Array.prototype.slice.call(from));
};
1701
var API_NAME = "diag";
/**
 * Singleton front end for diagnostic logging.
 *
 * An instance exposes `setLogger`, `disable`, `createComponentLogger` and the
 * five level methods. The level methods look up the logger registered under
 * "diag" at call time and do nothing when none is registered.
 */
var DiagAPI = (
  /** @class */
  (function() {
    function DiagAPI2() {
      var self = this;
      // Builds a method that relays its arguments to the same-named method of
      // whichever logger is registered at the moment of the call.
      function _logProxy(funcName) {
        return function() {
          var args = Array.prototype.slice.call(arguments);
          var logger2 = getGlobal("diag");
          if (logger2) {
            return logger2[funcName].apply(logger2, __spreadArray2([], __read2(args), false));
          }
          return;
        };
      }
      /**
       * Registers `logger2` (wrapped in a level filter) as the global diag logger.
       *
       * @param logger2 - Logger implementation to install.
       * @param optionsOrLogLevel - A numeric level or an options object;
       *   defaults to `{ logLevel: DiagLogLevel.INFO }`.
       * @returns false when `logger2` is this API itself, otherwise the result
       *   of `registerGlobal`.
       */
      var setLogger = function(logger2, optionsOrLogLevel) {
        var options = optionsOrLogLevel;
        if (options === void 0) {
          options = { logLevel: DiagLogLevel.INFO };
        }
        if (logger2 === self) {
          var err = new Error("Cannot use diag as the logger for itself. Please use a DiagLogger implementation like ConsoleDiagLogger or a custom implementation");
          var detail = err.stack;
          if (detail === null || detail === void 0) {
            detail = err.message;
          }
          self.error(detail);
          return false;
        }
        if (typeof options === "number") {
          options = { logLevel: options };
        }
        var oldLogger = getGlobal("diag");
        var level = options.logLevel;
        if (level === null || level === void 0) {
          level = DiagLogLevel.INFO;
        }
        var newLogger = createLogLevelDiagLogger(level, logger2);
        if (oldLogger && !options.suppressOverrideMessage) {
          var stack = new Error().stack;
          if (stack === null || stack === void 0) {
            stack = "<failed to generate stacktrace>";
          }
          // Announce the replacement on both the outgoing and the incoming logger.
          oldLogger.warn("Current logger will be overwritten from " + stack);
          newLogger.warn("Current logger will overwrite one already registered from " + stack);
        }
        return registerGlobal("diag", newLogger, self, true);
      };
      self.setLogger = setLogger;
      self.disable = function() {
        unregisterGlobal(API_NAME, self);
      };
      self.createComponentLogger = function(options) {
        return new DiagComponentLogger(options);
      };
      self.verbose = _logProxy("verbose");
      self.debug = _logProxy("debug");
      self.info = _logProxy("info");
      self.warn = _logProxy("warn");
      self.error = _logProxy("error");
    }
    /** Returns the shared instance, creating it on first use. */
    DiagAPI2.instance = function() {
      if (this._instance) {
        return this._instance;
      }
      this._instance = new DiagAPI2();
      return this._instance;
    };
    return DiagAPI2;
  })()
);
1765
+
1766
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/context/context.js
1767
/**
 * Produces the context key for `description` from the global symbol registry,
 * so equal descriptions always yield the same symbol.
 *
 * @param description - Name of the key.
 * @returns The registry symbol for that name.
 */
function createContextKey(description) {
  var registryKey = Symbol.for(description);
  return registryKey;
}
1770
/**
 * Key/value context whose modifying operations return a new context.
 *
 * `setValue` and `deleteValue` build a fresh BaseContext from the receiver's
 * current entries and apply the change to that new object only.
 */
var BaseContext = (
  /** @class */
  /* @__PURE__ */ (function() {
    /**
     * @param parentContext - Optional Map (or iterable of entries) whose
     *   entries are copied into the new context.
     */
    function BaseContext2(parentContext) {
      var self = this;
      if (parentContext) {
        self._currentContext = new Map(parentContext);
      } else {
        self._currentContext = /* @__PURE__ */ new Map();
      }
      // Creates a new context from the current entries and applies `change` to it.
      function derive(change) {
        var next = new BaseContext2(self._currentContext);
        change(next._currentContext);
        return next;
      }
      self.getValue = function(key) {
        return self._currentContext.get(key);
      };
      self.setValue = function(key, value) {
        return derive(function(entries) {
          entries.set(key, value);
        });
      };
      self.deleteValue = function(key) {
        return derive(function(entries) {
          entries.delete(key);
        });
      };
    }
    return BaseContext2;
  })()
);
/** Context with no entries, created once at load time. */
var ROOT_CONTEXT = new BaseContext();
1794
+
1795
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/context/NoopContextManager.js
1796
/**
 * Reads values out of an iterable into a new array.
 *
 * A value without `Symbol.iterator` is returned as-is. At most `n` values are
 * read when `n` is given. When the last step seen was not `done`, the
 * iterator's `return` method (if any) is called; an error thrown during
 * iteration is rethrown once that has been attempted.
 *
 * @param o - Iterable (or any other value) to read from.
 * @param n - Optional cap on the number of values read.
 * @returns The values read, or `o` unchanged when it is not iterable.
 */
var __read3 = function(o, n) {
  var makeIterator = typeof Symbol === "function" && o[Symbol.iterator];
  if (!makeIterator) {
    return o;
  }
  var source = makeIterator.call(o);
  var values = [];
  var current;
  var caught;
  try {
    var keepGoing = true;
    while (keepGoing) {
      if (n !== void 0 && !(n-- > 0)) {
        keepGoing = false;
      } else {
        current = source.next();
        if (current.done) {
          keepGoing = false;
        } else {
          values.push(current.value);
        }
      }
    }
  } catch (error) {
    caught = { error: error };
  } finally {
    try {
      if (current && !current.done) {
        makeIterator = source["return"];
        if (makeIterator) {
          makeIterator.call(source);
        }
      }
    } finally {
      if (caught) throw caught.error;
    }
  }
  return values;
};
1813
/**
 * Concatenates the elements of the array-like `from` onto `to`.
 *
 * With a truthy `pack`, or when called with exactly two arguments, indices
 * missing from `from` are materialised as undefined entries first; otherwise
 * `from` is copied with `Array.prototype.slice`.
 *
 * @param to - Array that receives the elements.
 * @param from - Array or array-like source.
 * @param pack - Whether missing indices should be filled in.
 * @returns A new array; the inputs are left untouched.
 */
var __spreadArray3 = function(to, from, pack) {
  var dense;
  var shouldPack = pack || arguments.length === 2;
  if (shouldPack) {
    var count = from.length;
    for (var position = 0; position < count; position += 1) {
      var mustCopy = dense || !(position in from);
      if (!mustCopy) {
        continue;
      }
      if (!dense) {
        dense = Array.prototype.slice.call(from, 0, position);
      }
      dense[position] = from[position];
    }
  }
  return to.concat(dense || Array.prototype.slice.call(from));
};
1822
/**
 * Context manager that tracks nothing: the active context is always
 * ROOT_CONTEXT, `with` simply runs the callback, and `bind` hands the target
 * back unchanged.
 */
var NoopContextManager = (
  /** @class */
  (function() {
    function NoopContextManager2() {
    }
    var proto = NoopContextManager2.prototype;
    /** @returns ROOT_CONTEXT, regardless of any earlier calls. */
    proto.active = function() {
      return ROOT_CONTEXT;
    };
    /**
     * Calls `fn` with `thisArg` as its receiver and any further arguments
     * passed through; the context argument is ignored.
     */
    proto.with = function(_context, fn, thisArg) {
      var args = Array.prototype.slice.call(arguments, 3);
      return fn.call.apply(fn, __spreadArray3([thisArg], __read3(args), false));
    };
    /** @returns `target` itself; the context argument is ignored. */
    proto.bind = function(_context, target) {
      return target;
    };
    proto.enable = function() {
      return this;
    };
    proto.disable = function() {
      return this;
    };
    return NoopContextManager2;
  })()
);
1849
+
1850
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/api/context.js
1851
/**
 * Gathers values from an iterable into a new array.
 *
 * Returns `o` unchanged when it has no `Symbol.iterator`. Reads at most `n`
 * values when `n` is provided. If the last step obtained was not `done`, the
 * iterator's `return` method is called when it exists. An error raised while
 * iterating is rethrown after that clean-up attempt.
 *
 * @param o - Iterable (or any other value) to read from.
 * @param n - Optional maximum number of values to read.
 * @returns The gathered values, or `o` itself when it is not iterable.
 */
var __read4 = function(o, n) {
  var getIterator = typeof Symbol === "function" && o[Symbol.iterator];
  if (!getIterator) {
    return o;
  }
  var iter = getIterator.call(o);
  var out = [];
  var lastStep;
  var pending;
  try {
    for (; ; ) {
      if (n !== void 0 && !(n-- > 0)) {
        break;
      }
      lastStep = iter.next();
      if (lastStep.done) {
        break;
      }
      out.push(lastStep.value);
    }
  } catch (error) {
    pending = { error: error };
  } finally {
    try {
      if (lastStep && !lastStep.done) {
        getIterator = iter["return"];
        if (getIterator) {
          getIterator.call(iter);
        }
      }
    } finally {
      if (pending) throw pending.error;
    }
  }
  return out;
};
1868
/**
 * Appends the elements of the array-like `from` to a copy of `to`.
 *
 * When `pack` is truthy, or exactly two arguments are passed, indices absent
 * from `from` become explicit undefined entries; otherwise `from` is copied
 * with `Array.prototype.slice`.
 *
 * @param to - Array that receives the elements.
 * @param from - Array or array-like source.
 * @param pack - Whether missing indices should be filled in.
 * @returns A new array; neither argument is modified.
 */
var __spreadArray4 = function(to, from, pack) {
  var copy;
  if (pack || arguments.length === 2) {
    var size = from.length;
    var cursor = 0;
    while (cursor < size) {
      if (copy || !(cursor in from)) {
        copy = copy || Array.prototype.slice.call(from, 0, cursor);
        copy[cursor] = from[cursor];
      }
      cursor += 1;
    }
  }
  return to.concat(copy || Array.prototype.slice.call(from));
};
1877
+ var API_NAME2 = "context";
1878
+ var NOOP_CONTEXT_MANAGER = new NoopContextManager();
1879
/**
 * Singleton entry point for context operations.
 *
 * Every operation is delegated to the context manager registered globally
 * under API_NAME2, falling back to NOOP_CONTEXT_MANAGER when none is
 * registered.
 */
var ContextAPI = (
  /** @class */
  (function() {
    function ContextAPI2() {
    }
    /** Returns the shared instance, creating it on first use. */
    ContextAPI2.getInstance = function() {
      if (this._instance) {
        return this._instance;
      }
      this._instance = new ContextAPI2();
      return this._instance;
    };
    var proto = ContextAPI2.prototype;
    /** Registers `contextManager` globally; returns the result of `registerGlobal`. */
    proto.setGlobalContextManager = function(contextManager) {
      return registerGlobal(API_NAME2, contextManager, DiagAPI.instance());
    };
    proto.active = function() {
      var manager = this._getContextManager();
      return manager.active();
    };
    /** Forwards to the manager's `with`, passing any extra arguments along. */
    proto.with = function(context2, fn, thisArg) {
      var extra = Array.prototype.slice.call(arguments, 3);
      var manager = this._getContextManager();
      return manager.with.apply(manager, __spreadArray4([context2, fn, thisArg], __read4(extra), false));
    };
    proto.bind = function(context2, target) {
      var manager = this._getContextManager();
      return manager.bind(context2, target);
    };
    proto._getContextManager = function() {
      var registered = getGlobal(API_NAME2);
      return registered || NOOP_CONTEXT_MANAGER;
    };
    /** Disables the current manager, then removes the global registration. */
    proto.disable = function() {
      this._getContextManager().disable();
      unregisterGlobal(API_NAME2, DiagAPI.instance());
    };
    return ContextAPI2;
  })()
);
1917
+
1918
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/trace_flags.js
1919
/**
 * Two-way lookup table of trace flags: each name maps to its numeric value
 * and each numeric value maps back to its name.
 */
var TraceFlags;
(function(flags) {
  [["NONE", 0], ["SAMPLED", 1]].forEach(function(pair) {
    var label = pair[0];
    var bit = pair[1];
    flags[label] = bit;
    flags[bit] = label;
  });
})(TraceFlags || (TraceFlags = {}));
1924
+
1925
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/invalid-span-constants.js
1926
/** Span id consisting of sixteen zero digits. */
var INVALID_SPANID = "0000000000000000";
/** Trace id consisting of thirty-two zero digits. */
var INVALID_TRACEID = "00000000000000000000000000000000";
/** Span context assembled from the two ids above with the NONE trace flag. */
var INVALID_SPAN_CONTEXT = {};
INVALID_SPAN_CONTEXT.traceId = INVALID_TRACEID;
INVALID_SPAN_CONTEXT.spanId = INVALID_SPANID;
INVALID_SPAN_CONTEXT.traceFlags = TraceFlags.NONE;
1933
+
1934
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/NonRecordingSpan.js
1935
/**
 * Span that records nothing. It only carries a span context; every mutating
 * method is a no-op, and those that return a value return the span itself.
 */
var NonRecordingSpan = (
  /** @class */
  (function() {
    /**
     * @param _spanContext - Span context to carry; INVALID_SPAN_CONTEXT is
     *   used when the argument is undefined.
     */
    function NonRecordingSpan2(_spanContext) {
      this._spanContext = _spanContext === void 0 ? INVALID_SPAN_CONTEXT : _spanContext;
    }
    var proto = NonRecordingSpan2.prototype;
    /** @returns The span context given at construction. */
    proto.spanContext = function() {
      return this._spanContext;
    };
    // The setters below ignore their arguments and return the span for chaining.
    proto.setAttribute = function(_key, _value) {
      return this;
    };
    proto.setAttributes = function(_attributes) {
      return this;
    };
    proto.addEvent = function(_name, _attributes) {
      return this;
    };
    proto.addLink = function(_link) {
      return this;
    };
    proto.addLinks = function(_links) {
      return this;
    };
    proto.setStatus = function(_status) {
      return this;
    };
    proto.updateName = function(_name) {
      return this;
    };
    proto.end = function(_endTime) {
    };
    /** @returns Always false. */
    proto.isRecording = function() {
      return false;
    };
    proto.recordException = function(_exception, _time) {
    };
    return NonRecordingSpan2;
  })()
);
560
1978
 
561
- <goal>
562
- Your goal (assistant) is to interact with the Agent Under Test (user) as if you were a human user to see if it can complete the scenario successfully.
563
- </goal>
1979
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/context-utils.js
1980
+ var SPAN_KEY = createContextKey("OpenTelemetry Context Key SPAN");
1981
/**
 * Reads the span stored under SPAN_KEY in `context2`.
 *
 * @returns The stored span, or undefined when the stored value is falsy.
 */
function getSpan(context2) {
  var stored = context2.getValue(SPAN_KEY);
  return stored ? stored : void 0;
}
/** Reads the span stored in the context that ContextAPI reports as active. */
function getActiveSpan() {
  var activeContext = ContextAPI.getInstance().active();
  return getSpan(activeContext);
}
/** Returns the result of storing `span` under SPAN_KEY via `context2.setValue`. */
function setSpan(context2, span) {
  var updated = context2.setValue(SPAN_KEY, span);
  return updated;
}
/** Returns the result of removing SPAN_KEY via `context2.deleteValue`. */
function deleteSpan(context2) {
  var updated = context2.deleteValue(SPAN_KEY);
  return updated;
}
/** Wraps `spanContext` in a NonRecordingSpan and stores that span in `context2`. */
function setSpanContext(context2, spanContext) {
  var carrier = new NonRecordingSpan(spanContext);
  return setSpan(context2, carrier);
}
/**
 * Returns the span context of the span stored in `context2`, or undefined
 * when no span is stored.
 */
function getSpanContext(context2) {
  var span = getSpan(context2);
  if (span === null || span === void 0) {
    return void 0;
  }
  return span.spanContext();
}
564
2000
 
565
- <scenario>
566
- ${description}
567
- </scenario>
2001
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/spancontext-utils.js
2002
// Accepted id shapes: 32 hex digits for a trace id, 16 for a span id (case-insensitive).
var VALID_TRACEID_REGEX = /^([0-9a-f]{32})$/i;
var VALID_SPANID_REGEX = /^[0-9a-f]{16}$/i;
/** True when `traceId` has the trace-id shape and is not INVALID_TRACEID. */
function isValidTraceId(traceId) {
  if (!VALID_TRACEID_REGEX.test(traceId)) {
    return false;
  }
  return traceId !== INVALID_TRACEID;
}
/** True when `spanId` has the span-id shape and is not INVALID_SPANID. */
function isValidSpanId(spanId) {
  if (!VALID_SPANID_REGEX.test(spanId)) {
    return false;
  }
  return spanId !== INVALID_SPANID;
}
/** True when both the trace id and the span id of `spanContext` are valid. */
function isSpanContextValid(spanContext) {
  if (!isValidTraceId(spanContext.traceId)) {
    return false;
  }
  return isValidSpanId(spanContext.spanId);
}
/** Returns a new NonRecordingSpan carrying `spanContext`. */
function wrapSpanContext(spanContext) {
  var wrapper = new NonRecordingSpan(spanContext);
  return wrapper;
}
568
2016
 
569
- <rules>
570
- - DO NOT carry over any requests yourself, YOU ARE NOT the assistant today, you are the user
571
- </rules>
572
- `.trim();
2017
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/NoopTracer.js
2018
+ var contextApi = ContextAPI.getInstance();
2019
/**
 * Tracer that never records: every span it hands out is a NonRecordingSpan.
 */
var NoopTracer = (
  /** @class */
  (function() {
    function NoopTracer2() {
    }
    /**
     * Creates a NonRecordingSpan.
     *
     * The span carries the parent span context found in `context2` when that
     * context passes both `isSpanContext` and `isSpanContextValid` and
     * `options.root` is not truthy; otherwise the span is built with no
     * arguments.
     *
     * @param name - Span name (unused).
     * @param options - Span options; only `root` is consulted.
     * @param context2 - Parent context; the active context when undefined.
     */
    NoopTracer2.prototype.startSpan = function(name, options, context2) {
      var parentContext = context2 === void 0 ? contextApi.active() : context2;
      var isRoot = options === null || options === void 0 ? false : Boolean(options.root);
      if (!isRoot) {
        var parentFromContext = parentContext && getSpanContext(parentContext);
        if (isSpanContext(parentFromContext) && isSpanContextValid(parentFromContext)) {
          return new NonRecordingSpan(parentFromContext);
        }
      }
      return new NonRecordingSpan();
    };
    /**
     * Starts a span and runs the callback with it inside a context that holds
     * the span. Accepted shapes: (name, fn), (name, options, fn) and
     * (name, options, context, fn). Returns undefined without calling anything
     * when fewer than two arguments are given.
     */
    NoopTracer2.prototype.startActiveSpan = function(name, arg2, arg3, arg4) {
      var opts;
      var ctx;
      var fn;
      var argCount = arguments.length;
      if (argCount < 2) {
        return;
      }
      switch (argCount) {
        case 2:
          fn = arg2;
          break;
        case 3:
          opts = arg2;
          fn = arg3;
          break;
        default:
          opts = arg2;
          ctx = arg3;
          fn = arg4;
      }
      var parentContext = ctx === null || ctx === void 0 ? contextApi.active() : ctx;
      var span = this.startSpan(name, opts, parentContext);
      var contextWithSpanSet = setSpan(parentContext, span);
      return contextApi.with(contextWithSpanSet, fn, void 0, span);
    };
    return NoopTracer2;
  })()
);
2063
/**
 * Structural check for a span context: a non-null object whose `spanId` and
 * `traceId` are strings and whose `traceFlags` is a number.
 *
 * `null` is rejected explicitly. `typeof null` is "object", so without the
 * guard the property reads below would throw a TypeError instead of
 * returning false.
 *
 * @param spanContext - Any value.
 * @returns true when the value has the span-context shape, otherwise false.
 */
function isSpanContext(spanContext) {
  if (spanContext === null || typeof spanContext !== "object") {
    return false;
  }
  return typeof spanContext["spanId"] === "string" && typeof spanContext["traceId"] === "string" && typeof spanContext["traceFlags"] === "number";
}
574
- var UserSimulatorAgent = class extends UserSimulatorAgentAdapter {
575
- constructor(cfg) {
576
- super();
577
- this.cfg = cfg;
578
- }
579
- logger = new Logger(this.constructor.name);
580
- call = async (input) => {
581
- const config2 = this.cfg;
582
- const systemPrompt = (config2 == null ? void 0 : config2.systemPrompt) ?? buildSystemPrompt2(input.scenarioConfig.description);
583
- const messages = [
584
- { role: "system", content: systemPrompt },
585
- { role: "assistant", content: "Hello, how can I help you today" },
586
- ...input.messages
587
- ];
588
- const projectConfig = await getProjectConfig();
589
- const mergedConfig = mergeAndValidateConfig(config2 ?? {}, projectConfig);
590
- if (!mergedConfig.model) {
591
- throw new Error("Model is required for the user simulator agent");
2066
+
2067
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/ProxyTracer.js
2068
+ var NOOP_TRACER = new NoopTracer();
2069
+ var ProxyTracer = (
2070
+ /** @class */
2071
+ (function() {
2072
+ function ProxyTracer2(_provider, name, version, options) {
2073
+ this._provider = _provider;
2074
+ this.name = name;
2075
+ this.version = version;
2076
+ this.options = options;
592
2077
  }
593
- const reversedMessages = messageRoleReversal(messages);
594
- const completion = await this.generateText({
595
- model: mergedConfig.model,
596
- messages: reversedMessages,
597
- temperature: mergedConfig.temperature ?? DEFAULT_TEMPERATURE,
598
- maxTokens: mergedConfig.maxTokens
599
- });
600
- const messageContent = completion.text;
601
- if (!messageContent) {
602
- throw new Error("No response content from LLM");
2078
+ ProxyTracer2.prototype.startSpan = function(name, options, context2) {
2079
+ return this._getTracer().startSpan(name, options, context2);
2080
+ };
2081
+ ProxyTracer2.prototype.startActiveSpan = function(_name, _options, _context, _fn) {
2082
+ var tracer = this._getTracer();
2083
+ return Reflect.apply(tracer.startActiveSpan, tracer, arguments);
2084
+ };
2085
+ ProxyTracer2.prototype._getTracer = function() {
2086
+ if (this._delegate) {
2087
+ return this._delegate;
2088
+ }
2089
+ var tracer = this._provider.getDelegateTracer(this.name, this.version, this.options);
2090
+ if (!tracer) {
2091
+ return NOOP_TRACER;
2092
+ }
2093
+ this._delegate = tracer;
2094
+ return this._delegate;
2095
+ };
2096
+ return ProxyTracer2;
2097
+ })()
2098
+ );
2099
+
2100
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/NoopTracerProvider.js
2101
+ var NoopTracerProvider = (
2102
+ /** @class */
2103
+ (function() {
2104
+ function NoopTracerProvider2() {
603
2105
  }
604
- return { role: "user", content: messageContent };
605
- };
606
- async generateText(input) {
607
- try {
608
- return await (0, import_ai2.generateText)(input);
609
- } catch (error) {
610
- this.logger.error("Error generating text", { error });
611
- throw error;
2106
+ NoopTracerProvider2.prototype.getTracer = function(_name, _version, _options) {
2107
+ return new NoopTracer();
2108
+ };
2109
+ return NoopTracerProvider2;
2110
+ })()
2111
+ );
2112
+
2113
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/ProxyTracerProvider.js
2114
+ var NOOP_TRACER_PROVIDER = new NoopTracerProvider();
2115
+ var ProxyTracerProvider = (
2116
+ /** @class */
2117
+ (function() {
2118
+ function ProxyTracerProvider2() {
612
2119
  }
613
- }
614
- };
615
- var userSimulatorAgent = (config2) => {
616
- return new UserSimulatorAgent(config2);
617
- };
2120
+ ProxyTracerProvider2.prototype.getTracer = function(name, version, options) {
2121
+ var _a;
2122
+ return (_a = this.getDelegateTracer(name, version, options)) !== null && _a !== void 0 ? _a : new ProxyTracer(this, name, version, options);
2123
+ };
2124
+ ProxyTracerProvider2.prototype.getDelegate = function() {
2125
+ var _a;
2126
+ return (_a = this._delegate) !== null && _a !== void 0 ? _a : NOOP_TRACER_PROVIDER;
2127
+ };
2128
+ ProxyTracerProvider2.prototype.setDelegate = function(delegate) {
2129
+ this._delegate = delegate;
2130
+ };
2131
+ ProxyTracerProvider2.prototype.getDelegateTracer = function(name, version, options) {
2132
+ var _a;
2133
+ return (_a = this._delegate) === null || _a === void 0 ? void 0 : _a.getTracer(name, version, options);
2134
+ };
2135
+ return ProxyTracerProvider2;
2136
+ })()
2137
+ );
618
2138
 
619
- // src/execution/index.ts
620
- var execution_exports = {};
621
- __export(execution_exports, {
622
- ScenarioExecution: () => ScenarioExecution,
623
- ScenarioExecutionState: () => ScenarioExecutionState,
624
- StateChangeEventType: () => StateChangeEventType
625
- });
2139
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/context-api.js
2140
+ var context = ContextAPI.getInstance();
2141
+
2142
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/api/trace.js
2143
var API_NAME3 = "trace";
/**
 * Singleton entry point for tracing.
 *
 * Holds a ProxyTracerProvider that is registered globally under API_NAME3;
 * the provider passed to `setGlobalTracerProvider` becomes that proxy's
 * delegate. The span/context helper functions are also exposed as instance
 * properties.
 */
var TraceAPI = (
  /** @class */
  (function() {
    function TraceAPI2() {
      this._proxyTracerProvider = new ProxyTracerProvider();
      // Re-export the module-level helpers on the instance.
      this.wrapSpanContext = wrapSpanContext;
      this.isSpanContextValid = isSpanContextValid;
      this.deleteSpan = deleteSpan;
      this.getSpan = getSpan;
      this.getActiveSpan = getActiveSpan;
      this.getSpanContext = getSpanContext;
      this.setSpan = setSpan;
      this.setSpanContext = setSpanContext;
    }
    /** Returns the shared instance, creating it on first use. */
    TraceAPI2.getInstance = function() {
      if (this._instance) {
        return this._instance;
      }
      this._instance = new TraceAPI2();
      return this._instance;
    };
    var proto = TraceAPI2.prototype;
    /**
     * Registers the proxy provider globally and, only if that registration
     * succeeds, points the proxy at `provider`.
     *
     * @returns The value returned by `registerGlobal`.
     */
    proto.setGlobalTracerProvider = function(provider) {
      var registered = registerGlobal(API_NAME3, this._proxyTracerProvider, DiagAPI.instance());
      if (!registered) {
        return registered;
      }
      this._proxyTracerProvider.setDelegate(provider);
      return registered;
    };
    /** Returns the globally registered provider, or this instance's proxy provider. */
    proto.getTracerProvider = function() {
      var registered = getGlobal(API_NAME3);
      return registered || this._proxyTracerProvider;
    };
    proto.getTracer = function(name, version) {
      var provider = this.getTracerProvider();
      return provider.getTracer(name, version);
    };
    /** Removes the global registration and replaces the proxy provider with a new one. */
    proto.disable = function() {
      unregisterGlobal(API_NAME3, DiagAPI.instance());
      this._proxyTracerProvider = new ProxyTracerProvider();
    };
    return TraceAPI2;
  })()
);
2184
+
2185
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace-api.js
2186
+ var trace = TraceAPI.getInstance();
626
2187
 
627
2188
  // src/execution/scenario-execution.ts
2189
+ var import_langwatch = require("langwatch");
2190
+ var import_observability3 = require("langwatch/observability");
628
2191
  var import_rxjs2 = require("rxjs");
629
2192
 
630
2193
  // src/execution/scenario-execution-state.ts
@@ -711,9 +2274,13 @@ var ScenarioExecutionState = class {
711
2274
  * Adds a message to the conversation history.
712
2275
  *
713
2276
  * @param message - The message to add.
2277
+ * @param message.traceId - Optional trace ID carried on the message object itself; addMessage takes no separate traceId argument.
714
2278
  */
715
2279
  addMessage(message2) {
716
- const messageWithId = { ...message2, id: generateMessageId() };
2280
+ const messageWithId = {
2281
+ ...message2,
2282
+ id: generateMessageId()
2283
+ };
717
2284
  this._messages.push(messageWithId);
718
2285
  this.eventSubject.next({ type: "MESSAGE_ADDED" /* MESSAGE_ADDED */ });
719
2286
  }
@@ -769,7 +2336,7 @@ var ScenarioExecutionState = class {
769
2336
 
770
2337
  // src/events/schema.ts
771
2338
  var import_core = require("@ag-ui/core");
772
- var import_zod4 = require("zod");
2339
+ var import_zod = require("zod");
773
2340
  var Verdict = /* @__PURE__ */ ((Verdict2) => {
774
2341
  Verdict2["SUCCESS"] = "success";
775
2342
  Verdict2["FAILURE"] = "failure";
@@ -785,68 +2352,69 @@ var ScenarioRunStatus = /* @__PURE__ */ ((ScenarioRunStatus2) => {
785
2352
  ScenarioRunStatus2["FAILED"] = "FAILED";
786
2353
  return ScenarioRunStatus2;
787
2354
  })(ScenarioRunStatus || {});
788
- var baseEventSchema = import_zod4.z.object({
789
- type: import_zod4.z.nativeEnum(import_core.EventType),
790
- timestamp: import_zod4.z.number(),
791
- rawEvent: import_zod4.z.any().optional()
2355
+ var baseEventSchema = import_zod.z.object({
2356
+ type: import_zod.z.nativeEnum(import_core.EventType),
2357
+ timestamp: import_zod.z.number(),
2358
+ rawEvent: import_zod.z.any().optional()
792
2359
  });
793
- var batchRunIdSchema = import_zod4.z.string();
794
- var scenarioRunIdSchema = import_zod4.z.string();
795
- var scenarioIdSchema = import_zod4.z.string();
2360
+ var batchRunIdSchema = import_zod.z.string();
2361
+ var scenarioRunIdSchema = import_zod.z.string();
2362
+ var scenarioIdSchema = import_zod.z.string();
796
2363
  var baseScenarioEventSchema = baseEventSchema.extend({
797
2364
  batchRunId: batchRunIdSchema,
798
2365
  scenarioId: scenarioIdSchema,
799
2366
  scenarioRunId: scenarioRunIdSchema,
800
- scenarioSetId: import_zod4.z.string().optional().default("default")
2367
+ scenarioSetId: import_zod.z.string().optional().default("default")
801
2368
  });
802
2369
  var scenarioRunStartedSchema = baseScenarioEventSchema.extend({
803
- type: import_zod4.z.literal("SCENARIO_RUN_STARTED" /* RUN_STARTED */),
804
- metadata: import_zod4.z.object({
805
- name: import_zod4.z.string().optional(),
806
- description: import_zod4.z.string().optional()
2370
+ type: import_zod.z.literal("SCENARIO_RUN_STARTED" /* RUN_STARTED */),
2371
+ metadata: import_zod.z.object({
2372
+ name: import_zod.z.string().optional(),
2373
+ description: import_zod.z.string().optional()
807
2374
  })
808
2375
  });
809
- var scenarioResultsSchema = import_zod4.z.object({
810
- verdict: import_zod4.z.nativeEnum(Verdict),
811
- reasoning: import_zod4.z.string().optional(),
812
- metCriteria: import_zod4.z.array(import_zod4.z.string()),
813
- unmetCriteria: import_zod4.z.array(import_zod4.z.string()),
814
- error: import_zod4.z.string().optional()
2376
+ var scenarioResultsSchema = import_zod.z.object({
2377
+ verdict: import_zod.z.nativeEnum(Verdict),
2378
+ reasoning: import_zod.z.string().optional(),
2379
+ metCriteria: import_zod.z.array(import_zod.z.string()),
2380
+ unmetCriteria: import_zod.z.array(import_zod.z.string()),
2381
+ error: import_zod.z.string().optional()
815
2382
  });
816
2383
  var scenarioRunFinishedSchema = baseScenarioEventSchema.extend({
817
- type: import_zod4.z.literal("SCENARIO_RUN_FINISHED" /* RUN_FINISHED */),
818
- status: import_zod4.z.nativeEnum(ScenarioRunStatus),
2384
+ type: import_zod.z.literal("SCENARIO_RUN_FINISHED" /* RUN_FINISHED */),
2385
+ status: import_zod.z.nativeEnum(ScenarioRunStatus),
819
2386
  results: scenarioResultsSchema.optional().nullable()
820
2387
  });
821
2388
  var scenarioMessageSnapshotSchema = import_core.MessagesSnapshotEventSchema.merge(
822
2389
  baseScenarioEventSchema.extend({
823
- type: import_zod4.z.literal("SCENARIO_MESSAGE_SNAPSHOT" /* MESSAGE_SNAPSHOT */)
2390
+ type: import_zod.z.literal("SCENARIO_MESSAGE_SNAPSHOT" /* MESSAGE_SNAPSHOT */)
824
2391
  })
825
2392
  );
826
- var scenarioEventSchema = import_zod4.z.discriminatedUnion("type", [
2393
+ var scenarioEventSchema = import_zod.z.discriminatedUnion("type", [
827
2394
  scenarioRunStartedSchema,
828
2395
  scenarioRunFinishedSchema,
829
2396
  scenarioMessageSnapshotSchema
830
2397
  ]);
831
- var successSchema = import_zod4.z.object({ success: import_zod4.z.boolean() });
832
- var errorSchema = import_zod4.z.object({ error: import_zod4.z.string() });
833
- var stateSchema = import_zod4.z.object({
834
- state: import_zod4.z.object({
835
- messages: import_zod4.z.array(import_zod4.z.any()),
836
- status: import_zod4.z.string()
2398
+ var successSchema = import_zod.z.object({ success: import_zod.z.boolean() });
2399
+ var errorSchema = import_zod.z.object({ error: import_zod.z.string() });
2400
+ var stateSchema = import_zod.z.object({
2401
+ state: import_zod.z.object({
2402
+ messages: import_zod.z.array(import_zod.z.any()),
2403
+ status: import_zod.z.string()
837
2404
  })
838
2405
  });
839
- var runsSchema = import_zod4.z.object({ runs: import_zod4.z.array(import_zod4.z.string()) });
840
- var eventsSchema = import_zod4.z.object({ events: import_zod4.z.array(scenarioEventSchema) });
2406
+ var runsSchema = import_zod.z.object({ runs: import_zod.z.array(import_zod.z.string()) });
2407
+ var eventsSchema = import_zod.z.object({ events: import_zod.z.array(scenarioEventSchema) });
841
2408
 
842
2409
  // src/utils/convert-core-messages-to-agui-messages.ts
843
- function convertCoreMessagesToAguiMessages(coreMessages) {
2410
+ function convertModelMessagesToAguiMessages(modelMessages) {
844
2411
  const aguiMessages = [];
845
- for (const msg of coreMessages) {
2412
+ for (const msg of modelMessages) {
846
2413
  const id = "id" in msg && typeof msg.id === "string" ? msg.id : generateMessageId();
847
2414
  switch (true) {
848
2415
  case msg.role === "system":
849
2416
  aguiMessages.push({
2417
+ trace_id: msg.traceId,
850
2418
  id,
851
2419
  role: "system",
852
2420
  content: msg.content
@@ -854,6 +2422,7 @@ function convertCoreMessagesToAguiMessages(coreMessages) {
854
2422
  break;
855
2423
  case (msg.role === "user" && typeof msg.content === "string"):
856
2424
  aguiMessages.push({
2425
+ trace_id: msg.traceId,
857
2426
  id,
858
2427
  role: "user",
859
2428
  content: msg.content
@@ -862,6 +2431,7 @@ function convertCoreMessagesToAguiMessages(coreMessages) {
862
2431
  // Handle any other user message content format
863
2432
  case (msg.role === "user" && Array.isArray(msg.content)):
864
2433
  aguiMessages.push({
2434
+ trace_id: msg.traceId,
865
2435
  id,
866
2436
  role: "user",
867
2437
  content: JSON.stringify(msg.content)
@@ -869,6 +2439,7 @@ function convertCoreMessagesToAguiMessages(coreMessages) {
869
2439
  break;
870
2440
  case (msg.role === "assistant" && typeof msg.content === "string"):
871
2441
  aguiMessages.push({
2442
+ trace_id: msg.traceId,
872
2443
  id,
873
2444
  role: "assistant",
874
2445
  content: msg.content
@@ -878,6 +2449,7 @@ function convertCoreMessagesToAguiMessages(coreMessages) {
878
2449
  const toolCalls = msg.content.filter((p) => p.type === "tool-call");
879
2450
  const nonToolCalls = msg.content.filter((p) => p.type !== "tool-call");
880
2451
  aguiMessages.push({
2452
+ trace_id: msg.traceId,
881
2453
  id,
882
2454
  role: "assistant",
883
2455
  content: JSON.stringify(nonToolCalls),
@@ -886,7 +2458,7 @@ function convertCoreMessagesToAguiMessages(coreMessages) {
886
2458
  type: "function",
887
2459
  function: {
888
2460
  name: c.toolName,
889
- arguments: JSON.stringify(c.args)
2461
+ arguments: JSON.stringify(c.input)
890
2462
  }
891
2463
  }))
892
2464
  });
@@ -894,11 +2466,13 @@ function convertCoreMessagesToAguiMessages(coreMessages) {
894
2466
  }
895
2467
  case msg.role === "tool":
896
2468
  msg.content.map((p, i) => {
2469
+ var _a;
897
2470
  aguiMessages.push({
2471
+ trace_id: msg.traceId,
898
2472
  id: `${id}-${i}`,
899
2473
  role: "tool",
900
2474
  toolCallId: p.toolCallId,
901
- content: JSON.stringify(p.result)
2475
+ content: JSON.stringify((_a = p.output) == null ? void 0 : _a.value)
902
2476
  });
903
2477
  });
904
2478
  break;
@@ -908,12 +2482,16 @@ function convertCoreMessagesToAguiMessages(coreMessages) {
908
2482
  }
909
2483
  return aguiMessages;
910
2484
  }
911
- var convert_core_messages_to_agui_messages_default = convertCoreMessagesToAguiMessages;
2485
+ var convert_core_messages_to_agui_messages_default = convertModelMessagesToAguiMessages;
912
2486
 
913
2487
  // src/execution/scenario-execution.ts
914
2488
  var ScenarioExecution = class {
2489
+ /** LangWatch tracer for scenario execution */
2490
+ tracer = (0, import_langwatch.getLangWatchTracer)("@langwatch/scenario");
915
2491
  /** The current state of the scenario execution */
916
2492
  state;
2493
+ /** The final result of the scenario execution, set when a conclusion is reached */
2494
+ _result;
917
2495
  /** Logger for debugging and monitoring */
918
2496
  logger = new Logger("scenario.execution.ScenarioExecution");
919
2497
  /** Finalized configuration with all defaults applied */
@@ -932,10 +2510,10 @@ var ScenarioExecution = class {
932
2510
  * Key: agent index, Value: array of pending messages for that agent
933
2511
  */
934
2512
  pendingMessages = /* @__PURE__ */ new Map();
935
- /** Intermediate result set by agents that make final decisions */
936
- partialResult = null;
937
2513
  /** Accumulated execution time for each agent (for performance tracking) */
938
2514
  agentTimes = /* @__PURE__ */ new Map();
2515
+ /** Current turn span for trace context management */
2516
+ currentTurnSpan;
939
2517
  /** Timestamp when execution started (for total time calculation) */
940
2518
  totalStartTime = 0;
941
2519
  /** Event stream for monitoring scenario progress */
@@ -974,7 +2552,7 @@ var ScenarioExecution = class {
974
2552
  /**
975
2553
  * Gets the complete conversation history as an array of messages.
976
2554
  *
977
- * @returns Array of CoreMessage objects representing the full conversation
2555
+ * @returns Array of ModelMessage objects representing the full conversation
978
2556
  */
979
2557
  get messages() {
980
2558
  return this.state.messages;
@@ -988,6 +2566,41 @@ var ScenarioExecution = class {
988
2566
  get threadId() {
989
2567
  return this.state.threadId;
990
2568
  }
2569
+ /**
2570
+ * Gets the result of the scenario execution if it has been set.
2571
+ *
2572
+ * @returns The scenario result or undefined if not yet set
2573
+ */
2574
+ get result() {
2575
+ return this._result;
2576
+ }
2577
+ /**
2578
+ * Sets the result of the scenario execution.
2579
+ * This is called when the scenario reaches a conclusion (success or failure).
2580
+ * Automatically includes messages, totalTime, and agentTime from the current execution context.
2581
+ *
2582
+ * @param result - The final scenario result (without messages/timing, which will be added automatically)
2583
+ */
2584
+ setResult(result) {
2585
+ const agentRoleAgentsIdx = this.agents.map((agent2, i) => ({ agent: agent2, idx: i })).filter(({ agent: agent2 }) => agent2.role === "Agent" /* AGENT */).map(({ idx }) => idx);
2586
+ const agentTimes = agentRoleAgentsIdx.map(
2587
+ (i) => this.agentTimes.get(i) || 0
2588
+ );
2589
+ const totalAgentTime = agentTimes.reduce((sum, time) => sum + time, 0);
2590
+ this._result = {
2591
+ ...result,
2592
+ messages: this.state.messages,
2593
+ totalTime: this.totalTime,
2594
+ agentTime: totalAgentTime
2595
+ };
2596
+ this.logger.debug(`[${this.config.id}] Result set`, {
2597
+ success: result.success,
2598
+ reasoning: result.reasoning,
2599
+ totalTime: this.totalTime,
2600
+ agentTime: totalAgentTime,
2601
+ messageCount: this.state.messages.length
2602
+ });
2603
+ }
991
2604
  /**
992
2605
  * The total elapsed time for the scenario execution.
993
2606
  */
@@ -1021,8 +2634,14 @@ var ScenarioExecution = class {
1021
2634
  * ```
1022
2635
  */
1023
2636
  async execute() {
2637
+ this.logger.debug(`[${this.config.id}] Starting scenario execution`, {
2638
+ name: this.config.name,
2639
+ maxTurns: this.config.maxTurns,
2640
+ scriptLength: this.config.script.length
2641
+ });
1024
2642
  this.reset();
1025
2643
  const scenarioRunId = generateScenarioRunId();
2644
+ this.logger.debug(`[${this.config.id}] Generated run ID: ${scenarioRunId}`);
1026
2645
  this.emitRunStarted({ scenarioRunId });
1027
2646
  const subscription = this.state.events$.pipe(
1028
2647
  (0, import_rxjs2.filter)((event) => event.type === "MESSAGE_ADDED" /* MESSAGE_ADDED */)
@@ -1032,18 +2651,17 @@ var ScenarioExecution = class {
1032
2651
  try {
1033
2652
  for (let i = 0; i < this.config.script.length; i++) {
1034
2653
  const scriptStep = this.config.script[i];
1035
- const result = await this.executeScriptStep(scriptStep, i);
1036
- if (result && typeof result === "object" && "success" in result) {
2654
+ await this.executeScriptStep(scriptStep, i);
2655
+ if (this.result) {
1037
2656
  this.emitRunFinished({
1038
2657
  scenarioRunId,
1039
- status: result.success ? "SUCCESS" /* SUCCESS */ : "FAILED" /* FAILED */,
1040
- result
2658
+ status: this.result.success ? "SUCCESS" /* SUCCESS */ : "FAILED" /* FAILED */,
2659
+ result: this.result
1041
2660
  });
1042
- return result;
2661
+ return this.result;
1043
2662
  }
1044
2663
  }
1045
- this.emitRunFinished({ scenarioRunId, status: "FAILED" /* FAILED */ });
1046
- return this.reachedMaxTurns(
2664
+ this.reachedMaxTurns(
1047
2665
  [
1048
2666
  "Reached end of script without conclusion, add one of the following to the end of the script:",
1049
2667
  "- `Scenario.proceed()` to let the simulation continue to play out",
@@ -1051,20 +2669,21 @@ var ScenarioExecution = class {
1051
2669
  "- `Scenario.succeed()` or `Scenario.fail()` to end the test with an explicit result"
1052
2670
  ].join("\n")
1053
2671
  );
2672
+ this.emitRunFinished({ scenarioRunId, status: "FAILED" /* FAILED */ });
2673
+ return this.result;
1054
2674
  } catch (error) {
1055
2675
  const errorInfo = extractErrorInfo(error);
1056
- const errorResult = {
2676
+ this.setResult({
1057
2677
  success: false,
1058
- messages: this.state.messages,
1059
2678
  reasoning: `Scenario failed with error: ${errorInfo.message}`,
1060
2679
  metCriteria: [],
1061
2680
  unmetCriteria: [],
1062
2681
  error: JSON.stringify(errorInfo)
1063
- };
2682
+ });
1064
2683
  this.emitRunFinished({
1065
2684
  scenarioRunId,
1066
2685
  status: "ERROR" /* ERROR */,
1067
- result: errorResult
2686
+ result: this.result
1068
2687
  });
1069
2688
  throw error;
1070
2689
  } finally {
@@ -1082,50 +2701,66 @@ var ScenarioExecution = class {
1082
2701
  * - Progress to the next turn if needed
1083
2702
  * - Find the next agent that should act
1084
2703
  * - Execute that agent's response
1085
- * - Return either new messages or a final scenario result
2704
+ * - Set the result if the scenario concludes
1086
2705
  *
1087
2706
  * Note: This method is primarily for debugging or custom execution flows. Most users
1088
2707
  * will use `execute()` to run the entire scenario automatically.
1089
2708
  *
1090
- * @returns A promise that resolves with either:
1091
- * - Array of new messages added during the agent interaction, or
1092
- * - A final ScenarioResult if the interaction concludes the scenario
1093
- * @throws Error if no result is returned from the step
2709
+ * After calling this method, check `this.result` to see if the scenario has concluded.
1094
2710
  *
1095
2711
  * @example
1096
2712
  * ```typescript
1097
2713
  * const execution = new ScenarioExecution(config, script);
1098
2714
  *
1099
2715
  * // Execute one agent interaction at a time
1100
- * const messages = await execution.step();
1101
- * if (Array.isArray(messages)) {
1102
- * console.log('New messages:', messages);
1103
- * } else {
1104
- * console.log('Scenario finished:', messages.success);
2716
+ * await execution.step();
2717
+ * if (execution.result) {
2718
+ * console.log('Scenario finished:', execution.result.success);
1105
2719
  * }
1106
2720
  * ```
1107
2721
  */
1108
2722
  async step() {
1109
- const result = await this._step();
1110
- if (result === null) throw new Error("No result from step");
1111
- return result;
2723
+ await this._step();
1112
2724
  }
1113
2725
  async _step(goToNextTurn = true, onTurn) {
2726
+ this.logger.debug(`[${this.config.id}] _step called`, {
2727
+ goToNextTurn,
2728
+ pendingRoles: this.pendingRolesOnTurn,
2729
+ currentTurn: this.state.currentTurn
2730
+ });
1114
2731
  if (this.pendingRolesOnTurn.length === 0) {
1115
- if (!goToNextTurn) return null;
2732
+ if (!goToNextTurn) {
2733
+ this.logger.debug(
2734
+ `[${this.config.id}] No pending roles, not advancing turn`
2735
+ );
2736
+ return;
2737
+ }
1116
2738
  this.newTurn();
1117
2739
  if (onTurn) await onTurn(this.state);
1118
- if (this.state.currentTurn >= this.config.maxTurns)
1119
- return this.reachedMaxTurns();
2740
+ if (this.state.currentTurn >= this.config.maxTurns) {
2741
+ this.logger.debug(
2742
+ `[${this.config.id}] Reached max turns: ${this.state.currentTurn}`
2743
+ );
2744
+ this.reachedMaxTurns();
2745
+ return;
2746
+ }
1120
2747
  }
1121
2748
  const currentRole = this.pendingRolesOnTurn[0];
1122
2749
  const { idx, agent: nextAgent } = this.nextAgentForRole(currentRole);
1123
2750
  if (!nextAgent) {
2751
+ this.logger.debug(
2752
+ `[${this.config.id}] No agent for role ${currentRole}, removing role`
2753
+ );
1124
2754
  this.removePendingRole(currentRole);
1125
2755
  return this._step(goToNextTurn, onTurn);
1126
2756
  }
2757
+ this.logger.debug(`[${this.config.id}] Calling agent`, {
2758
+ role: currentRole,
2759
+ agentIdx: idx,
2760
+ agentName: nextAgent.name ?? nextAgent.constructor.name
2761
+ });
1127
2762
  this.removePendingAgent(nextAgent);
1128
- return await this.callAgent(idx, currentRole);
2763
+ await this.callAgent(idx, currentRole);
1129
2764
  }
1130
2765
  /**
1131
2766
  * Calls a specific agent to generate a response or make a decision.
@@ -1144,19 +2779,25 @@ var ScenarioExecution = class {
1144
2779
  * After the agent responds:
1145
2780
  * - Performance timing is recorded
1146
2781
  * - Pending messages for this agent are cleared (they've been processed)
1147
- * - If the agent returns a ScenarioResult, it's returned immediately
2782
+ * - If the agent returns a ScenarioResult, it's set on this.result
1148
2783
  * - Otherwise, the agent's messages are added to the conversation and broadcast
1149
2784
  *
1150
2785
  * @param idx - The index of the agent in the agents array
1151
2786
  * @param role - The role the agent is being asked to play (USER, AGENT, or JUDGE)
1152
2787
  * @param judgmentRequest - Whether this is a judgment request (for judge agents)
1153
- * @returns A promise that resolves with either:
1154
- * - Array of messages if the agent generated a response, or
1155
- * - ScenarioResult if the agent made a final decision
1156
2788
  * @throws Error if the agent call fails
1157
2789
  */
1158
2790
  async callAgent(idx, role, judgmentRequest = false) {
2791
+ var _a;
1159
2792
  const agent2 = this.agents[idx];
2793
+ const agentName = agent2.name ?? agent2.constructor.name;
2794
+ this.logger.debug(`[${this.config.id}] callAgent started`, {
2795
+ agentIdx: idx,
2796
+ role,
2797
+ judgmentRequest,
2798
+ agentName,
2799
+ pendingMessagesCount: ((_a = this.pendingMessages.get(idx)) == null ? void 0 : _a.length) ?? 0
2800
+ });
1160
2801
  const startTime = Date.now();
1161
2802
  const agentInput = {
1162
2803
  threadId: this.state.threadId,
@@ -1167,35 +2808,75 @@ var ScenarioExecution = class {
1167
2808
  scenarioState: this.state,
1168
2809
  scenarioConfig: this.config
1169
2810
  };
2811
+ const agentContext = this.currentTurnSpan ? trace.setSpan(context.active(), this.currentTurnSpan) : context.active();
2812
+ const agentSpanName = `${agentName !== Object.prototype.constructor.name ? agent2.constructor.name : "Agent"}.call`;
1170
2813
  try {
1171
- const agentResponse = await agent2.call(agentInput);
1172
- const endTime = Date.now();
1173
- this.addAgentTime(idx, endTime - startTime);
1174
- this.pendingMessages.delete(idx);
1175
- if (agentResponse && typeof agentResponse === "object" && "success" in agentResponse) {
1176
- return agentResponse;
1177
- }
1178
- const currentAgentTime = this.agentTimes.get(idx) ?? 0;
1179
- this.agentTimes.set(idx, currentAgentTime + (Date.now() - startTime));
1180
- const messages = convertAgentReturnTypesToMessages(
1181
- agentResponse,
1182
- role === "User" /* USER */ ? "user" : "assistant"
1183
- );
1184
- for (const message2 of messages) {
1185
- this.state.addMessage(message2);
1186
- this.broadcastMessage(message2, idx);
1187
- }
1188
- return messages;
1189
- } catch (error) {
1190
- this.logger.error(
1191
- `[${this.config.id}] Error calling agent ${agent2.constructor.name}`,
2814
+ await this.tracer.withActiveSpan(
2815
+ agentSpanName,
1192
2816
  {
1193
- error: error instanceof Error ? error.message : String(error),
1194
- agent: agent2.constructor.name,
1195
- agentInput
2817
+ attributes: {
2818
+ [import_observability3.attributes.ATTR_LANGWATCH_THREAD_ID]: this.state.threadId
2819
+ }
2820
+ },
2821
+ agentContext,
2822
+ async (agentSpan) => {
2823
+ agentSpan.setType("agent");
2824
+ agentSpan.setInput("chat_messages", this.state.messages);
2825
+ const agentResponse = await agent2.call(agentInput);
2826
+ const endTime = Date.now();
2827
+ const duration = endTime - startTime;
2828
+ this.logger.debug(`[${this.config.id}] Agent responded`, {
2829
+ agentIdx: idx,
2830
+ duration,
2831
+ responseType: typeof agentResponse,
2832
+ isScenarioResult: agentResponse && typeof agentResponse === "object" && "success" in agentResponse
2833
+ });
2834
+ this.addAgentTime(idx, duration);
2835
+ this.pendingMessages.delete(idx);
2836
+ if (agentResponse && typeof agentResponse === "object" && "success" in agentResponse) {
2837
+ this.logger.debug(
2838
+ `[${this.config.id}] Agent returned ScenarioResult`,
2839
+ {
2840
+ success: agentResponse.success
2841
+ }
2842
+ );
2843
+ this.setResult(agentResponse);
2844
+ return;
2845
+ }
2846
+ const messages = convertAgentReturnTypesToMessages(
2847
+ agentResponse,
2848
+ role === "User" /* USER */ ? "user" : "assistant"
2849
+ );
2850
+ if (messages.length > 0) {
2851
+ agentSpan.setOutput("chat_messages", messages);
2852
+ }
2853
+ const metrics = {
2854
+ duration: endTime - startTime
2855
+ };
2856
+ if (agentResponse && typeof agentResponse === "object") {
2857
+ const usage = agentResponse.usage;
2858
+ if (usage) {
2859
+ if (usage.prompt_tokens !== void 0)
2860
+ metrics.promptTokens = usage.prompt_tokens;
2861
+ if (usage.completion_tokens !== void 0)
2862
+ metrics.completionTokens = usage.completion_tokens;
2863
+ if (usage.total_tokens !== void 0)
2864
+ metrics.totalTokens = usage.total_tokens;
2865
+ }
2866
+ }
2867
+ agentSpan.setMetrics(metrics);
2868
+ const traceId = agentSpan.spanContext().traceId.toString();
2869
+ for (const message2 of messages) {
2870
+ this.state.addMessage({
2871
+ ...message2,
2872
+ traceId
2873
+ });
2874
+ this.broadcastMessage(message2, idx);
2875
+ }
1196
2876
  }
1197
2877
  );
1198
- throw error;
2878
+ } catch (error) {
2879
+ throw new Error(`[${agentName}] ${error}`, { cause: error });
1199
2880
  }
1200
2881
  }
1201
2882
  /**
@@ -1207,7 +2888,7 @@ var ScenarioExecution = class {
1207
2888
  * - "assistant" messages are routed to AGENT role agents
1208
2889
  * - Other message types are added directly to the conversation
1209
2890
  *
1210
- * @param message - The CoreMessage to add to the conversation
2891
+ * @param message - The ModelMessage to add to the conversation
1211
2892
  *
1212
2893
  * @example
1213
2894
  * ```typescript
@@ -1236,7 +2917,7 @@ var ScenarioExecution = class {
1236
2917
  *
1237
2918
  * This method is part of the ScenarioExecutionLike interface used by script steps.
1238
2919
  *
1239
- * @param content - Optional content for the user's message. Can be a string or CoreMessage.
2920
+ * @param content - Optional content for the user's message. Can be a string or ModelMessage.
1240
2921
  * If not provided, the user simulator agent will generate the content.
1241
2922
  *
1242
2923
  * @example
@@ -1247,7 +2928,7 @@ var ScenarioExecution = class {
1247
2928
  * // Let user simulator generate content
1248
2929
  * await execution.user();
1249
2930
  *
1250
- * // Use a CoreMessage object
2931
+ * // Use a ModelMessage object
1251
2932
  * await execution.user({
1252
2933
  * role: "user",
1253
2934
  * content: "Tell me a joke"
@@ -1266,7 +2947,7 @@ var ScenarioExecution = class {
1266
2947
  *
1267
2948
  * This method is part of the ScenarioExecutionLike interface used by script steps.
1268
2949
  *
1269
- * @param content - Optional content for the agent's response. Can be a string or CoreMessage.
2950
+ * @param content - Optional content for the agent's response. Can be a string or ModelMessage.
1270
2951
  * If not provided, the agent under test will generate the response.
1271
2952
  *
1272
2953
  * @example
@@ -1277,7 +2958,7 @@ var ScenarioExecution = class {
1277
2958
  * // Use provided content
1278
2959
  * await execution.agent("The weather is sunny today!");
1279
2960
  *
1280
- * // Use a CoreMessage object
2961
+ * // Use a ModelMessage object
1281
2962
  * await execution.agent({
1282
2963
  * role: "assistant",
1283
2964
  * content: "I'm here to help you with weather information."
@@ -1358,17 +3039,22 @@ var ScenarioExecution = class {
1358
3039
  * ```
1359
3040
  */
1360
3041
  async proceed(turns, onTurn, onStep) {
3042
+ this.logger.debug(`[${this.config.id}] proceed called`, {
3043
+ turns,
3044
+ currentTurn: this.state.currentTurn
3045
+ });
1361
3046
  let initialTurn = this.state.currentTurn;
1362
3047
  while (true) {
1363
3048
  const goToNextTurn = turns === void 0 || initialTurn === null || this.state.currentTurn != null && this.state.currentTurn + 1 < initialTurn + turns;
1364
- const nextMessage = await this._step(goToNextTurn, onTurn);
3049
+ await this._step(goToNextTurn, onTurn);
1365
3050
  if (initialTurn === null) initialTurn = this.state.currentTurn;
1366
- if (nextMessage === null) {
1367
- return null;
3051
+ if (this.result) {
3052
+ return this.result;
1368
3053
  }
1369
3054
  if (onStep) await onStep(this.state);
1370
- if (nextMessage !== null && typeof nextMessage === "object" && "success" in nextMessage)
1371
- return nextMessage;
3055
+ if (!goToNextTurn) {
3056
+ return null;
3057
+ }
1372
3058
  }
1373
3059
  }
1374
3060
  /**
@@ -1395,13 +3081,13 @@ var ScenarioExecution = class {
1395
3081
  * ```
1396
3082
  */
1397
3083
  async succeed(reasoning) {
1398
- return {
3084
+ this.setResult({
1399
3085
  success: true,
1400
- messages: this.state.messages,
1401
3086
  reasoning: reasoning || "Scenario marked as successful with Scenario.succeed()",
1402
3087
  metCriteria: [],
1403
3088
  unmetCriteria: []
1404
- };
3089
+ });
3090
+ return this.result;
1405
3091
  }
1406
3092
  /**
1407
3093
  * Immediately ends the scenario with a failure verdict.
@@ -1427,13 +3113,13 @@ var ScenarioExecution = class {
1427
3113
  * ```
1428
3114
  */
1429
3115
  async fail(reasoning) {
1430
- return {
3116
+ this.setResult({
1431
3117
  success: false,
1432
- messages: this.state.messages,
1433
3118
  reasoning: reasoning || "Scenario marked as failed with Scenario.fail()",
1434
3119
  metCriteria: [],
1435
3120
  unmetCriteria: []
1436
- };
3121
+ });
3122
+ return this.result;
1437
3123
  }
1438
3124
  /**
1439
3125
  * Adds execution time for a specific agent to the performance tracking.
@@ -1456,53 +3142,6 @@ var ScenarioExecution = class {
1456
3142
  const currentTime = this.agentTimes.get(agentIdx) || 0;
1457
3143
  this.agentTimes.set(agentIdx, currentTime + time);
1458
3144
  }
1459
- /**
1460
- * Checks if a partial result has been set for the scenario.
1461
- *
1462
- * This method is used internally to determine if a scenario has already reached
1463
- * a conclusion (success or failure) but hasn't been finalized yet. Partial results
1464
- * are typically set by agents that make final decisions (like judge agents) and
1465
- * are later finalized with the complete message history.
1466
- *
1467
- * @returns True if a partial result exists, false otherwise
1468
- *
1469
- * @example
1470
- * ```typescript
1471
- * // This is typically used internally by the execution engine
1472
- * if (execution.hasResult()) {
1473
- * console.log('Scenario has reached a conclusion');
1474
- * }
1475
- * ```
1476
- */
1477
- hasResult() {
1478
- return this.partialResult !== null;
1479
- }
1480
- /**
1481
- * Sets a partial result for the scenario.
1482
- *
1483
- * This method is used internally to store intermediate results that may be
1484
- * finalized later with the complete message history. Partial results are typically
1485
- * created by agents that make final decisions (like judge agents) and contain
1486
- * the success/failure status, reasoning, and criteria evaluation, but not the
1487
- * complete message history.
1488
- *
1489
- * @param result - The partial result without the messages field. Should include
1490
- * success status, reasoning, and criteria evaluation.
1491
- *
1492
- * @example
1493
- * ```typescript
1494
- * // This is typically called internally by agents that make final decisions
1495
- * execution.setResult({
1496
- * success: true,
1497
- * reasoning: "Agent provided accurate weather information",
1498
- * metCriteria: ["Provides accurate weather data"],
1499
- * unmetCriteria: []
1500
- * });
1501
- * ```
1502
- */
1503
- setResult(result) {
1504
- this.partialResult = result;
1505
- }
1506
3145
  /**
1507
3146
  * Internal method to handle script step calls to agents.
1508
3147
  *
@@ -1515,7 +3154,7 @@ var ScenarioExecution = class {
1515
3154
  * - Progress to a new turn if no agent is available
1516
3155
  * - Execute the agent with the provided content or let it generate content
1517
3156
  * - Handle judgment requests for judge agents
1518
- * - Return a final result if the agent makes a decision
3157
+ * - Set the result if the agent makes a decision
1519
3158
  *
1520
3159
  * @param role - The role of the agent to call (USER, AGENT, or JUDGE)
1521
3160
  * @param content - Optional content to use instead of letting the agent generate it
@@ -1525,6 +3164,11 @@ var ScenarioExecution = class {
1525
3164
  * @throws Error if no agent is found for the specified role
1526
3165
  */
1527
3166
  async scriptCallAgent(role, content, judgmentRequest = false) {
3167
+ this.logger.debug(`[${this.config.id}] scriptCallAgent`, {
3168
+ role,
3169
+ hasContent: content !== void 0,
3170
+ judgmentRequest
3171
+ });
1528
3172
  this.consumeUntilRole(role);
1529
3173
  let index = -1;
1530
3174
  let agent2 = null;
@@ -1569,11 +3213,8 @@ var ScenarioExecution = class {
1569
3213
  this.broadcastMessage(message2, index);
1570
3214
  return null;
1571
3215
  }
1572
- const result = await this.callAgent(index, role, judgmentRequest);
1573
- if (result && typeof result === "object" && "success" in result) {
1574
- return result;
1575
- }
1576
- return null;
3216
+ await this.callAgent(index, role, judgmentRequest);
3217
+ return this.result ?? null;
1577
3218
  }
1578
3219
  /**
1579
3220
  * Resets the scenario execution to its initial state.
@@ -1589,8 +3230,14 @@ var ScenarioExecution = class {
1589
3230
  * - Starts the first turn
1590
3231
  * - Records the start time for performance tracking
1591
3232
  * - Clears any pending messages
3233
+ * - Clears the result from any previous execution
1592
3234
  */
1593
3235
  reset() {
3236
+ this.logger.debug(`[${this.config.id}] Resetting scenario execution`);
3237
+ if (this.currentTurnSpan) {
3238
+ this.currentTurnSpan.end();
3239
+ this.currentTurnSpan = void 0;
3240
+ }
1594
3241
  this.state = new ScenarioExecutionState(this.config);
1595
3242
  this.state.threadId = this.config.threadId || generateThreadId();
1596
3243
  this.setAgents(this.config.agents);
@@ -1598,6 +3245,11 @@ var ScenarioExecution = class {
1598
3245
  this.state.currentTurn = 0;
1599
3246
  this.totalStartTime = Date.now();
1600
3247
  this.pendingMessages.clear();
3248
+ this._result = void 0;
3249
+ this.logger.debug(`[${this.config.id}] Reset complete`, {
3250
+ threadId: this.state.threadId,
3251
+ agentCount: this.agents.length
3252
+ });
1601
3253
  }
1602
3254
  nextAgentForRole(role) {
1603
3255
  for (const agent2 of this.agents) {
@@ -1618,6 +3270,11 @@ var ScenarioExecution = class {
1618
3270
  * multiple agent interactions as agents respond to each other's messages.
1619
3271
  */
1620
3272
  newTurn() {
3273
+ const previousTurn = this.state.currentTurn;
3274
+ if (this.currentTurnSpan) {
3275
+ this.currentTurnSpan.end();
3276
+ this.currentTurnSpan = void 0;
3277
+ }
1621
3278
  this.pendingAgentsOnTurn = new Set(this.agents);
1622
3279
  this.pendingRolesOnTurn = [
1623
3280
  "User" /* USER */,
@@ -1629,6 +3286,19 @@ var ScenarioExecution = class {
1629
3286
  } else {
1630
3287
  this.state.currentTurn++;
1631
3288
  }
3289
+ this.logger.debug(`[${this.config.id}] New turn started`, {
3290
+ previousTurn,
3291
+ currentTurn: this.state.currentTurn,
3292
+ agentCount: this.agents.length
3293
+ });
3294
+ this.currentTurnSpan = this.tracer.startSpan("Scenario Turn", {
3295
+ attributes: {
3296
+ "scenario.name": this.config.name,
3297
+ "scenario.id": this.config.id,
3298
+ [import_observability3.attributes.ATTR_LANGWATCH_THREAD_ID]: this.state.threadId,
3299
+ "scenario.turn": this.state.currentTurn
3300
+ }
3301
+ });
1632
3302
  }
1633
3303
  removePendingRole(role) {
1634
3304
  const index = this.pendingRolesOnTurn.indexOf(role);
@@ -1664,7 +3334,7 @@ var ScenarioExecution = class {
1664
3334
  *
1665
3335
  * This method is called when the scenario execution reaches the maximum number
1666
3336
  * of turns without reaching a conclusion. It creates a failure result with
1667
- * appropriate reasoning and includes performance metrics.
3337
+ * appropriate reasoning and includes performance metrics, then sets it on this.result.
1668
3338
  *
1669
3339
  * The result includes:
1670
3340
  * - All messages from the conversation
@@ -1674,24 +3344,15 @@ var ScenarioExecution = class {
1674
3344
  * - Total execution time and agent response times
1675
3345
  *
1676
3346
  * @param errorMessage - Optional custom error message to use instead of the default
1677
- * @returns A ScenarioResult indicating failure due to reaching max turns
1678
3347
  */
1679
3348
  reachedMaxTurns(errorMessage) {
1680
3349
  var _a;
1681
- const agentRoleAgentsIdx = this.agents.map((agent2, i) => ({ agent: agent2, idx: i })).filter(({ agent: agent2 }) => agent2.role === "Agent" /* AGENT */).map(({ idx }) => idx);
1682
- const agentTimes = agentRoleAgentsIdx.map(
1683
- (i) => this.agentTimes.get(i) || 0
1684
- );
1685
- const totalAgentTime = agentTimes.reduce((sum, time) => sum + time, 0);
1686
- return {
3350
+ this.setResult({
1687
3351
  success: false,
1688
- messages: this.state.messages,
1689
3352
  reasoning: errorMessage || `Reached maximum turns (${this.config.maxTurns || 10}) without conclusion`,
1690
3353
  metCriteria: [],
1691
- unmetCriteria: ((_a = this.getJudgeAgent()) == null ? void 0 : _a.criteria) ?? [],
1692
- totalTime: this.totalTime,
1693
- agentTime: totalAgentTime
1694
- };
3354
+ unmetCriteria: ((_a = this.getJudgeAgent()) == null ? void 0 : _a.criteria) ?? []
3355
+ });
1695
3356
  }
1696
3357
  getJudgeAgent() {
1697
3358
  return this.agents.find((agent2) => agent2 instanceof JudgeAgentAdapter) ?? null;
@@ -1763,6 +3424,10 @@ var ScenarioExecution = class {
1763
3424
  };
1764
3425
  this.emitEvent(event);
1765
3426
  this.eventSubject.complete();
3427
+ if (this.currentTurnSpan) {
3428
+ this.currentTurnSpan.end();
3429
+ this.currentTurnSpan = void 0;
3430
+ }
1766
3431
  }
1767
3432
  /**
1768
3433
  * Distributes a message to all other agents in the scenario.
@@ -1794,13 +3459,20 @@ var ScenarioExecution = class {
1794
3459
  * ```
1795
3460
  */
1796
3461
  broadcastMessage(message2, fromAgentIdx) {
3462
+ const recipients = [];
1797
3463
  for (let idx = 0; idx < this.agents.length; idx++) {
1798
3464
  if (idx === fromAgentIdx) continue;
1799
3465
  if (!this.pendingMessages.has(idx)) {
1800
3466
  this.pendingMessages.set(idx, []);
1801
3467
  }
1802
3468
  this.pendingMessages.get(idx).push(message2);
3469
+ recipients.push(idx);
1803
3470
  }
3471
+ this.logger.debug(`[${this.config.id}] Broadcast message`, {
3472
+ role: message2.role,
3473
+ fromAgentIdx,
3474
+ recipients
3475
+ });
1804
3476
  }
1805
3477
  /**
1806
3478
  * Executes a single script step with proper error handling and logging.
@@ -1859,7 +3531,8 @@ function convertAgentReturnTypesToMessages(response, role) {
1859
3531
  if (typeof response === "string")
1860
3532
  return [{ role, content: response }];
1861
3533
  if (Array.isArray(response)) return response;
1862
- if (typeof response === "object" && "role" in response) return [response];
3534
+ if (response && typeof response === "object" && "role" in response)
3535
+ return [response];
1863
3536
  return [];
1864
3537
  }
1865
3538
  function extractErrorInfo(error) {
@@ -1886,9 +3559,27 @@ __export(runner_exports, {
1886
3559
  var import_rxjs3 = require("rxjs");
1887
3560
 
1888
3561
  // src/events/event-alert-message-logger.ts
3562
+ var fs2 = __toESM(require("fs"));
3563
+ var os = __toESM(require("os"));
3564
+ var path2 = __toESM(require("path"));
1889
3565
  var import_open = __toESM(require("open"));
1890
- var EventAlertMessageLogger = class _EventAlertMessageLogger {
1891
- static shownBatchIds = /* @__PURE__ */ new Set();
3566
+ var EventAlertMessageLogger = class {
3567
+ /**
3568
+ * Creates a coordination file to prevent duplicate messages across processes.
3569
+ * Returns true if this process should show the message (first one to create the file).
3570
+ */
3571
+ createCoordinationFile(type) {
3572
+ try {
3573
+ const batchId = getBatchRunId();
3574
+ const tmpDir = os.tmpdir();
3575
+ const fileName = `scenario-${type}-${batchId}`;
3576
+ const filePath = path2.join(tmpDir, fileName);
3577
+ fs2.writeFileSync(filePath, process.pid.toString(), { flag: "wx" });
3578
+ return true;
3579
+ } catch {
3580
+ return false;
3581
+ }
3582
+ }
1892
3583
  /**
1893
3584
  * Shows a fancy greeting message about simulation reporting status.
1894
3585
  * Only shows once per batch run to avoid spam.
@@ -1897,10 +3588,9 @@ var EventAlertMessageLogger = class _EventAlertMessageLogger {
1897
3588
  if (this.isGreetingDisabled()) {
1898
3589
  return;
1899
3590
  }
1900
- if (_EventAlertMessageLogger.shownBatchIds.has(getBatchRunId())) {
3591
+ if (!this.createCoordinationFile("greeting")) {
1901
3592
  return;
1902
3593
  }
1903
- _EventAlertMessageLogger.shownBatchIds.add(getBatchRunId());
1904
3594
  this.displayGreeting();
1905
3595
  }
1906
3596
  /**
@@ -1911,6 +3601,9 @@ var EventAlertMessageLogger = class _EventAlertMessageLogger {
1911
3601
  if (this.isGreetingDisabled()) {
1912
3602
  return;
1913
3603
  }
3604
+ if (!this.createCoordinationFile(`watch-${params.scenarioSetId}`)) {
3605
+ return;
3606
+ }
1914
3607
  await this.displayWatchMessage(params);
1915
3608
  }
1916
3609
  isGreetingDisabled() {
@@ -1998,6 +3691,7 @@ var EventReporter = class {
1998
3691
  } else {
1999
3692
  const errorText = await response.text();
2000
3693
  this.logger.error(`[${event.type}] Event POST failed:`, {
3694
+ endpoint: this.eventsEndpoint.href,
2001
3695
  status: response.status,
2002
3696
  statusText: response.statusText,
2003
3697
  error: errorText,
@@ -2155,19 +3849,27 @@ var agent = (content) => {
2155
3849
  return (_state, executor) => executor.agent(content);
2156
3850
  };
2157
3851
  var judge = (content) => {
2158
- return (_state, executor) => executor.judge(content);
3852
+ return async (_state, executor) => {
3853
+ await executor.judge(content);
3854
+ };
2159
3855
  };
2160
3856
  var user = (content) => {
2161
3857
  return (_state, executor) => executor.user(content);
2162
3858
  };
2163
3859
  var proceed = (turns, onTurn, onStep) => {
2164
- return (_state, executor) => executor.proceed(turns, onTurn, onStep);
3860
+ return async (_state, executor) => {
3861
+ await executor.proceed(turns, onTurn, onStep);
3862
+ };
2165
3863
  };
2166
3864
  var succeed = (reasoning) => {
2167
- return (_state, executor) => executor.succeed(reasoning);
3865
+ return async (_state, executor) => {
3866
+ await executor.succeed(reasoning);
3867
+ };
2168
3868
  };
2169
3869
  var fail = (reasoning) => {
2170
- return (_state, executor) => executor.fail(reasoning);
3870
+ return async (_state, executor) => {
3871
+ await executor.fail(reasoning);
3872
+ };
2171
3873
  };
2172
3874
 
2173
3875
  // src/runner/run.ts
@@ -2178,7 +3880,7 @@ async function run(cfg) {
2178
3880
  if (!cfg.description) {
2179
3881
  throw new Error("Scenario description is required");
2180
3882
  }
2181
- if ((cfg.maxTurns || 10) < 1) {
3883
+ if (cfg.maxTurns && cfg.maxTurns < 1) {
2182
3884
  throw new Error("Max turns must be at least 1");
2183
3885
  }
2184
3886
  if (cfg.agents.length === 0) {
@@ -2200,10 +3902,10 @@ async function run(cfg) {
2200
3902
  let eventBus = null;
2201
3903
  let subscription = null;
2202
3904
  try {
2203
- const envConfig = getEnv();
3905
+ const envConfig2 = getEnv();
2204
3906
  eventBus = new EventBus({
2205
- endpoint: envConfig.LANGWATCH_ENDPOINT,
2206
- apiKey: envConfig.LANGWATCH_API_KEY
3907
+ endpoint: envConfig2.LANGWATCH_ENDPOINT,
3908
+ apiKey: envConfig2.LANGWATCH_API_KEY
2207
3909
  });
2208
3910
  eventBus.listen();
2209
3911
  subscription = eventBus.subscribeTo(execution.events$);
@@ -2254,14 +3956,13 @@ function formatPart(part) {
2254
3956
  case "file":
2255
3957
  return `(file): ${part.filename} ${typeof part.data === "string" ? `url:${part.data}` : "base64:omitted"}`;
2256
3958
  case "tool-call":
2257
- return `(tool call): ${part.toolName} id:${part.toolCallId} args:(${JSON.stringify(part.args)})`;
3959
+ return `(tool call): ${part.toolName} id:${part.toolCallId} args:(${JSON.stringify(part.input)})`;
2258
3960
  case "tool-result":
2259
- return `(tool result): ${part.toolName} id:${part.toolCallId} result:(${JSON.stringify(part.result)})`;
3961
+ return `(tool result): ${part.toolName} id:${part.toolCallId} result:(${JSON.stringify(part.output)})`;
2260
3962
  case "reasoning":
2261
3963
  return `(reasoning): ${part.text}`;
2262
- case "redacted-reasoning":
2263
- return `(redacted reasoning): ${part.data}`;
2264
3964
  default:
3965
+ part;
2265
3966
  return `Unknown content: ${JSON.stringify(part)}`;
2266
3967
  }
2267
3968
  }
@@ -2280,9 +3981,11 @@ var index_default = scenario;
2280
3981
  AgentAdapter,
2281
3982
  AgentRole,
2282
3983
  DEFAULT_MAX_TURNS,
2283
- DEFAULT_TEMPERATURE,
2284
3984
  DEFAULT_VERBOSE,
2285
3985
  JudgeAgentAdapter,
3986
+ JudgeSpanCollector,
3987
+ JudgeSpanDigestFormatter,
3988
+ RealtimeAgentAdapter,
2286
3989
  ScenarioExecution,
2287
3990
  ScenarioExecutionState,
2288
3991
  StateChangeEventType,
@@ -2293,6 +3996,8 @@ var index_default = scenario;
2293
3996
  fail,
2294
3997
  judge,
2295
3998
  judgeAgent,
3999
+ judgeSpanCollector,
4000
+ judgeSpanDigestFormatter,
2296
4001
  message,
2297
4002
  proceed,
2298
4003
  run,