@langwatch/scenario 0.2.9 → 0.2.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -17,11 +17,11 @@ import {
17
17
  getBatchRunId,
18
18
  getProjectConfig,
19
19
  scenarioProjectConfigSchema
20
- } from "./chunk-7H6OGEQ5.mjs";
20
+ } from "./chunk-7HLDX5EL.mjs";
21
21
  import {
22
22
  Logger,
23
- env
24
- } from "./chunk-YPJZSK4J.mjs";
23
+ getEnv
24
+ } from "./chunk-OL4RFXV4.mjs";
25
25
  import {
26
26
  __export
27
27
  } from "./chunk-7P6ASYW6.mjs";
@@ -163,88 +163,109 @@ function buildFinishTestTool(criteria) {
163
163
  })
164
164
  });
165
165
  }
166
- var judgeAgent = (cfg) => {
167
- return {
168
- role: "Judge" /* JUDGE */,
169
- criteria: cfg.criteria,
170
- call: async (input) => {
171
- var _a;
172
- const systemPrompt = cfg.systemPrompt ?? buildSystemPrompt(cfg.criteria, input.scenarioConfig.description);
173
- const messages = [
174
- { role: "system", content: systemPrompt },
175
- ...input.messages
176
- ];
177
- const isLastMessage = input.scenarioState.currentTurn === input.scenarioConfig.maxTurns;
178
- const projectConfig = await getProjectConfig();
179
- const mergedConfig = mergeAndValidateConfig(cfg, projectConfig);
180
- if (!mergedConfig.model) {
181
- throw new Error("Model is required for the judge agent");
182
- }
183
- const tools = {
184
- continue_test: buildContinueTestTool(),
185
- finish_test: buildFinishTestTool(cfg.criteria)
186
- };
187
- const enforceJudgement = input.judgmentRequest;
188
- const hasCriteria = cfg.criteria.length && cfg.criteria.length > 0;
189
- if (enforceJudgement && !hasCriteria) {
190
- return {
191
- success: false,
192
- messages: [],
193
- reasoning: "JudgeAgent: No criteria was provided to be judged against",
194
- metCriteria: [],
195
- unmetCriteria: []
196
- };
197
- }
198
- const toolChoice = (isLastMessage || enforceJudgement) && hasCriteria ? { type: "tool", toolName: "finish_test" } : "required";
199
- const completion = await generateText({
200
- model: mergedConfig.model,
201
- messages,
202
- temperature: mergedConfig.temperature ?? 0,
203
- maxTokens: mergedConfig.maxTokens,
204
- tools,
205
- toolChoice
206
- });
207
- let args;
208
- if ((_a = completion.toolCalls) == null ? void 0 : _a.length) {
209
- const toolCall = completion.toolCalls[0];
210
- switch (toolCall.toolName) {
211
- case "finish_test": {
212
- args = toolCall.args;
213
- const verdict = args.verdict || "inconclusive";
214
- const reasoning = args.reasoning || "No reasoning provided";
215
- const criteria = args.criteria || {};
216
- const criteriaValues = Object.values(criteria);
217
- const metCriteria = cfg.criteria.filter((_, i) => criteriaValues[i] === "true");
218
- const unmetCriteria = cfg.criteria.filter((_, i) => criteriaValues[i] !== "true");
219
- return {
220
- success: verdict === "success",
221
- messages: input.messages,
222
- reasoning,
223
- metCriteria,
224
- unmetCriteria
225
- };
226
- }
227
- case "continue_test":
228
- return [];
229
- default:
230
- return {
231
- success: false,
232
- messages: input.messages,
233
- reasoning: `JudgeAgent: Unknown tool call: ${toolCall.toolName}`,
234
- metCriteria: [],
235
- unmetCriteria: cfg.criteria
236
- };
237
- }
238
- }
166
+ var JudgeAgent = class extends JudgeAgentAdapter {
167
+ constructor(cfg) {
168
+ super();
169
+ this.cfg = cfg;
170
+ this.criteria = cfg.criteria;
171
+ this.role = "Judge" /* JUDGE */;
172
+ }
173
+ logger = new Logger("JudgeAgent");
174
+ role = "Judge" /* JUDGE */;
175
+ criteria;
176
+ async call(input) {
177
+ var _a;
178
+ const cfg = this.cfg;
179
+ const systemPrompt = cfg.systemPrompt ?? buildSystemPrompt(cfg.criteria, input.scenarioConfig.description);
180
+ const messages = [
181
+ { role: "system", content: systemPrompt },
182
+ ...input.messages
183
+ ];
184
+ const isLastMessage = input.scenarioState.currentTurn === input.scenarioConfig.maxTurns;
185
+ const projectConfig = await getProjectConfig();
186
+ const mergedConfig = mergeAndValidateConfig(cfg, projectConfig);
187
+ if (!mergedConfig.model) {
188
+ throw new Error("Model is required for the judge agent");
189
+ }
190
+ const tools = {
191
+ continue_test: buildContinueTestTool(),
192
+ finish_test: buildFinishTestTool(cfg.criteria)
193
+ };
194
+ const enforceJudgement = input.judgmentRequest;
195
+ const hasCriteria = cfg.criteria.length && cfg.criteria.length > 0;
196
+ if (enforceJudgement && !hasCriteria) {
239
197
  return {
240
198
  success: false,
241
- messages: input.messages,
242
- reasoning: `JudgeAgent: No tool call found in LLM output`,
199
+ messages: [],
200
+ reasoning: "JudgeAgent: No criteria was provided to be judged against",
243
201
  metCriteria: [],
244
- unmetCriteria: cfg.criteria
202
+ unmetCriteria: []
245
203
  };
246
204
  }
247
- };
205
+ const toolChoice = (isLastMessage || enforceJudgement) && hasCriteria ? { type: "tool", toolName: "finish_test" } : "required";
206
+ const completion = await this.generateText({
207
+ model: mergedConfig.model,
208
+ messages,
209
+ temperature: mergedConfig.temperature ?? 0,
210
+ maxTokens: mergedConfig.maxTokens,
211
+ tools,
212
+ toolChoice
213
+ });
214
+ let args;
215
+ if ((_a = completion.toolCalls) == null ? void 0 : _a.length) {
216
+ const toolCall = completion.toolCalls[0];
217
+ switch (toolCall.toolName) {
218
+ case "finish_test": {
219
+ args = toolCall.args;
220
+ const verdict = args.verdict || "inconclusive";
221
+ const reasoning = args.reasoning || "No reasoning provided";
222
+ const criteria = args.criteria || {};
223
+ const criteriaValues = Object.values(criteria);
224
+ const metCriteria = cfg.criteria.filter(
225
+ (_, i) => criteriaValues[i] === "true"
226
+ );
227
+ const unmetCriteria = cfg.criteria.filter(
228
+ (_, i) => criteriaValues[i] !== "true"
229
+ );
230
+ return {
231
+ success: verdict === "success",
232
+ messages: input.messages,
233
+ reasoning,
234
+ metCriteria,
235
+ unmetCriteria
236
+ };
237
+ }
238
+ case "continue_test":
239
+ return [];
240
+ default:
241
+ return {
242
+ success: false,
243
+ messages: input.messages,
244
+ reasoning: `JudgeAgent: Unknown tool call: ${toolCall.toolName}`,
245
+ metCriteria: [],
246
+ unmetCriteria: cfg.criteria
247
+ };
248
+ }
249
+ }
250
+ return {
251
+ success: false,
252
+ messages: input.messages,
253
+ reasoning: `JudgeAgent: No tool call found in LLM output`,
254
+ metCriteria: [],
255
+ unmetCriteria: cfg.criteria
256
+ };
257
+ }
258
+ async generateText(input) {
259
+ try {
260
+ return await generateText(input);
261
+ } catch (error) {
262
+ this.logger.error("Error generating text", { error });
263
+ throw error;
264
+ }
265
+ }
266
+ };
267
+ var judgeAgent = (cfg) => {
268
+ return new JudgeAgent(cfg);
248
269
  };
249
270
 
250
271
  // src/agents/user-simulator-agent.ts
@@ -269,52 +290,75 @@ ${description}
269
290
  </rules>
270
291
  `.trim();
271
292
  }
272
- var userSimulatorAgent = (config) => {
273
- return {
274
- role: "User" /* USER */,
275
- call: async (input) => {
276
- const systemPrompt = (config == null ? void 0 : config.systemPrompt) ?? buildSystemPrompt2(input.scenarioConfig.description);
277
- const messages = [
278
- { role: "system", content: systemPrompt },
279
- { role: "assistant", content: "Hello, how can I help you today" },
280
- ...input.messages
281
- ];
282
- const projectConfig = await getProjectConfig();
283
- const mergedConfig = mergeAndValidateConfig(config ?? {}, projectConfig);
284
- if (!mergedConfig.model) {
285
- throw new Error("Model is required for the user simulator agent");
286
- }
287
- const reversedMessages = messageRoleReversal(messages);
288
- const completion = await generateText2({
289
- model: mergedConfig.model,
290
- messages: reversedMessages,
291
- temperature: mergedConfig.temperature ?? DEFAULT_TEMPERATURE,
292
- maxTokens: mergedConfig.maxTokens
293
- });
294
- const messageContent = completion.text;
295
- if (!messageContent) {
296
- throw new Error("No response content from LLM");
297
- }
298
- return { role: "user", content: messageContent };
293
+ var UserSimulatorAgent = class extends UserSimulatorAgentAdapter {
294
+ constructor(cfg) {
295
+ super();
296
+ this.cfg = cfg;
297
+ }
298
+ logger = new Logger(this.constructor.name);
299
+ call = async (input) => {
300
+ const config = this.cfg;
301
+ const systemPrompt = (config == null ? void 0 : config.systemPrompt) ?? buildSystemPrompt2(input.scenarioConfig.description);
302
+ const messages = [
303
+ { role: "system", content: systemPrompt },
304
+ { role: "assistant", content: "Hello, how can I help you today" },
305
+ ...input.messages
306
+ ];
307
+ const projectConfig = await getProjectConfig();
308
+ const mergedConfig = mergeAndValidateConfig(config ?? {}, projectConfig);
309
+ if (!mergedConfig.model) {
310
+ throw new Error("Model is required for the user simulator agent");
311
+ }
312
+ const reversedMessages = messageRoleReversal(messages);
313
+ const completion = await this.generateText({
314
+ model: mergedConfig.model,
315
+ messages: reversedMessages,
316
+ temperature: mergedConfig.temperature ?? DEFAULT_TEMPERATURE,
317
+ maxTokens: mergedConfig.maxTokens
318
+ });
319
+ const messageContent = completion.text;
320
+ if (!messageContent) {
321
+ throw new Error("No response content from LLM");
299
322
  }
323
+ return { role: "user", content: messageContent };
300
324
  };
325
+ async generateText(input) {
326
+ try {
327
+ return await generateText2(input);
328
+ } catch (error) {
329
+ this.logger.error("Error generating text", { error });
330
+ throw error;
331
+ }
332
+ }
333
+ };
334
+ var userSimulatorAgent = (config) => {
335
+ return new UserSimulatorAgent(config);
301
336
  };
302
337
 
303
338
  // src/execution/index.ts
304
339
  var execution_exports = {};
305
340
  __export(execution_exports, {
306
341
  ScenarioExecution: () => ScenarioExecution,
307
- ScenarioExecutionState: () => ScenarioExecutionState
342
+ ScenarioExecutionState: () => ScenarioExecutionState,
343
+ StateChangeEventType: () => StateChangeEventType
308
344
  });
309
345
 
310
346
  // src/execution/scenario-execution.ts
311
- import { Subject } from "rxjs";
347
+ import { filter, Subject as Subject2 } from "rxjs";
312
348
 
313
349
  // src/execution/scenario-execution-state.ts
350
+ import { Subject } from "rxjs";
351
+ var StateChangeEventType = /* @__PURE__ */ ((StateChangeEventType2) => {
352
+ StateChangeEventType2["MESSAGE_ADDED"] = "MESSAGE_ADDED";
353
+ return StateChangeEventType2;
354
+ })(StateChangeEventType || {});
314
355
  var ScenarioExecutionState = class {
315
356
  _messages = [];
316
357
  _currentTurn = 0;
317
358
  _threadId = "";
359
+ /** Event stream for message additions */
360
+ eventSubject = new Subject();
361
+ events$ = this.eventSubject.asObservable();
318
362
  description;
319
363
  config;
320
364
  constructor(config) {
@@ -342,7 +386,9 @@ var ScenarioExecutionState = class {
342
386
  * @param message - The message to add.
343
387
  */
344
388
  addMessage(message2) {
345
- this._messages.push({ ...message2, id: generateMessageId() });
389
+ const messageWithId = { ...message2, id: generateMessageId() };
390
+ this._messages.push(messageWithId);
391
+ this.eventSubject.next({ type: "MESSAGE_ADDED" /* MESSAGE_ADDED */ });
346
392
  }
347
393
  lastMessage() {
348
394
  if (this._messages.length === 0) {
@@ -354,7 +400,9 @@ var ScenarioExecutionState = class {
354
400
  if (this._messages.length === 0) {
355
401
  throw new Error("No messages in history");
356
402
  }
357
- const lastMessage = this._messages.findLast((message2) => message2.role === "user");
403
+ const lastMessage = this._messages.findLast(
404
+ (message2) => message2.role === "user"
405
+ );
358
406
  if (!lastMessage) {
359
407
  throw new Error("No user message in history");
360
408
  }
@@ -364,7 +412,9 @@ var ScenarioExecutionState = class {
364
412
  if (this._messages.length === 0) {
365
413
  throw new Error("No messages in history");
366
414
  }
367
- const lastMessage = this._messages.findLast((message2) => message2.role === "assistant");
415
+ const lastMessage = this._messages.findLast(
416
+ (message2) => message2.role === "assistant"
417
+ );
368
418
  if (!lastMessage) {
369
419
  throw new Error("No agent message in history");
370
420
  }
@@ -374,9 +424,11 @@ var ScenarioExecutionState = class {
374
424
  if (this._messages.length === 0) {
375
425
  throw new Error("No messages in history");
376
426
  }
377
- const lastMessage = this._messages.findLast((message2) => message2.role === "tool" && message2.content.find(
378
- (part) => part.type === "tool-result" && part.toolName === toolName
379
- ));
427
+ const lastMessage = this._messages.findLast(
428
+ (message2) => message2.role === "tool" && message2.content.find(
429
+ (part) => part.type === "tool-result" && part.toolName === toolName
430
+ )
431
+ );
380
432
  return lastMessage;
381
433
  }
382
434
  hasToolCall(toolName) {
@@ -388,7 +440,7 @@ var ScenarioExecutionState = class {
388
440
  }
389
441
  };
390
442
 
391
- // src/utils/message-conversion.ts
443
+ // src/utils/convert-core-messages-to-agui-messages.ts
392
444
  function convertCoreMessagesToAguiMessages(coreMessages) {
393
445
  const aguiMessages = [];
394
446
  for (const msg of coreMessages) {
@@ -457,30 +509,53 @@ function convertCoreMessagesToAguiMessages(coreMessages) {
457
509
  }
458
510
  return aguiMessages;
459
511
  }
460
- var message_conversion_default = convertCoreMessagesToAguiMessages;
512
+ var convert_core_messages_to_agui_messages_default = convertCoreMessagesToAguiMessages;
461
513
 
462
514
  // src/execution/scenario-execution.ts
463
515
  var ScenarioExecution = class {
516
+ /** The current state of the scenario execution */
464
517
  state;
465
- eventSubject = new Subject();
518
+ /** Logger for debugging and monitoring */
466
519
  logger = new Logger("scenario.execution.ScenarioExecution");
520
+ /** Finalized configuration with all defaults applied */
467
521
  config;
522
+ /** Array of all agents participating in the scenario */
468
523
  agents = [];
524
+ /** Roles that still need to act in the current turn (USER, AGENT, JUDGE) */
469
525
  pendingRolesOnTurn = [];
526
+ /** Agents that still need to act in the current turn */
470
527
  pendingAgentsOnTurn = /* @__PURE__ */ new Set();
528
+ /**
529
+ * Message queues for each agent. When an agent sends a message, it gets
530
+ * broadcast to all other agents' pending message queues. When an agent
531
+ * is called, it receives these pending messages as part of its input.
532
+ *
533
+ * Key: agent index, Value: array of pending messages for that agent
534
+ */
471
535
  pendingMessages = /* @__PURE__ */ new Map();
536
+ /** Intermediate result set by agents that make final decisions */
472
537
  partialResult = null;
538
+ /** Accumulated execution time for each agent (for performance tracking) */
473
539
  agentTimes = /* @__PURE__ */ new Map();
540
+ /** Timestamp when execution started (for total time calculation) */
474
541
  totalStartTime = 0;
542
+ /** Event stream for monitoring scenario progress */
543
+ eventSubject = new Subject2();
475
544
  /**
476
545
  * An observable stream of events that occur during the scenario execution.
477
546
  * Subscribe to this to monitor the progress of the scenario in real-time.
547
+ *
548
+ * Events include:
549
+ * - RUN_STARTED: When scenario execution begins
550
+ * - MESSAGE_SNAPSHOT: After each message is added to the conversation
551
+ * - RUN_FINISHED: When scenario execution completes (success/failure/error)
478
552
  */
479
553
  events$ = this.eventSubject.asObservable();
480
554
  /**
481
555
  * Creates a new ScenarioExecution instance.
482
- * @param config The scenario configuration.
483
- * @param script The script steps to execute.
556
+ *
557
+ * @param config - The scenario configuration containing agents, settings, and metadata
558
+ * @param script - The ordered sequence of script steps that define the test flow
484
559
  */
485
560
  constructor(config, script) {
486
561
  this.config = {
@@ -498,13 +573,18 @@ var ScenarioExecution = class {
498
573
  this.reset();
499
574
  }
500
575
  /**
501
- * The history of messages in the conversation.
576
+ * Gets the complete conversation history as an array of messages.
577
+ *
578
+ * @returns Array of CoreMessage objects representing the full conversation
502
579
  */
503
580
  get messages() {
504
581
  return this.state.messages;
505
582
  }
506
583
  /**
507
- * The unique identifier for the conversation thread.
584
+ * Gets the unique identifier for the conversation thread.
585
+ * This ID is used to maintain conversation context across multiple runs.
586
+ *
587
+ * @returns The thread identifier string
508
588
  */
509
589
  get threadId() {
510
590
  return this.state.threadId;
@@ -517,21 +597,43 @@ var ScenarioExecution = class {
517
597
  }
518
598
  /**
519
599
  * Executes the entire scenario from start to finish.
520
- * This will run through the script and any automatic proceeding logic until a
521
- * final result (success, failure, or error) is determined.
522
- * @returns A promise that resolves with the final result of the scenario.
600
+ *
601
+ * This method runs through all script steps sequentially until a final result
602
+ * (success, failure, or error) is determined. Each script step can trigger one or
603
+ * more agent interactions depending on the step type:
604
+ * - `user()` and `agent()` steps typically trigger one agent interaction each
605
+ * - `proceed()` steps can trigger multiple agent interactions across multiple turns
606
+ * - `judge()` steps trigger the judge agent to evaluate the conversation
607
+ * - `succeed()` and `fail()` steps immediately end the scenario
608
+ *
609
+ * The execution will stop early if:
610
+ * - A script step returns a ScenarioResult
611
+ * - The maximum number of turns is reached
612
+ * - An error occurs during execution
613
+ *
614
+ * @returns A promise that resolves with the final result of the scenario
615
+ * @throws Error if an unhandled exception occurs during execution
616
+ *
617
+ * @example
618
+ * ```typescript
619
+ * const execution = new ScenarioExecution(config, script);
620
+ * const result = await execution.execute();
621
+ * console.log(`Scenario ${result.success ? 'passed' : 'failed'}`);
622
+ * ```
523
623
  */
524
624
  async execute() {
525
625
  this.reset();
526
626
  const scenarioRunId = generateScenarioRunId();
527
627
  this.emitRunStarted({ scenarioRunId });
628
+ const subscription = this.state.events$.pipe(
629
+ filter((event) => event.type === "MESSAGE_ADDED" /* MESSAGE_ADDED */)
630
+ ).subscribe(() => {
631
+ this.emitMessageSnapshot({ scenarioRunId });
632
+ });
528
633
  try {
529
- for (const scriptStep of this.config.script) {
530
- this.logger.debug(`[${this.config.id}] Executing script step`, {
531
- scriptStep
532
- });
533
- const result = await scriptStep(this.state, this);
534
- this.emitMessageSnapshot({ scenarioRunId });
634
+ for (let i = 0; i < this.config.script.length; i++) {
635
+ const scriptStep = this.config.script[i];
636
+ const result = await this.executeScriptStep(scriptStep, i);
535
637
  if (result && typeof result === "object" && "success" in result) {
536
638
  this.emitRunFinished({
537
639
  scenarioRunId,
@@ -551,27 +653,58 @@ var ScenarioExecution = class {
551
653
  ].join("\n")
552
654
  );
553
655
  } catch (error) {
656
+ const errorInfo = extractErrorInfo(error);
554
657
  const errorResult = {
555
658
  success: false,
556
659
  messages: this.state.messages,
557
- reasoning: `Scenario failed with error: ${error instanceof Error ? error.message : String(error)}`,
660
+ reasoning: `Scenario failed with error: ${errorInfo.message}`,
558
661
  metCriteria: [],
559
662
  unmetCriteria: [],
560
- error: error instanceof Error ? error.message : String(error)
663
+ error: JSON.stringify(errorInfo)
561
664
  };
562
665
  this.emitRunFinished({
563
666
  scenarioRunId,
564
667
  status: "ERROR" /* ERROR */,
565
668
  result: errorResult
566
669
  });
567
- return errorResult;
670
+ throw error;
671
+ } finally {
672
+ subscription.unsubscribe();
568
673
  }
569
674
  }
570
675
  /**
571
- * Executes a single step in the scenario.
572
- * A step usually corresponds to a single agent's turn. This method is useful
573
- * for manually controlling the scenario's progress.
574
- * @returns A promise that resolves with the new messages added during the step, or a final scenario result if the step concludes the scenario.
676
+ * Executes a single agent interaction in the scenario.
677
+ *
678
+ * This method is for manual step-by-step execution of the scenario, where each call
679
+ * represents one agent taking their turn. This is different from script steps (like
680
+ * `user()`, `agent()`, `proceed()`, etc.) which are functions in the scenario script.
681
+ *
682
+ * Each call to this method will:
683
+ * - Progress to the next turn if needed
684
+ * - Find the next agent that should act
685
+ * - Execute that agent's response
686
+ * - Return either new messages or a final scenario result
687
+ *
688
+ * Note: This method is primarily for debugging or custom execution flows. Most users
689
+ * will use `execute()` to run the entire scenario automatically.
690
+ *
691
+ * @returns A promise that resolves with either:
692
+ * - Array of new messages added during the agent interaction, or
693
+ * - A final ScenarioResult if the interaction concludes the scenario
694
+ * @throws Error if no result is returned from the step
695
+ *
696
+ * @example
697
+ * ```typescript
698
+ * const execution = new ScenarioExecution(config, script);
699
+ *
700
+ * // Execute one agent interaction at a time
701
+ * const messages = await execution.step();
702
+ * if (Array.isArray(messages)) {
703
+ * console.log('New messages:', messages);
704
+ * } else {
705
+ * console.log('Scenario finished:', messages.success);
706
+ * }
707
+ * ```
575
708
  */
576
709
  async step() {
577
710
  const result = await this._step();
@@ -595,6 +728,34 @@ var ScenarioExecution = class {
595
728
  this.removePendingAgent(nextAgent);
596
729
  return await this.callAgent(idx, currentRole);
597
730
  }
731
+ /**
732
+ * Calls a specific agent to generate a response or make a decision.
733
+ *
734
+ * This method is the core of agent interaction. It prepares the agent's input
735
+ * by combining the conversation history with any pending messages that have been
736
+ * broadcast to this agent, then calls the agent and processes its response.
737
+ *
738
+ * The agent input includes:
739
+ * - Full conversation history (this.state.messages)
740
+ * - New messages that have been broadcast to this agent (this.pendingMessages.get(idx))
741
+ * - The role the agent is being asked to play
742
+ * - Whether this is a judgment request (for judge agents)
743
+ * - Current scenario state and configuration
744
+ *
745
+ * After the agent responds:
746
+ * - Performance timing is recorded
747
+ * - Pending messages for this agent are cleared (they've been processed)
748
+ * - If the agent returns a ScenarioResult, it's returned immediately
749
+ * - Otherwise, the agent's messages are added to the conversation and broadcast
750
+ *
751
+ * @param idx - The index of the agent in the agents array
752
+ * @param role - The role the agent is being asked to play (USER, AGENT, or JUDGE)
753
+ * @param judgmentRequest - Whether this is a judgment request (for judge agents)
754
+ * @returns A promise that resolves with either:
755
+ * - Array of messages if the agent generated a response, or
756
+ * - ScenarioResult if the agent made a final decision
757
+ * @throws Error if the agent call fails
758
+ */
598
759
  async callAgent(idx, role, judgmentRequest = false) {
599
760
  const agent2 = this.agents[idx];
600
761
  const startTime = Date.now();
@@ -607,29 +768,55 @@ var ScenarioExecution = class {
607
768
  scenarioState: this.state,
608
769
  scenarioConfig: this.config
609
770
  };
610
- const agentResponse = await agent2.call(agentInput);
611
- const endTime = Date.now();
612
- this.addAgentTime(idx, endTime - startTime);
613
- this.pendingMessages.delete(idx);
614
- if (agentResponse && typeof agentResponse === "object" && "success" in agentResponse) {
615
- return agentResponse;
616
- }
617
- const currentAgentTime = this.agentTimes.get(idx) ?? 0;
618
- this.agentTimes.set(idx, currentAgentTime + (Date.now() - startTime));
619
- const messages = convertAgentReturnTypesToMessages(
620
- agentResponse,
621
- role === "User" /* USER */ ? "user" : "assistant"
622
- );
623
- for (const message2 of messages) {
624
- this.state.addMessage(message2);
625
- this.broadcastMessage(message2, idx);
771
+ try {
772
+ const agentResponse = await agent2.call(agentInput);
773
+ const endTime = Date.now();
774
+ this.addAgentTime(idx, endTime - startTime);
775
+ this.pendingMessages.delete(idx);
776
+ if (agentResponse && typeof agentResponse === "object" && "success" in agentResponse) {
777
+ return agentResponse;
778
+ }
779
+ const currentAgentTime = this.agentTimes.get(idx) ?? 0;
780
+ this.agentTimes.set(idx, currentAgentTime + (Date.now() - startTime));
781
+ const messages = convertAgentReturnTypesToMessages(
782
+ agentResponse,
783
+ role === "User" /* USER */ ? "user" : "assistant"
784
+ );
785
+ for (const message2 of messages) {
786
+ this.state.addMessage(message2);
787
+ this.broadcastMessage(message2, idx);
788
+ }
789
+ return messages;
790
+ } catch (error) {
791
+ this.logger.error(
792
+ `[${this.config.id}] Error calling agent ${agent2.constructor.name}`,
793
+ {
794
+ error: error instanceof Error ? error.message : String(error),
795
+ agent: agent2.constructor.name,
796
+ agentInput
797
+ }
798
+ );
799
+ throw error;
626
800
  }
627
- return messages;
628
801
  }
629
802
  /**
630
803
  * Adds a message to the conversation history.
631
- * This is part of the `ScenarioExecutionLike` interface used by script steps.
632
- * @param message The message to add.
804
+ *
805
+ * This method is part of the ScenarioExecutionLike interface used by script steps.
806
+ * It automatically routes the message to the appropriate agent based on the message role:
807
+ * - "user" messages are routed to USER role agents
808
+ * - "assistant" messages are routed to AGENT role agents
809
+ * - Other message types are added directly to the conversation
810
+ *
811
+ * @param message - The CoreMessage to add to the conversation
812
+ *
813
+ * @example
814
+ * ```typescript
815
+ * await execution.message({
816
+ * role: "user",
817
+ * content: "Hello, how are you?"
818
+ * });
819
+ * ```
633
820
  */
634
821
  async message(message2) {
635
822
  if (message2.role === "user") {
@@ -642,42 +829,134 @@ var ScenarioExecution = class {
642
829
  }
643
830
  }
644
831
  /**
645
- * Executes a user turn.
646
- * If content is provided, it's used as the user's message.
647
- * If not, the user simulator agent is called to generate a message.
648
- * This is part of the `ScenarioExecutionLike` interface used by script steps.
649
- * @param content The optional content of the user's message.
832
+ * Executes a user turn in the conversation.
833
+ *
834
+ * If content is provided, it's used directly as the user's message. If not provided,
835
+ * the user simulator agent is called to generate an appropriate response based on
836
+ * the current conversation context.
837
+ *
838
+ * This method is part of the ScenarioExecutionLike interface used by script steps.
839
+ *
840
+ * @param content - Optional content for the user's message. Can be a string or CoreMessage.
841
+ * If not provided, the user simulator agent will generate the content.
842
+ *
843
+ * @example
844
+ * ```typescript
845
+ * // Use provided content
846
+ * await execution.user("What's the weather like?");
847
+ *
848
+ * // Let user simulator generate content
849
+ * await execution.user();
850
+ *
851
+ * // Use a CoreMessage object
852
+ * await execution.user({
853
+ * role: "user",
854
+ * content: "Tell me a joke"
855
+ * });
856
+ * ```
650
857
  */
651
858
  async user(content) {
652
859
  await this.scriptCallAgent("User" /* USER */, content);
653
860
  }
654
861
  /**
655
- * Executes an agent turn.
656
- * If content is provided, it's used as the agent's message.
657
- * If not, the agent under test is called to generate a response.
658
- * This is part of the `ScenarioExecutionLike` interface used by script steps.
659
- * @param content The optional content of the agent's message.
862
+ * Executes an agent turn in the conversation.
863
+ *
864
+ * If content is provided, it's used directly as the agent's response. If not provided,
865
+ * the agent under test is called to generate a response based on the current conversation
866
+ * context and any pending messages.
867
+ *
868
+ * This method is part of the ScenarioExecutionLike interface used by script steps.
869
+ *
870
+ * @param content - Optional content for the agent's response. Can be a string or CoreMessage.
871
+ * If not provided, the agent under test will generate the response.
872
+ *
873
+ * @example
874
+ * ```typescript
875
+ * // Let agent generate response
876
+ * await execution.agent();
877
+ *
878
+ * // Use provided content
879
+ * await execution.agent("The weather is sunny today!");
880
+ *
881
+ * // Use a CoreMessage object
882
+ * await execution.agent({
883
+ * role: "assistant",
884
+ * content: "I'm here to help you with weather information."
885
+ * });
886
+ * ```
660
887
  */
661
888
  async agent(content) {
662
889
  await this.scriptCallAgent("Agent" /* AGENT */, content);
663
890
  }
664
891
  /**
665
892
  * Invokes the judge agent to evaluate the current state of the conversation.
666
- * This is part of the `ScenarioExecutionLike` interface used by script steps.
667
- * @param content Optional message to pass to the judge.
668
- * @returns A promise that resolves with the scenario result if the judge makes a final decision, otherwise null.
893
+ *
894
+ * The judge agent analyzes the conversation history and determines whether the
895
+ * scenario criteria have been met. This can result in either:
896
+ * - A final scenario result (success/failure) if the judge makes a decision
897
+ * - Null if the judge needs more information or conversation to continue
898
+ *
899
+ * This method is part of the ScenarioExecutionLike interface used by script steps.
900
+ *
901
+ * @param content - Optional message to pass to the judge agent for additional context
902
+ * @returns A promise that resolves with:
903
+ * - ScenarioResult if the judge makes a final decision, or
904
+ * - Null if the conversation should continue
905
+ *
906
+ * @example
907
+ * ```typescript
908
+ * // Let judge evaluate current state
909
+ * const result = await execution.judge();
910
+ * if (result) {
911
+ * console.log(`Judge decided: ${result.success ? 'pass' : 'fail'}`);
912
+ * }
913
+ *
914
+ * // Provide additional context to judge
915
+ * const result = await execution.judge("Please consider the user's satisfaction level");
916
+ * ```
669
917
  */
670
918
  async judge(content) {
671
919
  return await this.scriptCallAgent("Judge" /* JUDGE */, content, true);
672
920
  }
673
921
  /**
674
922
  * Lets the scenario proceed automatically for a specified number of turns.
675
- * This simulates the natural flow of conversation between agents.
676
- * This is part of the `ScenarioExecutionLike` interface used by script steps.
677
- * @param turns The number of turns to proceed. If undefined, runs until a conclusion or max turns is reached.
678
- * @param onTurn A callback executed at the end of each turn.
679
- * @param onStep A callback executed after each agent interaction.
680
- * @returns A promise that resolves with the scenario result if a conclusion is reached.
923
+ *
924
+ * This method is a script step that simulates natural conversation flow by allowing
925
+ * agents to interact automatically without explicit script steps. It can trigger
926
+ * multiple agent interactions across multiple turns, making it useful for testing
927
+ * scenarios where you want to see how agents behave in extended conversations.
928
+ *
929
+ * Unlike other script steps that typically trigger one agent interaction each,
930
+ * this step can trigger many agent interactions depending on the number of turns
931
+ * and the agents' behavior.
932
+ *
933
+ * The method will continue until:
934
+ * - The specified number of turns is reached
935
+ * - A final scenario result is determined
936
+ * - The maximum turns limit is reached
937
+ *
938
+ * @param turns - The number of turns to proceed. If undefined, runs until a conclusion
939
+ * or max turns is reached
940
+ * @param onTurn - Optional callback executed at the end of each turn. Receives the
941
+ * current execution state
942
+ * @param onStep - Optional callback executed after each agent interaction. Receives
943
+ * the current execution state
944
+ * @returns A promise that resolves with:
945
+ * - ScenarioResult if a conclusion is reached during the proceeding, or
946
+ * - Null if the specified turns complete without conclusion
947
+ *
948
+ * @example
949
+ * ```typescript
950
+ * // Proceed for 5 turns
951
+ * const result = await execution.proceed(5);
952
+ *
953
+ * // Proceed until conclusion with callbacks
954
+ * const result = await execution.proceed(
955
+ * undefined,
956
+ * (state) => console.log(`Turn ${state.currentTurn} completed`),
957
+ * (state) => console.log(`Agent interaction completed, ${state.messages.length} messages`)
958
+ * );
959
+ * ```
681
960
  */
682
961
  async proceed(turns, onTurn, onStep) {
683
962
  let initialTurn = this.state.currentTurn;
@@ -695,9 +974,26 @@ var ScenarioExecution = class {
695
974
  }
696
975
  /**
697
976
  * Immediately ends the scenario with a success verdict.
698
- * This is part of the `ScenarioExecutionLike` interface used by script steps.
699
- * @param reasoning An optional explanation for the success.
700
- * @returns A promise that resolves with the final successful scenario result.
977
+ *
978
+ * This method forces the scenario to end successfully, regardless of the current
979
+ * conversation state. It's useful for scenarios where you want to explicitly
980
+ * mark success based on specific conditions or external factors.
981
+ *
982
+ * This method is part of the ScenarioExecutionLike interface used by script steps.
983
+ *
984
+ * @param reasoning - Optional explanation for why the scenario is being marked as successful
985
+ * @returns A promise that resolves with the final successful scenario result
986
+ *
987
+ * @example
988
+ * ```typescript
989
+ * // Mark success with default reasoning
990
+ * const result = await execution.succeed();
991
+ *
992
+ * // Mark success with custom reasoning
993
+ * const result = await execution.succeed(
994
+ * "User successfully completed the onboarding flow"
995
+ * );
996
+ * ```
701
997
  */
702
998
  async succeed(reasoning) {
703
999
  return {
@@ -710,9 +1006,26 @@ var ScenarioExecution = class {
710
1006
  }
711
1007
  /**
712
1008
  * Immediately ends the scenario with a failure verdict.
713
- * This is part of the `ScenarioExecutionLike` interface used by script steps.
714
- * @param reasoning An optional explanation for the failure.
715
- * @returns A promise that resolves with the final failed scenario result.
1009
+ *
1010
+ * This method forces the scenario to end with failure, regardless of the current
1011
+ * conversation state. It's useful for scenarios where you want to explicitly
1012
+ * mark failure based on specific conditions or external factors.
1013
+ *
1014
+ * This method is part of the ScenarioExecutionLike interface used by script steps.
1015
+ *
1016
+ * @param reasoning - Optional explanation for why the scenario is being marked as failed
1017
+ * @returns A promise that resolves with the final failed scenario result
1018
+ *
1019
+ * @example
1020
+ * ```typescript
1021
+ * // Mark failure with default reasoning
1022
+ * const result = await execution.fail();
1023
+ *
1024
+ * // Mark failure with custom reasoning
1025
+ * const result = await execution.fail(
1026
+ * "Agent failed to provide accurate weather information"
1027
+ * );
1028
+ * ```
716
1029
  */
717
1030
  async fail(reasoning) {
718
1031
  return {
@@ -723,16 +1036,95 @@ var ScenarioExecution = class {
723
1036
  unmetCriteria: []
724
1037
  };
725
1038
  }
1039
+ /**
1040
+ * Adds execution time for a specific agent to the performance tracking.
1041
+ *
1042
+ * This method is used internally to track how long each agent takes to respond,
1043
+ * which is included in the final scenario result for performance analysis.
1044
+ * The accumulated time for each agent is used to calculate total agent response
1045
+ * times in the scenario result.
1046
+ *
1047
+ * @param agentIdx - The index of the agent in the agents array
1048
+ * @param time - The execution time in milliseconds to add to the agent's total
1049
+ *
1050
+ * @example
1051
+ * ```typescript
1052
+ * // This is typically called internally by the execution engine
1053
+ * execution.addAgentTime(0, 1500); // Agent at index 0 took 1.5 seconds
1054
+ * ```
1055
+ */
726
1056
  addAgentTime(agentIdx, time) {
727
1057
  const currentTime = this.agentTimes.get(agentIdx) || 0;
728
1058
  this.agentTimes.set(agentIdx, currentTime + time);
729
1059
  }
1060
+ /**
1061
+ * Checks if a partial result has been set for the scenario.
1062
+ *
1063
+ * This method is used internally to determine if a scenario has already reached
1064
+ * a conclusion (success or failure) but hasn't been finalized yet. Partial results
1065
+ * are typically set by agents that make final decisions (like judge agents) and
1066
+ * are later finalized with the complete message history.
1067
+ *
1068
+ * @returns True if a partial result exists, false otherwise
1069
+ *
1070
+ * @example
1071
+ * ```typescript
1072
+ * // This is typically used internally by the execution engine
1073
+ * if (execution.hasResult()) {
1074
+ * console.log('Scenario has reached a conclusion');
1075
+ * }
1076
+ * ```
1077
+ */
730
1078
  hasResult() {
731
1079
  return this.partialResult !== null;
732
1080
  }
1081
+ /**
1082
+ * Sets a partial result for the scenario.
1083
+ *
1084
+ * This method is used internally to store intermediate results that may be
1085
+ * finalized later with the complete message history. Partial results are typically
1086
+ * created by agents that make final decisions (like judge agents) and contain
1087
+ * the success/failure status, reasoning, and criteria evaluation, but not the
1088
+ * complete message history.
1089
+ *
1090
+ * @param result - The partial result without the messages field. Should include
1091
+ * success status, reasoning, and criteria evaluation.
1092
+ *
1093
+ * @example
1094
+ * ```typescript
1095
+ * // This is typically called internally by agents that make final decisions
1096
+ * execution.setResult({
1097
+ * success: true,
1098
+ * reasoning: "Agent provided accurate weather information",
1099
+ * metCriteria: ["Provides accurate weather data"],
1100
+ * unmetCriteria: []
1101
+ * });
1102
+ * ```
1103
+ */
733
1104
  setResult(result) {
734
1105
  this.partialResult = result;
735
1106
  }
1107
+ /**
1108
+ * Internal method to handle script step calls to agents.
1109
+ *
1110
+ * This method is the core logic for executing script steps that involve agent
1111
+ * interactions. It handles finding the appropriate agent for the given role,
1112
+ * managing turn progression, and executing the agent's response.
1113
+ *
1114
+ * The method will:
1115
+ * - Find the next available agent for the specified role
1116
+ * - Progress to a new turn if no agent is available
1117
+ * - Execute the agent with the provided content or let it generate content
1118
+ * - Handle judgment requests for judge agents
1119
+ * - Return a final result if the agent makes a decision
1120
+ *
1121
+ * @param role - The role of the agent to call (USER, AGENT, or JUDGE)
1122
+ * @param content - Optional content to use instead of letting the agent generate it
1123
+ * @param judgmentRequest - Whether this is a judgment request (for judge agents)
1124
+ * @returns A promise that resolves with a ScenarioResult if the agent makes a final
1125
+ * decision, or null if the conversation should continue
1126
+ * @throws Error if no agent is found for the specified role
1127
+ */
736
1128
  async scriptCallAgent(role, content, judgmentRequest = false) {
737
1129
  this.consumeUntilRole(role);
738
1130
  let index = -1;
@@ -784,6 +1176,21 @@ var ScenarioExecution = class {
784
1176
  }
785
1177
  return null;
786
1178
  }
1179
+ /**
1180
+ * Resets the scenario execution to its initial state.
1181
+ *
1182
+ * This method is called at the beginning of each execution to ensure a clean
1183
+ * state. It creates a new execution state, initializes agents, sets up the
1184
+ * first turn, and clears any pending messages or partial results.
1185
+ *
1186
+ * The reset process:
1187
+ * - Creates a new ScenarioExecutionState with the current config
1188
+ * - Sets up the thread ID (generates new one if not provided)
1189
+ * - Initializes all agents
1190
+ * - Starts the first turn
1191
+ * - Records the start time for performance tracking
1192
+ * - Clears any pending messages
1193
+ */
787
1194
  reset() {
788
1195
  this.state = new ScenarioExecutionState(this.config);
789
1196
  this.state.threadId = this.config.threadId || generateThreadId();
@@ -801,6 +1208,16 @@ var ScenarioExecution = class {
801
1208
  }
802
1209
  return { idx: -1, agent: null };
803
1210
  }
1211
+ /**
1212
+ * Starts a new turn in the scenario execution.
1213
+ *
1214
+ * This method is called when transitioning to a new turn. It resets the pending
1215
+ * agents and roles for the turn, allowing all agents to participate again in
1216
+ * the new turn. The turn counter is incremented to track the current turn number.
1217
+ *
1218
+ * A turn represents a cycle where agents can take actions. Each turn can involve
1219
+ * multiple agent interactions as agents respond to each other's messages.
1220
+ */
804
1221
  newTurn() {
805
1222
  this.pendingAgentsOnTurn = new Set(this.agents);
806
1223
  this.pendingRolesOnTurn = [
@@ -843,6 +1260,23 @@ var ScenarioExecution = class {
843
1260
  this.pendingRolesOnTurn.pop();
844
1261
  }
845
1262
  }
1263
+ /**
1264
+ * Creates a failure result when the maximum number of turns is reached.
1265
+ *
1266
+ * This method is called when the scenario execution reaches the maximum number
1267
+ * of turns without reaching a conclusion. It creates a failure result with
1268
+ * appropriate reasoning and includes performance metrics.
1269
+ *
1270
+ * The result includes:
1271
+ * - All messages from the conversation
1272
+ * - Failure reasoning explaining the turn limit was reached
1273
+ * - Empty met criteria (since no conclusion was reached)
1274
+ * - All judge criteria as unmet (since no evaluation was completed)
1275
+ * - Total execution time and agent response times
1276
+ *
1277
+ * @param errorMessage - Optional custom error message to use instead of the default
1278
+ * @returns A ScenarioResult indicating failure due to reaching max turns
1279
+ */
846
1280
  reachedMaxTurns(errorMessage) {
847
1281
  var _a;
848
1282
  const agentRoleAgentsIdx = this.agents.map((agent2, i) => ({ agent: agent2, idx: i })).filter(({ agent: agent2 }) => agent2.role === "Agent" /* AGENT */).map(({ idx }) => idx);
@@ -903,7 +1337,7 @@ var ScenarioExecution = class {
903
1337
  this.emitEvent({
904
1338
  ...this.makeBaseEvent({ scenarioRunId }),
905
1339
  type: "SCENARIO_MESSAGE_SNAPSHOT" /* MESSAGE_SNAPSHOT */,
906
- messages: message_conversion_default(this.state.messages)
1340
+ messages: convert_core_messages_to_agui_messages_default(this.state.messages)
907
1341
  // Add any other required fields from MessagesSnapshotEventSchema
908
1342
  });
909
1343
  }
@@ -934,8 +1368,31 @@ var ScenarioExecution = class {
934
1368
  /**
935
1369
  * Distributes a message to all other agents in the scenario.
936
1370
  *
937
- * @param message - The message to broadcast.
938
- * @param fromAgentIdx - The index of the agent that sent the message, to avoid echoing.
1371
+ * This method implements the message broadcasting system that allows agents to
1372
+ * "hear" messages from other agents. When an agent sends a message, it needs to
1373
+ * be distributed to all other agents so they can respond appropriately.
1374
+ *
1375
+ * The broadcasting process:
1376
+ * 1. Iterates through all agents in the scenario
1377
+ * 2. Skips the agent that sent the message (to avoid echo)
1378
+ * 3. Adds the message to each agent's pending message queue
1379
+ * 4. Agents will receive these messages when they're called next
1380
+ *
1381
+ * This creates a realistic conversation environment where agents can see
1382
+ * the full conversation history and respond contextually.
1383
+ *
1384
+ * @param message - The message to broadcast to all other agents
1385
+ * @param fromAgentIdx - The index of the agent that sent the message (to avoid echoing back to sender)
1386
+ *
1387
+ * @example
1388
+ * ```typescript
1389
+ * // When agent 0 sends a message, it gets broadcast to agents 1 and 2
1390
+ * execution.broadcastMessage(
1391
+ * { role: "user", content: "Hello" },
1392
+ * 0 // fromAgentIdx
1393
+ * );
1394
+ * // Now agents 1 and 2 have this message in their pendingMessages queue
1395
+ * ```
939
1396
  */
940
1397
  broadcastMessage(message2, fromAgentIdx) {
941
1398
  for (let idx = 0; idx < this.agents.length; idx++) {
@@ -946,6 +1403,58 @@ var ScenarioExecution = class {
946
1403
  this.pendingMessages.get(idx).push(message2);
947
1404
  }
948
1405
  }
1406
+ /**
1407
+ * Executes a single script step with proper error handling and logging.
1408
+ *
1409
+ * This method is responsible for executing each script step function with
1410
+ * comprehensive error handling and logging. It provides the execution context
1411
+ * to the script step and handles any errors that occur during execution.
1412
+ *
1413
+ * The method:
1414
+ * - Logs the start of script step execution
1415
+ * - Calls the script step function with the current state and execution context
1416
+ * - Logs the completion of the script step
1417
+ * - Handles and logs any errors that occur
1418
+ * - Re-throws errors to maintain the original error context
1419
+ *
1420
+ * @param scriptStep - The script step function to execute (user, agent, judge, etc.)
1421
+ * @param stepIndex - The index of the script step for logging and debugging context
1422
+ * @returns The result of the script step execution (void, ScenarioResult, or null)
1423
+ * @throws Error if the script step throws an error (preserves original error)
1424
+ */
1425
+ async executeScriptStep(scriptStep, stepIndex) {
1426
+ const functionString = scriptStep.toString();
1427
+ try {
1428
+ this.logger.debug(
1429
+ `[${this.config.id}] Executing script step ${stepIndex + 1}`,
1430
+ {
1431
+ stepIndex,
1432
+ function: functionString
1433
+ }
1434
+ );
1435
+ const result = await scriptStep(this.state, this);
1436
+ this.logger.debug(
1437
+ `[${this.config.id}] Script step ${stepIndex + 1} completed`,
1438
+ {
1439
+ stepIndex,
1440
+ hasResult: result !== null && result !== void 0,
1441
+ resultType: typeof result
1442
+ }
1443
+ );
1444
+ return result;
1445
+ } catch (error) {
1446
+ const errorMessage = error instanceof Error ? error.message : String(error);
1447
+ this.logger.error(
1448
+ `[${this.config.id}] Script step ${stepIndex + 1} failed`,
1449
+ {
1450
+ stepIndex,
1451
+ error: errorMessage,
1452
+ function: functionString
1453
+ }
1454
+ );
1455
+ throw error;
1456
+ }
1457
+ }
949
1458
  };
950
1459
  function convertAgentReturnTypesToMessages(response, role) {
951
1460
  if (typeof response === "string")
@@ -954,6 +1463,19 @@ function convertAgentReturnTypesToMessages(response, role) {
954
1463
  if (typeof response === "object" && "role" in response) return [response];
955
1464
  return [];
956
1465
  }
1466
/**
 * Normalizes any thrown value into a plain, loggable description.
 *
 * - `Error` instances yield their `name`, `message` and `stack`.
 * - Any other value yields its `typeof` as `name` and its string form as
 *   `message`; no `stack` key is present in that case.
 *
 * Values that `String()` cannot convert (for example objects created with
 * `Object.create(null)`) fall back to `Object.prototype.toString`, so this
 * helper does not raise a conversion error of its own for them.
 *
 * @param error - The caught value; may be an Error or anything else that was thrown
 * @returns An object with `name` and `message`, plus `stack` for Error instances
 */
function extractErrorInfo(error) {
  if (error instanceof Error) {
    return {
      name: error.name,
      message: error.message,
      stack: error.stack
    };
  }
  let message;
  try {
    message = String(error);
  } catch {
    // String() throws a TypeError when the value has no usable
    // toString/valueOf; describe it by its internal class tag instead.
    message = Object.prototype.toString.call(error);
  }
  return {
    name: typeof error,
    message
  };
}
957
1479
 
958
1480
  // src/runner/index.ts
959
1481
  var runner_exports = {};
@@ -1024,9 +1546,10 @@ async function run(cfg) {
1024
1546
  let eventBus = null;
1025
1547
  let subscription = null;
1026
1548
  try {
1549
+ const envConfig = getEnv();
1027
1550
  eventBus = new EventBus({
1028
- endpoint: env.LANGWATCH_ENDPOINT,
1029
- apiKey: env.LANGWATCH_API_KEY
1551
+ endpoint: envConfig.LANGWATCH_ENDPOINT,
1552
+ apiKey: envConfig.LANGWATCH_API_KEY
1030
1553
  });
1031
1554
  eventBus.listen();
1032
1555
  subscription = eventBus.subscribeTo(execution.events$);
@@ -1107,6 +1630,7 @@ export {
1107
1630
  JudgeAgentAdapter,
1108
1631
  ScenarioExecution,
1109
1632
  ScenarioExecutionState,
1633
+ StateChangeEventType,
1110
1634
  UserSimulatorAgentAdapter,
1111
1635
  agent,
1112
1636
  allAgentRoles,