npm - @langwatch/scenario - Version diff - 0.2.13 → 0.4.0 - Mend

@langwatch/scenario 0.2.13 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15)

package/README.md +36 -9
package/dist/index.d.mts +433 -256
package/dist/index.d.ts +433 -256
package/dist/index.js +2221 -516
package/dist/index.mjs +2611 -303
package/dist/integrations/vitest/config.mjs +0 -2
package/dist/integrations/vitest/reporter.js +36 -11
package/dist/integrations/vitest/reporter.mjs +159 -8
package/dist/integrations/vitest/setup-global.mjs +0 -2
package/dist/integrations/vitest/setup.js +85 -53
package/dist/integrations/vitest/setup.mjs +619 -18
package/package.json +46 -30
package/dist/chunk-6SKQWXT7.mjs +0 -528
package/dist/chunk-7P6ASYW6.mjs +0 -9
package/dist/chunk-OL4RFXV4.mjs +0 -133

package/dist/index.js CHANGED

@@ -33,9 +33,11 @@ __export(index_exports, {
   AgentAdapter: () => AgentAdapter,
   AgentRole: () => AgentRole,
   DEFAULT_MAX_TURNS: () => DEFAULT_MAX_TURNS,
-  DEFAULT_TEMPERATURE: () => DEFAULT_TEMPERATURE,
   DEFAULT_VERBOSE: () => DEFAULT_VERBOSE,
   JudgeAgentAdapter: () => JudgeAgentAdapter,
+  JudgeSpanCollector: () => JudgeSpanCollector,
+  JudgeSpanDigestFormatter: () => JudgeSpanDigestFormatter,
+  RealtimeAgentAdapter: () => RealtimeAgentAdapter,
   ScenarioExecution: () => ScenarioExecution,
   ScenarioExecutionState: () => ScenarioExecutionState,
   StateChangeEventType: () => StateChangeEventType,
@@ -47,6 +49,8 @@ __export(index_exports, {
   fail: () => fail,
   judge: () => judge,
   judgeAgent: () => judgeAgent,
+  judgeSpanCollector: () => judgeSpanCollector,
+  judgeSpanDigestFormatter: () => judgeSpanDigestFormatter,
   message: () => message,
   proceed: () => proceed,
   run: () => run,
@@ -58,132 +62,53 @@ __export(index_exports, {
 });
 module.exports = __toCommonJS(index_exports);
-// src/agents/index.ts
-var agents_exports = {};
-__export(agents_exports, {
-  judgeAgent: () => judgeAgent,
-  userSimulatorAgent: () => userSimulatorAgent
-});
-// src/agents/judge-agent.ts
-var import_ai = require("ai");
-var import_zod3 = require("zod");
-// src/domain/index.ts
-var domain_exports = {};
-__export(domain_exports, {
-  AgentAdapter: () => AgentAdapter,
-  AgentRole: () => AgentRole,
-  DEFAULT_MAX_TURNS: () => DEFAULT_MAX_TURNS,
-  DEFAULT_TEMPERATURE: () => DEFAULT_TEMPERATURE,
-  DEFAULT_VERBOSE: () => DEFAULT_VERBOSE,
-  JudgeAgentAdapter: () => JudgeAgentAdapter,
-  UserSimulatorAgentAdapter: () => UserSimulatorAgentAdapter,
-  allAgentRoles: () => allAgentRoles,
-  defineConfig: () => defineConfig,
-  scenarioProjectConfigSchema: () => scenarioProjectConfigSchema
-});
-// src/domain/core/config.ts
-var import_zod = require("zod");
-var DEFAULT_TEMPERATURE = 0;
-var scenarioProjectConfigSchema = import_zod.z.object({
-  defaultModel: import_zod.z.object({
-    model: import_zod.z.custom(),
-    temperature: import_zod.z.number().min(0).max(1).optional().default(DEFAULT_TEMPERATURE),
-    maxTokens: import_zod.z.number().optional()
-  }).optional(),
-  headless: import_zod.z.boolean().optional().default(
-    typeof process !== "undefined" ? !["false", "0"].includes(process.env.SCENARIO_HEADLESS || "false") : false
-  )
-}).strict();
-function defineConfig(config2) {
-  return config2;
-}
-// src/domain/agents/index.ts
-var AgentRole = /* @__PURE__ */ ((AgentRole2) => {
-  AgentRole2["USER"] = "User";
-  AgentRole2["AGENT"] = "Agent";
-  AgentRole2["JUDGE"] = "Judge";
-  return AgentRole2;
-})(AgentRole || {});
-var allAgentRoles = [
-  "User" /* USER */,
-  "Agent" /* AGENT */,
-  "Judge" /* JUDGE */
-];
-var AgentAdapter = class {
-  role = "Agent" /* AGENT */;
-};
-var UserSimulatorAgentAdapter = class {
-  role = "User" /* USER */;
-};
-var JudgeAgentAdapter = class {
-  role = "Judge" /* JUDGE */;
-};
-// src/domain/scenarios/index.ts
-var DEFAULT_MAX_TURNS = 10;
-var DEFAULT_VERBOSE = false;
+// src/tracing/setup.ts
+var import_node = require("langwatch/observability/node");
-// src/agents/utils.ts
-var toolMessageRole = "tool";
-var assistantMessageRole = "assistant";
-var userMessageRole = "user";
-var groupMessagesByToolBoundaries = (messages) => {
-  const segments = [];
-  let currentSegment = [];
-  for (const message2 of messages) {
-    currentSegment.push(message2);
-    if (message2.role === toolMessageRole) {
-      segments.push(currentSegment);
-      currentSegment = [];
-    }
+// src/agents/judge/judge-span-collector.ts
+var import_observability = require("langwatch/observability");
+var JudgeSpanCollector = class {
+  spans = [];
+  onStart() {
   }
-  if (currentSegment.length > 0) {
-    segments.push(currentSegment);
+  onEnd(span) {
+    this.spans.push(span);
   }
-  return segments;
-};
-var segmentHasToolMessages = (segment) => {
-  return segment.some((message2) => {
-    if (message2.role === toolMessageRole) return true;
-    if (message2.role === assistantMessageRole && Array.isArray(message2.content)) {
-      return message2.content.some((part) => part.type === "tool-call");
+  forceFlush() {
+    return Promise.resolve();
+  }
+  shutdown() {
+    this.spans = [];
+    return Promise.resolve();
+  }
+  /**
+   * Retrieves all spans associated with a specific thread.
+   * @param threadId - The thread identifier to filter spans by
+   * @returns Array of spans for the given thread
+   */
+  getSpansForThread(threadId) {
+    const spanMap = /* @__PURE__ */ new Map();
+    for (const span of this.spans) {
+      spanMap.set(span.spanContext().spanId, span);
     }
-    return false;
-  });
-};
-var reverseSegmentRoles = (segment) => {
-  return segment.map((message2) => {
-    const hasStringContent = typeof message2.content === "string";
-    if (!hasStringContent) return message2;
-    const roleMap = {
-      [userMessageRole]: assistantMessageRole,
-      [assistantMessageRole]: userMessageRole
-    };
-    const newRole = roleMap[message2.role];
-    if (!newRole) return message2;
-    return {
-      role: newRole,
-      content: message2.content
+    const belongsToThread = (span) => {
+      var _a;
+      if (span.attributes[import_observability.attributes.ATTR_LANGWATCH_THREAD_ID] === threadId) {
+        return true;
+      }
+      const parentId = (_a = span.parentSpanContext) == null ? void 0 : _a.spanId;
+      if (parentId && spanMap.has(parentId)) {
+        return belongsToThread(spanMap.get(parentId));
+      }
+      return false;
     };
-  });
-};
-var messageRoleReversal = (messages) => {
-  const segments = groupMessagesByToolBoundaries(messages);
-  const processedSegments = segments.map(
-    (segment) => segmentHasToolMessages(segment) ? segment : reverseSegmentRoles(segment)
-  );
-  return processedSegments.flat();
-};
-var criterionToParamName = (criterion) => {
-  return criterion.replace(/"/g, "").replace(/[^a-zA-Z0-9]/g, "_").replace(/ /g, "_").toLowerCase().substring(0, 70);
+    return this.spans.filter(belongsToThread);
+  }
 };
+var judgeSpanCollector = new JudgeSpanCollector();
 // src/config/env.ts
-var import_zod2 = require("zod");
+var import_v4 = require("zod/v4");
 // src/config/log-levels.ts
 var LogLevel = /* @__PURE__ */ ((LogLevel2) => {
@@ -196,37 +121,37 @@ var LogLevel = /* @__PURE__ */ ((LogLevel2) => {
 var LOG_LEVELS = Object.values(LogLevel);
 // src/config/env.ts
-var envSchema = import_zod2.z.object({
+var envSchema = import_v4.z.object({
   /**
    * LangWatch API key for event reporting.
    * If not provided, events will not be sent to LangWatch.
    */
-  LANGWATCH_API_KEY: import_zod2.z.string().optional(),
+  LANGWATCH_API_KEY: import_v4.z.string().optional(),
   /**
    * LangWatch endpoint URL for event reporting.
    * Defaults to the production LangWatch endpoint.
    */
-  LANGWATCH_ENDPOINT: import_zod2.z.string().url().optional().default("https://app.langwatch.ai"),
+  LANGWATCH_ENDPOINT: import_v4.z.string().url().optional().default("https://app.langwatch.ai"),
   /**
    * Disables simulation report info messages when set to any truthy value.
    * Useful for CI/CD environments or when you want cleaner output.
    */
-  SCENARIO_DISABLE_SIMULATION_REPORT_INFO: import_zod2.z.string().optional().transform((val) => Boolean(val)),
+  SCENARIO_DISABLE_SIMULATION_REPORT_INFO: import_v4.z.string().optional().transform((val) => Boolean(val)),
   /**
    * Node environment - affects logging and behavior.
    * Defaults to 'development' if not specified.
    */
-  NODE_ENV: import_zod2.z.enum(["development", "production", "test"]).default("development"),
+  NODE_ENV: import_v4.z.enum(["development", "production", "test"]).default("development"),
   /**
    * Case-insensitive log level for the scenario package.
    * Defaults to 'info' if not specified.
    */
-  LOG_LEVEL: import_zod2.z.string().toUpperCase().pipe(import_zod2.z.nativeEnum(LogLevel)).optional().default("INFO" /* INFO */),
+  LOG_LEVEL: import_v4.z.string().toUpperCase().pipe(import_v4.z.nativeEnum(LogLevel)).optional().default("INFO" /* INFO */),
   /**
    * Scenario batch run ID.
    * If not provided, a random ID will be generated.
    */
-  SCENARIO_BATCH_RUN_ID: import_zod2.z.string().optional()
+  SCENARIO_BATCH_RUN_ID: import_v4.z.string().optional()
 });
 function getEnv() {
   return envSchema.parse(process.env);
@@ -236,6 +161,79 @@ function getEnv() {
 var import_promises = __toESM(require("fs/promises"));
 var import_node_path = __toESM(require("path"));
 var import_node_url = require("url");
+// src/domain/index.ts
+var domain_exports = {};
+__export(domain_exports, {
+  AgentAdapter: () => AgentAdapter,
+  AgentRole: () => AgentRole,
+  DEFAULT_MAX_TURNS: () => DEFAULT_MAX_TURNS,
+  DEFAULT_VERBOSE: () => DEFAULT_VERBOSE,
+  JudgeAgentAdapter: () => JudgeAgentAdapter,
+  UserSimulatorAgentAdapter: () => UserSimulatorAgentAdapter,
+  allAgentRoles: () => allAgentRoles,
+  defineConfig: () => defineConfig,
+  scenarioProjectConfigSchema: () => scenarioProjectConfigSchema
+});
+// src/domain/core/config.ts
+var import_v43 = require("zod/v4");
+// src/domain/core/schemas/model.schema.ts
+var import_v42 = require("zod/v4");
+// src/domain/core/constants.ts
+var DEFAULT_TEMPERATURE = 0;
+// src/domain/core/schemas/model.schema.ts
+var modelSchema = import_v42.z.object({
+  model: import_v42.z.custom((val) => Boolean(val), {
+    message: "A model is required. Configure it in scenario.config.js defaultModel or pass directly to the agent."
+  }).describe("The OpenAI Language Model to use for generating responses."),
+  temperature: import_v42.z.number().min(0).max(1).optional().describe("The temperature for the language model.").default(DEFAULT_TEMPERATURE),
+  maxTokens: import_v42.z.number().optional().describe("The maximum number of tokens to generate.")
+});
+// src/domain/core/config.ts
+var headless = typeof process !== "undefined" ? process.env.SCENARIO_HEADLESS === "true" : false;
+var scenarioProjectConfigSchema = import_v43.z.object({
+  defaultModel: modelSchema.optional(),
+  headless: import_v43.z.boolean().optional().default(headless)
+}).strict();
+function defineConfig(config2) {
+  return config2;
+}
+// src/domain/agents/index.ts
+var AgentRole = /* @__PURE__ */ ((AgentRole2) => {
+  AgentRole2["USER"] = "User";
+  AgentRole2["AGENT"] = "Agent";
+  AgentRole2["JUDGE"] = "Judge";
+  return AgentRole2;
+})(AgentRole || {});
+var allAgentRoles = [
+  "User" /* USER */,
+  "Agent" /* AGENT */,
+  "Judge" /* JUDGE */
+];
+var AgentAdapter = class {
+  name;
+  role = "Agent" /* AGENT */;
+};
+var UserSimulatorAgentAdapter = class extends AgentAdapter {
+  name = "UserSimulatorAgent";
+  role = "User" /* USER */;
+};
+var JudgeAgentAdapter = class extends AgentAdapter {
+  name = "JudgeAgent";
+  role = "Judge" /* JUDGE */;
+};
+// src/domain/scenarios/index.ts
+var DEFAULT_MAX_TURNS = 10;
+var DEFAULT_VERBOSE = false;
+// src/config/load.ts
 async function loadScenarioProjectConfig() {
   const cwd = process.cwd();
   const configNames = [
@@ -267,14 +265,14 @@ async function loadScenarioProjectConfig() {
 // src/utils/logger.ts
 var Logger = class _Logger {
-  constructor(context) {
-    this.context = context;
+  constructor(context2) {
+    this.context = context2;
   }
   /**
    * Creates a logger with context (e.g., class name)
    */
-  static create(context) {
-    return new _Logger(context);
+  static create(context2) {
+    return new _Logger(context2);
   }
   /**
    * Returns the current log level from environment.
@@ -373,131 +371,612 @@ async function getProjectConfig() {
   return config;
 }
-// src/utils/config.ts
-function mergeConfig(config2, projectConfig) {
-  if (!projectConfig) {
-    return config2;
-  }
-  return {
-    ...projectConfig.defaultModel,
-    ...config2
-  };
-}
-function mergeAndValidateConfig(config2, projectConfig) {
-  var _a;
-  const mergedConfig = mergeConfig(config2, projectConfig);
-  mergedConfig.model = mergedConfig.model ?? ((_a = projectConfig == null ? void 0 : projectConfig.defaultModel) == null ? void 0 : _a.model);
-  if (!mergedConfig.model) {
-    throw new Error("Model is required");
-  }
-  return mergedConfig;
-}
-// src/agents/judge-agent.ts
-function buildSystemPrompt(criteria, description) {
-  const criteriaList = (criteria == null ? void 0 : criteria.map((criterion, idx) => `${idx + 1}. ${criterion}`).join("\n")) || "No criteria provided";
-  return `
-<role>
-You are an LLM as a judge watching a simulated conversation as it plays out live to determine if the agent under test meets the criteria or not.
-</role>
-<goal>
-Your goal is to determine if you already have enough information to make a verdict of the scenario below, or if the conversation should continue for longer.
-If you do have enough information, use the finish_test tool to determine if all the criteria have been met, if not, use the continue_test tool to let the next step play out.
-</goal>
+// src/tracing/setup.ts
+var envConfig = getEnv();
+var observabilityHandle = (0, import_node.setupObservability)({
+  langwatch: {
+    apiKey: envConfig.LANGWATCH_API_KEY,
+    endpoint: envConfig.LANGWATCH_ENDPOINT
+  },
+  spanProcessors: [judgeSpanCollector]
+});
-<scenario>
-${description}
-</scenario>
+// src/agents/index.ts
+var agents_exports = {};
+__export(agents_exports, {
+  JudgeSpanCollector: () => JudgeSpanCollector,
+  JudgeSpanDigestFormatter: () => JudgeSpanDigestFormatter,
+  RealtimeAgentAdapter: () => RealtimeAgentAdapter,
+  judgeAgent: () => judgeAgent,
+  judgeSpanCollector: () => judgeSpanCollector,
+  judgeSpanDigestFormatter: () => judgeSpanDigestFormatter,
+  userSimulatorAgent: () => userSimulatorAgent
+});
-<criteria>
-${criteriaList}
-</criteria>
+// src/agents/judge/judge-agent.ts
+var import_ai2 = require("ai");
+var import_v44 = require("zod/v4");
-<rules>
-- Be strict, do not let the conversation continue if the agent already broke one of the "do not" or "should not" criteria.
-- DO NOT make any judgment calls that are not explicitly listed in the success or failure criteria, withhold judgement if necessary
-</rules>
-`.trim();
-}
-function buildContinueTestTool() {
-  return (0, import_ai.tool)({
-    description: "Continue the test with the next step",
-    parameters: import_zod3.z.object({})
-  });
-}
-function buildFinishTestTool(criteria) {
-  const criteriaNames = criteria.map(criterionToParamName);
-  return (0, import_ai.tool)({
-    description: "Complete the test with a final verdict",
-    parameters: import_zod3.z.object({
-      criteria: import_zod3.z.object(
-        Object.fromEntries(
-          criteriaNames.map((name, idx) => [
-            name,
-            import_zod3.z.enum(["true", "false", "inconclusive"]).describe(criteria[idx])
-          ])
-        )
-      ).strict().describe("Strict verdict for each criterion"),
-      reasoning: import_zod3.z.string().describe("Explanation of what the final verdict should be"),
-      verdict: import_zod3.z.enum(["success", "failure", "inconclusive"]).describe("The final verdict of the test")
-    })
-  });
-}
-var JudgeAgent = class extends JudgeAgentAdapter {
-  constructor(cfg) {
-    super();
-    this.cfg = cfg;
-    this.criteria = cfg.criteria;
-    this.role = "Judge" /* JUDGE */;
-  }
-  logger = new Logger("JudgeAgent");
-  role = "Judge" /* JUDGE */;
-  criteria;
-  async call(input) {
-    var _a;
-    const cfg = this.cfg;
-    const systemPrompt = cfg.systemPrompt ?? buildSystemPrompt(cfg.criteria, input.scenarioConfig.description);
-    const messages = [
-      { role: "system", content: systemPrompt },
-      ...input.messages
-    ];
-    const isLastMessage = input.scenarioState.currentTurn === input.scenarioConfig.maxTurns;
-    const projectConfig = await getProjectConfig();
-    const mergedConfig = mergeAndValidateConfig(cfg, projectConfig);
-    if (!mergedConfig.model) {
-      throw new Error("Model is required for the judge agent");
+// src/agents/judge/judge-utils.ts
+function truncateBase64Media(value) {
+  var _a;
+  if (typeof value === "string") {
+    const dataUrlMatch = value.match(
+      /^data:((image|audio|video)\/[a-z0-9+.-]+);base64,(.+)$/i
+    );
+    if (dataUrlMatch) {
+      const mimeType = dataUrlMatch[1];
+      const mediaType = dataUrlMatch[2].toUpperCase();
+      const size = dataUrlMatch[3].length;
+      return `[${mediaType}: ${mimeType}, ~${size} bytes]`;
     }
-    const tools = {
-      continue_test: buildContinueTestTool(),
-      finish_test: buildFinishTestTool(cfg.criteria)
-    };
-    const enforceJudgement = input.judgmentRequest;
-    const hasCriteria = cfg.criteria.length && cfg.criteria.length > 0;
-    if (enforceJudgement && !hasCriteria) {
+    return value;
+  }
+  if (Array.isArray(value)) {
+    return value.map(truncateBase64Media);
+  }
+  if (value && typeof value === "object") {
+    const obj = value;
+    if (obj.type === "file" && typeof obj.mediaType === "string" && typeof obj.data === "string") {
+      const mediaType = obj.mediaType;
+      const category = ((_a = mediaType.split("/")[0]) == null ? void 0 : _a.toUpperCase()) ?? "FILE";
       return {
-        success: false,
-        messages: [],
-        reasoning: "JudgeAgent: No criteria was provided to be judged against",
-        metCriteria: [],
-        unmetCriteria: []
+        ...obj,
+        data: `[${category}: ${mediaType}, ~${obj.data.length} bytes]`
       };
     }
-    const toolChoice = (isLastMessage || enforceJudgement) && hasCriteria ? { type: "tool", toolName: "finish_test" } : "required";
-    const completion = await this.generateText({
-      model: mergedConfig.model,
-      messages,
-      temperature: mergedConfig.temperature ?? 0,
-      maxTokens: mergedConfig.maxTokens,
+    if (obj.type === "image" && typeof obj.image === "string") {
+      const imageData = obj.image;
+      const dataUrlMatch = imageData.match(
+        /^data:((image)\/[a-z0-9+.-]+);base64,(.+)$/i
+      );
+      if (dataUrlMatch) {
+        return {
+          ...obj,
+          image: `[IMAGE: ${dataUrlMatch[1]}, ~${dataUrlMatch[3].length} bytes]`
+        };
+      }
+      if (imageData.length > 1e3 && /^[A-Za-z0-9+/=]+$/.test(imageData)) {
+        return {
+          ...obj,
+          image: `[IMAGE: unknown, ~${imageData.length} bytes]`
+        };
+      }
+    }
+    const result = {};
+    for (const [key, val] of Object.entries(obj)) {
+      result[key] = truncateBase64Media(val);
+    }
+    return result;
+  }
+  return value;
+}
+var JudgeUtils = {
+  /**
+   * Builds a minimal transcript from messages for judge evaluation.
+   * Truncates base64 media to reduce token usage.
+   * @param messages - Array of CoreMessage from conversation
+   * @returns Plain text transcript with one message per line
+   */
+  buildTranscriptFromMessages(messages) {
+    return messages.map((msg) => {
+      const truncatedContent = truncateBase64Media(msg.content);
+      return `${msg.role}: ${JSON.stringify(truncatedContent)}`;
+    }).join("\n");
+  }
+};
+// src/agents/llm-invoker.factory.ts
+var import_ai = require("ai");
+var createLLMInvoker = (logger2) => {
+  return async (params) => {
+    try {
+      return await (0, import_ai.generateText)({
+        ...params,
+        experimental_telemetry: { isEnabled: true }
+      });
+    } catch (error) {
+      logger2.error("Error generating text", { error });
+      throw error;
+    }
+  };
+};
+// src/agents/utils.ts
+var toolMessageRole = "tool";
+var assistantMessageRole = "assistant";
+var userMessageRole = "user";
+var groupMessagesByToolBoundaries = (messages) => {
+  const segments = [];
+  let currentSegment = [];
+  for (const message2 of messages) {
+    currentSegment.push(message2);
+    if (message2.role === toolMessageRole) {
+      segments.push(currentSegment);
+      currentSegment = [];
+    }
+  }
+  if (currentSegment.length > 0) {
+    segments.push(currentSegment);
+  }
+  return segments;
+};
+var segmentHasToolMessages = (segment) => {
+  return segment.some((message2) => {
+    if (message2.role === toolMessageRole) return true;
+    if (message2.role === assistantMessageRole && Array.isArray(message2.content)) {
+      return message2.content.some((part) => part.type === "tool-call");
+    }
+    return false;
+  });
+};
+var reverseSegmentRoles = (segment) => {
+  return segment.map((message2) => {
+    const hasStringContent = typeof message2.content === "string";
+    if (!hasStringContent) return message2;
+    const roleMap = {
+      [userMessageRole]: assistantMessageRole,
+      [assistantMessageRole]: userMessageRole
+    };
+    const newRole = roleMap[message2.role];
+    if (!newRole) return message2;
+    return {
+      role: newRole,
+      content: message2.content
+    };
+  });
+};
+var messageRoleReversal = (messages) => {
+  const segments = groupMessagesByToolBoundaries(messages);
+  const processedSegments = segments.map(
+    (segment) => segmentHasToolMessages(segment) ? segment : reverseSegmentRoles(segment)
+  );
+  return processedSegments.flat();
+};
+var criterionToParamName = (criterion) => {
+  return criterion.replace(/"/g, "").replace(/[^a-zA-Z0-9]/g, "_").replace(/ /g, "_").toLowerCase().substring(0, 70);
+};
+// src/agents/judge/judge-span-digest-formatter.ts
+var import_observability2 = require("langwatch/observability");
+// src/agents/judge/deep-transform.ts
+function deepTransform(value, fn) {
+  const result = fn(value);
+  if (result !== value) return result;
+  if (Array.isArray(value)) {
+    return value.map((v) => deepTransform(v, fn));
+  }
+  if (value !== null && typeof value === "object") {
+    const out = {};
+    for (const [k, v] of Object.entries(value)) {
+      out[k] = deepTransform(v, fn);
+    }
+    return out;
+  }
+  return value;
+}
+// src/agents/judge/string-deduplicator.ts
+var StringDeduplicator = class {
+  seen = /* @__PURE__ */ new Map();
+  threshold;
+  constructor(params) {
+    this.threshold = params.threshold;
+  }
+  /**
+   * Resets seen strings for a new digest.
+   */
+  reset() {
+    this.seen.clear();
+  }
+  /**
+   * Processes a string, returning duplicate marker if seen before.
+   * @param str - String to process
+   * @returns Original string or duplicate marker
+   */
+  process(str) {
+    if (str.length < this.threshold) return str;
+    const key = this.normalize(str);
+    if (this.seen.has(key)) return "[DUPLICATE - SEE ABOVE]";
+    this.seen.set(key, true);
+    return str;
+  }
+  /**
+   * Normalizes string for comparison (whitespace, case).
+   */
+  normalize(str) {
+    return str.replace(/\\[nrt]/g, " ").replace(/[\n\r\t]/g, " ").replace(/\s+/g, " ").trim().toLowerCase();
+  }
+};
+// src/agents/judge/truncate-media.ts
+function truncateMediaUrl(str) {
+  const match = str.match(
+    /^data:((image|audio|video)\/[a-z0-9+.-]+);base64,(.+)$/i
+  );
+  if (!match) return str;
+  const [, mimeType, category, data] = match;
+  return `[${category.toUpperCase()}: ${mimeType}, ~${data.length} bytes]`;
+}
+function truncateMediaPart(v) {
+  var _a;
+  if (v === null || typeof v !== "object" || Array.isArray(v)) return null;
+  const obj = v;
+  if (obj.type === "file" && typeof obj.mediaType === "string" && typeof obj.data === "string") {
+    const category = ((_a = obj.mediaType.split("/")[0]) == null ? void 0 : _a.toUpperCase()) ?? "FILE";
+    return {
+      ...obj,
+      data: `[${category}: ${obj.mediaType}, ~${obj.data.length} bytes]`
+    };
+  }
+  if (obj.type === "image" && typeof obj.image === "string") {
+    const imageData = obj.image;
+    const dataUrlMatch = imageData.match(
+      /^data:((image)\/[a-z0-9+.-]+);base64,(.+)$/i
+    );
+    if (dataUrlMatch) {
+      return {
+        ...obj,
+        image: `[IMAGE: ${dataUrlMatch[1]}, ~${dataUrlMatch[3].length} bytes]`
+      };
+    }
+    if (imageData.length > 1e3 && /^[A-Za-z0-9+/=]+$/.test(imageData)) {
+      return {
+        ...obj,
+        image: `[IMAGE: unknown, ~${imageData.length} bytes]`
+      };
+    }
+  }
+  return null;
+}
+// src/agents/judge/judge-span-digest-formatter.ts
+var JudgeSpanDigestFormatter = class {
+  logger = new Logger("JudgeSpanDigestFormatter");
+  deduplicator = new StringDeduplicator({ threshold: 50 });
+  /**
+   * Formats spans into a complete digest with full content and nesting.
+   * @param spans - All spans for a thread
+   * @returns Plain text digest
+   */
+  format(spans) {
+    this.deduplicator.reset();
+    this.logger.debug("format() called", {
+      spanCount: spans.length,
+      spanNames: spans.map((s) => s.name)
+    });
+    if (spans.length === 0) {
+      this.logger.debug("No spans to format");
+      return "No spans recorded.";
+    }
+    const sortedSpans = this.sortByStartTime(spans);
+    const tree = this.buildHierarchy(sortedSpans);
+    const totalDuration = this.calculateTotalDuration(sortedSpans);
+    this.logger.debug("Hierarchy built", {
+      rootCount: tree.length,
+      totalDuration
+    });
+    const lines = [
+      `Spans: ${spans.length} | Total Duration: ${this.formatDuration(
+        totalDuration
+      )}`,
+      ""
+    ];
+    let sequence = 1;
+    const rootCount = tree.length;
+    tree.forEach((node, idx) => {
+      sequence = this.renderNode(
+        node,
+        lines,
+        0,
+        sequence,
+        idx === rootCount - 1
+      );
+    });
+    const errors = this.collectErrors(spans);
+    if (errors.length > 0) {
+      lines.push("");
+      lines.push("=== ERRORS ===");
+      errors.forEach((e) => lines.push(e));
+    }
+    return lines.join("\n");
+  }
+  sortByStartTime(spans) {
+    return [...spans].sort((a, b) => {
+      const aTime = this.hrTimeToMs(a.startTime);
+      const bTime = this.hrTimeToMs(b.startTime);
+      return aTime - bTime;
+    });
+  }
+  buildHierarchy(spans) {
+    var _a;
+    const spanMap = /* @__PURE__ */ new Map();
+    const roots = [];
+    for (const span of spans) {
+      spanMap.set(span.spanContext().spanId, { span, children: [] });
+    }
+    for (const span of spans) {
+      const node = spanMap.get(span.spanContext().spanId);
+      const parentId = (_a = span.parentSpanContext) == null ? void 0 : _a.spanId;
+      if (parentId && spanMap.has(parentId)) {
+        spanMap.get(parentId).children.push(node);
+      } else {
+        roots.push(node);
+      }
+    }
+    return roots;
+  }
+  renderNode(node, lines, depth, sequence, isLast = true) {
+    const span = node.span;
+    const duration = this.calculateSpanDuration(span);
+    const timestamp = this.formatTimestamp(span.startTime);
+    const status = this.getStatusIndicator(span);
+    const prefix = this.getTreePrefix(depth, isLast);
+    lines.push(
+      `${prefix}[${sequence}] ${new Date(timestamp).toISOString()} ${span.name} (${this.formatDuration(duration)})${status}`
+    );
+    const attrIndent = this.getAttrIndent(depth, isLast);
+    const attrs = this.cleanAttributes(span.attributes);
+    if (Object.keys(attrs).length > 0) {
+      for (const [key, value] of Object.entries(attrs)) {
+        lines.push(`${attrIndent}${key}: ${this.formatValue(value)}`);
+      }
+    }
+    if (span.events.length > 0) {
+      for (const event of span.events) {
+        lines.push(`${attrIndent}[event] ${event.name}`);
+        if (event.attributes) {
+          const eventAttrs = this.cleanAttributes(event.attributes);
+          for (const [key, value] of Object.entries(eventAttrs)) {
+            lines.push(`${attrIndent}  ${key}: ${this.formatValue(value)}`);
+          }
+        }
+      }
+    }
+    lines.push("");
+    let nextSeq = sequence + 1;
+    const childCount = node.children.length;
+    node.children.forEach((child, idx) => {
+      nextSeq = this.renderNode(
+        child,
+        lines,
+        depth + 1,
+        nextSeq,
+        idx === childCount - 1
+      );
+    });
+    return nextSeq;
+  }
+  getTreePrefix(depth, isLast) {
+    if (depth === 0) return "";
+    const connector = isLast ? "\u2514\u2500\u2500 " : "\u251C\u2500\u2500 ";
+    return "\u2502   ".repeat(depth - 1) + connector;
+  }
+  getAttrIndent(depth, isLast) {
+    if (depth === 0) return "    ";
+    const continuation = isLast ? "    " : "\u2502   ";
+    return "\u2502   ".repeat(depth - 1) + continuation + "    ";
+  }
+  cleanAttributes(attrs) {
+    const cleaned = {};
+    const seen = /* @__PURE__ */ new Set();
+    const excludedKeys = [
+      import_observability2.attributes.ATTR_LANGWATCH_THREAD_ID,
+      "langwatch.scenario.id",
+      "langwatch.scenario.name"
+    ];
+    for (const [key, value] of Object.entries(attrs)) {
+      if (excludedKeys.includes(key)) {
+        continue;
+      }
+      const cleanKey = key.replace(/^(langwatch)\./, "");
+      if (!seen.has(cleanKey)) {
+        seen.add(cleanKey);
+        cleaned[cleanKey] = value;
+      }
+    }
+    return cleaned;
+  }
+  formatValue(value) {
+    const processed = this.transformValue(value);
+    return typeof processed === "string" ? processed : JSON.stringify(processed);
+  }
+  transformValue(value) {
+    return deepTransform(value, (v) => {
+      const mediaPart = truncateMediaPart(v);
+      if (mediaPart) return mediaPart;
+      if (typeof v !== "string") return v;
+      return this.transformString(v);
+    });
+  }
+  transformString(str) {
+    if (this.looksLikeJson(str)) {
+      try {
+        const processed = this.transformValue(JSON.parse(str));
+        return JSON.stringify(processed);
+      } catch {
+      }
+    }
+    const truncated = truncateMediaUrl(str);
+    if (truncated !== str) return truncated;
+    return this.deduplicator.process(str);
+  }
+  looksLikeJson(str) {
+    const t = str.trim();
+    return t.startsWith("{") && t.endsWith("}") || t.startsWith("[") && t.endsWith("]");
+  }
+  hrTimeToMs(hrTime) {
+    return hrTime[0] * 1e3 + hrTime[1] / 1e6;
+  }
+  calculateSpanDuration(span) {
+    return this.hrTimeToMs(span.endTime) - this.hrTimeToMs(span.startTime);
+  }
+  calculateTotalDuration(spans) {
+    if (spans.length === 0) return 0;
+    const first = this.hrTimeToMs(spans[0].startTime);
+    const last = Math.max(...spans.map((s) => this.hrTimeToMs(s.endTime)));
+    return last - first;
+  }
+  formatDuration(ms) {
+    if (ms < 1e3) return `${Math.round(ms)}ms`;
+    return `${(ms / 1e3).toFixed(2)}s`;
+  }
+  formatTimestamp(hrTime) {
+    const ms = this.hrTimeToMs(hrTime);
+    return new Date(ms).toISOString();
+  }
+  getStatusIndicator(span) {
+    if (span.status.code === 2) {
+      return ` \u26A0\uFE0F ERROR: ${span.status.message ?? "unknown"}`;
+    }
+    return "";
+  }
+  collectErrors(spans) {
+    return spans.filter((s) => s.status.code === 2).map((s) => `- ${s.name}: ${s.status.message ?? "unknown error"}`);
+  }
+};
+var judgeSpanDigestFormatter = new JudgeSpanDigestFormatter(); // Module-level formatter instance; exported from the package and used by JudgeAgent to format collected spans.
+// src/agents/judge/judge-agent.ts
+function buildSystemPrompt(criteria, description) {
+  const criteriaList = (criteria == null ? void 0 : criteria.map((criterion, idx) => `${idx + 1}. ${criterion}`).join("\n")) || "No criteria provided";
+  return `
+<role>
+You are an LLM as a judge watching a simulated conversation as it plays out live to determine if the agent under test meets the criteria or not.
+</role>
+<goal>
+Your goal is to determine if you already have enough information to make a verdict of the scenario below, or if the conversation should continue for longer.
+If you do have enough information, use the finish_test tool to determine if all the criteria have been met, if not, use the continue_test tool to let the next step play out.
+</goal>
+<scenario>
+${description}
+</scenario>
+<criteria>
+${criteriaList}
+</criteria>
+<rules>
+- Be strict, do not let the conversation continue if the agent already broke one of the "do not" or "should not" criteria.
+- DO NOT make any judgment calls that are not explicitly listed in the success or failure criteria, withhold judgement if necessary
+</rules>
+`.trim();
+}
+function buildContinueTestTool() {
+  return (0, import_ai2.tool)({
+    description: "Continue the test with the next step",
+    inputSchema: import_v44.z.object({})
+  });
+}
+function buildFinishTestTool(criteria) {
+  const criteriaNames = criteria.map(criterionToParamName);
+  return (0, import_ai2.tool)({
+    description: "Complete the test with a final verdict",
+    inputSchema: import_v44.z.object({
+      criteria: import_v44.z.object(
+        Object.fromEntries(
+          criteriaNames.map((name, idx) => [
+            name,
+            import_v44.z.enum(["true", "false", "inconclusive"]).describe(criteria[idx])
+          ])
+        )
+      ).strict().describe("Strict verdict for each criterion"),
+      reasoning: import_v44.z.string().describe("Explanation of what the final verdict should be"),
+      verdict: import_v44.z.enum(["success", "failure", "inconclusive"]).describe("The final verdict of the test")
+    })
+  });
+}
+var JudgeAgent = class extends JudgeAgentAdapter {
+  constructor(cfg) {
+    super();
+    this.cfg = cfg;
+    this.criteria = cfg.criteria;
+    this.spanCollector = cfg.spanCollector ?? judgeSpanCollector;
+  }
+  logger = new Logger("JudgeAgent");
+  spanCollector;
+  role = "Judge" /* JUDGE */;
+  criteria;
+  /**
+   * LLM invocation function. Can be overridden to customize LLM behavior.
+   */
+  invokeLLM = createLLMInvoker(this.logger);
+  async call(input) {
+    var _a, _b, _c;
+    this.logger.debug("call() invoked", {
+      threadId: input.threadId,
+      currentTurn: input.scenarioState.currentTurn,
+      maxTurns: input.scenarioConfig.maxTurns,
+      judgmentRequest: input.judgmentRequest
+    });
+    const digest = this.getOpenTelemetryTracesDigest(input.threadId);
+    this.logger.debug("OpenTelemetry traces built", { digest });
+    const transcript = JudgeUtils.buildTranscriptFromMessages(input.messages);
+    const contentForJudge = `
+    <transcript>
+    ${transcript}
+    </transcript>
+    <opentelemetry_traces>
+    ${digest}
+    </opentelemetry_traces>
+    `;
+    const cfg = this.cfg;
+    const systemPrompt = cfg.systemPrompt ?? buildSystemPrompt(cfg.criteria, input.scenarioConfig.description);
+    const messages = [
+      { role: "system", content: systemPrompt },
+      { role: "user", content: contentForJudge }
+    ];
+    const isLastMessage = input.scenarioState.currentTurn === input.scenarioConfig.maxTurns;
+    const projectConfig = await getProjectConfig();
+    const mergedConfig = modelSchema.parse({
+      ...projectConfig == null ? void 0 : projectConfig.defaultModel,
+      ...cfg
+    });
+    const tools = {
+      continue_test: buildContinueTestTool(),
+      finish_test: buildFinishTestTool(cfg.criteria)
+    };
+    const enforceJudgement = input.judgmentRequest;
+    const hasCriteria = cfg.criteria.length && cfg.criteria.length > 0;
+    if (enforceJudgement && !hasCriteria) {
+      return {
+        success: false,
+        reasoning: "JudgeAgent: No criteria was provided to be judged against",
+        metCriteria: [],
+        unmetCriteria: []
+      };
+    }
+    const toolChoice = (isLastMessage || enforceJudgement) && hasCriteria ? { type: "tool", toolName: "finish_test" } : "required";
+    this.logger.debug("Calling LLM", {
+      model: mergedConfig.model,
+      toolChoice,
+      isLastMessage,
+      enforceJudgement
+    });
+    const completion = await this.invokeLLM({
+      model: mergedConfig.model,
+      messages,
+      temperature: mergedConfig.temperature ?? 0,
+      maxOutputTokens: mergedConfig.maxTokens,
       tools,
       toolChoice
     });
+    this.logger.debug("LLM response received", {
+      toolCallCount: ((_a = completion.toolCalls) == null ? void 0 : _a.length) ?? 0,
+      toolCalls: (_b = completion.toolCalls) == null ? void 0 : _b.map((tc) => ({
+        toolName: tc.toolName,
+        args: tc.input
+      }))
+    });
     let args;
-    if ((_a = completion.toolCalls) == null ? void 0 : _a.length) {
+    if ((_c = completion.toolCalls) == null ? void 0 : _c.length) {
       const toolCall = completion.toolCalls[0];
       switch (toolCall.toolName) {
         case "finish_test": {
-          args = toolCall.args;
+          args = toolCall.input;
           const verdict = args.verdict || "inconclusive";
           const reasoning = args.reasoning || "No reasoning provided";
           const criteria = args.criteria || {};
@@ -508,20 +987,21 @@ var JudgeAgent = class extends JudgeAgentAdapter {
           const unmetCriteria = cfg.criteria.filter(
             (_, i) => criteriaValues[i] !== "true"
           );
-          return {
+          const result = {
             success: verdict === "success",
-            messages: input.messages,
             reasoning,
             metCriteria,
             unmetCriteria
           };
+          this.logger.debug("finish_test result", result);
+          return result;
         }
         case "continue_test":
-          return [];
+          this.logger.debug("continue_test - proceeding to next turn");
+          return null;
         default:
           return {
             success: false,
-            messages: input.messages,
             reasoning: `JudgeAgent: Unknown tool call: ${toolCall.toolName}`,
             metCriteria: [],
             unmetCriteria: cfg.criteria
@@ -530,101 +1010,1184 @@ var JudgeAgent = class extends JudgeAgentAdapter {
     }
     return {
       success: false,
-      messages: input.messages,
       reasoning: `JudgeAgent: No tool call found in LLM output`,
       metCriteria: [],
       unmetCriteria: cfg.criteria
     };
   }
-  async generateText(input) {
-    try {
-      return await (0, import_ai.generateText)(input);
-    } catch (error) {
-      this.logger.error("Error generating text", { error });
-      throw error;
-    }
+  getOpenTelemetryTracesDigest(threadId) {
+    const spans = this.spanCollector.getSpansForThread(threadId);
+    const digest = judgeSpanDigestFormatter.format(spans);
+    return digest;
   }
 };
 /** Factory helper: creates a JudgeAgent from the given configuration. */
 var judgeAgent = (cfg) => new JudgeAgent(cfg);
-// src/agents/user-simulator-agent.ts
-var import_ai2 = require("ai");
-function buildSystemPrompt2(description) {
-  return `
-<role>
-You are pretending to be a user, you are testing an AI Agent (shown as the user role) based on a scenario.
-Approach this naturally, as a human user would, with very short inputs, few words, all lowercase, imperative, not periods, like when they google or talk to chatgpt.
-</role>
+// src/agents/user-simulator-agent.ts
+function buildSystemPrompt2(description) {
+  return `
+<role>
+You are pretending to be a user, you are testing an AI Agent (shown as the user role) based on a scenario.
+Approach this naturally, as a human user would, with very short inputs, few words, all lowercase, imperative, not periods, like when they google or talk to chatgpt.
+</role>
+<goal>
+Your goal (assistant) is to interact with the Agent Under Test (user) as if you were a human user to see if it can complete the scenario successfully.
+</goal>
+<scenario>
+${description}
+</scenario>
+<rules>
+- DO NOT carry over any requests yourself, YOU ARE NOT the assistant today, you are the user
+</rules>
+`.trim();
+}
+var UserSimulatorAgent = class extends UserSimulatorAgentAdapter {
+  constructor(cfg) {
+    super();
+    this.cfg = cfg;
+  }
+  logger = new Logger(this.constructor.name);
+  /**
+   * LLM invocation function. Can be overridden to customize LLM behavior.
+   */
+  invokeLLM = createLLMInvoker(this.logger);
+  call = async (input) => {
+    const config2 = this.cfg;
+    const systemPrompt = (config2 == null ? void 0 : config2.systemPrompt) ?? buildSystemPrompt2(input.scenarioConfig.description);
+    const messages = [
+      { role: "system", content: systemPrompt },
+      { role: "assistant", content: "Hello, how can I help you today" },
+      ...input.messages
+    ];
+    const projectConfig = await getProjectConfig();
+    const mergedConfig = modelSchema.parse({
+      ...projectConfig == null ? void 0 : projectConfig.defaultModel,
+      ...config2
+    });
+    const reversedMessages = messageRoleReversal(messages);
+    const completion = await this.invokeLLM({
+      model: mergedConfig.model,
+      messages: reversedMessages,
+      temperature: mergedConfig.temperature,
+      maxOutputTokens: mergedConfig.maxTokens
+    });
+    const messageContent = completion.text;
+    if (!messageContent) {
+      throw new Error("No response content from LLM");
+    }
+    return { role: "user", content: messageContent };
+  };
+};
+var userSimulatorAgent = (config2) => {
+  return new UserSimulatorAgent(config2);
+};
+// src/agents/realtime/realtime-agent.adapter.ts
+var import_events = require("events");
+// src/agents/realtime/message-processor.ts
+var MessageProcessor = class {
+  /**
+   * Processes audio message content and extracts base64 audio data
+   *
+   * @param content - The message content to process
+   * @returns Base64 audio data string or null if no audio found
+   * @throws {Error} If audio data is invalid
+   */
+  processAudioMessage(content) {
+    if (!Array.isArray(content)) {
+      return null;
+    }
+    for (const part of content) {
+      if (typeof part === "object" && part !== null && "type" in part && part.type === "file" && "mediaType" in part && typeof part.mediaType === "string" && part.mediaType.startsWith("audio/")) {
+        if (!("data" in part) || typeof part.data !== "string") {
+          throw new Error(
+            `Audio data must be base64 string, got: ${typeof part.data}`
+          );
+        }
+        if (!part.data || part.data.length === 0) {
+          throw new Error(
+            `Audio message has no data. Part: ${JSON.stringify(part)}`
+          );
+        }
+        return part.data;
+      }
+    }
+    return null;
+  }
+  /**
+   * Extracts text content from message content
+   *
+   * @param content - The message content to process
+   * @returns Text string or empty string if no text found
+   */
+  extractTextMessage(content) {
+    return typeof content === "string" ? content : "";
+  }
+  /**
+   * Validates that a message has either text or audio content
+   *
+   * @param content - The message content to validate
+   * @returns True if the message has valid content
+   */
+  hasValidContent(content) {
+    const hasText = this.extractTextMessage(content).length > 0;
+    const hasAudio = this.processAudioMessage(content) !== null;
+    return hasText || hasAudio;
+  }
+};
+// src/agents/realtime/realtime-event-handler.ts
+var RealtimeEventHandler = class {
+  /**
+   * Creates a new RealtimeEventHandler instance
+   * @param session - The RealtimeSession to listen to events from
+   */
+  constructor(session) {
+    this.session = session;
+    this.ensureEventListeners();
+  }
+  currentResponse = "";
+  currentAudioChunks = [];
+  responseResolver = null;
+  errorRejecter = null;
+  listenersSetup = false;
+  /**
+   * Gets the transport from the session
+   */
+  getTransport() {
+    const sessionWithTransport = this.session;
+    return sessionWithTransport.transport ?? null;
+  }
+  /**
+   * Ensures event listeners are set up, retrying if transport not available
+   */
+  ensureEventListeners() {
+    if (this.listenersSetup) return;
+    const transport = this.getTransport();
+    if (!transport) {
+      setTimeout(() => this.ensureEventListeners(), 100);
+      return;
+    }
+    this.setupEventListeners();
+  }
+  /**
+   * Sets up event listeners for the RealtimeSession transport layer
+   */
+  setupEventListeners() {
+    if (this.listenersSetup) return;
+    const transport = this.getTransport();
+    if (!transport) {
+      console.error("\u274C Transport not available on session");
+      return;
+    }
+    transport.on("response.output_audio_transcript.delta", (event) => {
+      const deltaEvent = event;
+      if (typeof deltaEvent.delta === "string") {
+        this.currentResponse += deltaEvent.delta;
+      }
+    });
+    transport.on("response.output_audio.delta", (event) => {
+      const deltaEvent = event;
+      if (typeof deltaEvent.delta === "string") {
+        this.currentAudioChunks.push(deltaEvent.delta);
+      }
+    });
+    transport.on("response.done", () => {
+      const fullAudio = this.currentAudioChunks.join("");
+      const audioResponse = {
+        transcript: this.currentResponse,
+        audio: fullAudio
+      };
+      if (this.responseResolver) {
+        this.responseResolver(audioResponse);
+        this.reset();
+      }
+    });
+    transport.on("error", (error) => {
+      console.error(`\u274C Transport error:`, error);
+      if (this.errorRejecter) {
+        const errorObj = error instanceof Error ? error : new Error(String(error));
+        this.errorRejecter(errorObj);
+        this.reset();
+      }
+    });
+    this.listenersSetup = true;
+  }
+  /**
+   * Waits for the agent response with timeout
+   *
+   * @param timeout - Maximum time to wait in milliseconds
+   * @returns Promise that resolves with the audio response event
+   * @throws {Error} If timeout occurs or transport error happens
+   */
+  waitForResponse(timeout) {
+    return new Promise((resolve, reject) => {
+      this.responseResolver = resolve;
+      this.errorRejecter = reject;
+      const timeoutId = setTimeout(() => {
+        if (this.responseResolver) {
+          this.reset();
+          reject(new Error(`Agent response timeout after ${timeout}ms`));
+        }
+      }, timeout);
+      const originalResolver = resolve;
+      this.responseResolver = (value) => {
+        clearTimeout(timeoutId);
+        originalResolver(value);
+      };
+    });
+  }
+  /**
+   * Resets the internal state for the next response
+   */
+  reset() {
+    this.responseResolver = null;
+    this.errorRejecter = null;
+    this.currentResponse = "";
+    this.currentAudioChunks = [];
+  }
+};
+// src/agents/realtime/response-formatter.ts
+var ResponseFormatter = class {
+  /**
+   * Formats an audio response event into Scenario framework format
+   *
+   * @param audioEvent - The audio response event from the Realtime API
+   * @returns Formatted assistant message with audio and text content
+   */
+  formatAudioResponse(audioEvent) {
+    return {
+      role: "assistant",
+      content: [
+        { type: "text", text: audioEvent.transcript },
+        { type: "file", mediaType: "audio/pcm16", data: audioEvent.audio }
+      ]
+    };
+  }
+  /**
+   * Formats a text response for the Scenario framework
+   *
+   * @param text - The text response from the agent
+   * @returns Plain text response string
+   */
+  formatTextResponse(text) {
+    return text;
+  }
+  /**
+   * Creates an initial response message for when no user message exists
+   *
+   * @param audioEvent - The audio response event from the Realtime API
+   * @returns Formatted assistant message for initial responses
+   */
+  formatInitialResponse(audioEvent) {
+    return this.formatAudioResponse(audioEvent);
+  }
+};
+// src/agents/realtime/realtime-agent.adapter.ts
+var RealtimeAgentAdapter = class extends AgentAdapter {
+  /**
+   * Creates a new RealtimeAgentAdapter instance
+   *
+   * The session can be either connected or unconnected.
+   * If unconnected, call connect() with an API key before use.
+   *
+   * @param config - Configuration for the realtime agent adapter
+   */
+  constructor(config2) {
+    super();
+    this.config = config2;
+    this.role = this.config.role;
+    this.name = this.config.agentName;
+    this.session = config2.session;
+    this.eventHandler = new RealtimeEventHandler(this.session);
+  }
+  role;
+  name;
+  session;
+  eventHandler;
+  messageProcessor = new MessageProcessor();
+  responseFormatter = new ResponseFormatter();
+  audioEvents = new import_events.EventEmitter();
+  /**
+   * Get the connect method from the session
+   */
+  async connect(params) {
+    const { apiKey, ...rest } = params ?? {};
+    await this.session.connect({
+      apiKey: apiKey ?? process.env.OPENAI_API_KEY,
+      ...rest
+    });
+  }
+  /**
+   * Closes the session connection
+   */
+  async disconnect() {
+    this.session.close();
+  }
+  /**
+   * Process input and generate response (implements AgentAdapter interface)
+   *
+   * This is called by Scenario framework for each agent turn.
+   * Handles both text and audio input, returns audio message with transcript.
+   *
+   * @param input - Scenario agent input with message history
+   * @returns Agent response as audio message or text
+   */
+  async call(input) {
+    console.log(`\u{1F50A} [${this.name}] being called with role: ${this.role}`);
+    const latestMessage = input.newMessages[input.newMessages.length - 1];
+    if (!latestMessage) {
+      return this.handleInitialResponse();
+    }
+    const audioData = this.messageProcessor.processAudioMessage(
+      latestMessage.content
+    );
+    if (audioData) {
+      return this.handleAudioInput(audioData);
+    }
+    const text = this.messageProcessor.extractTextMessage(
+      latestMessage.content
+    );
+    if (!text) {
+      throw new Error("Message has no text or audio content");
+    }
+    return this.handleTextInput(text);
+  }
+  /**
+   * Handles the initial response when no user message exists
+   */
+  async handleInitialResponse() {
+    console.log(`[${this.name}] First message, creating response`);
+    const sessionWithTransport = this.session;
+    const transport = sessionWithTransport.transport;
+    if (!transport) {
+      throw new Error("Realtime transport not available");
+    }
+    transport.sendEvent({
+      type: "response.create"
+    });
+    const timeout = this.config.responseTimeout ?? 6e4;
+    const response = await this.eventHandler.waitForResponse(timeout);
+    this.audioEvents.emit("audioResponse", response);
+    return this.responseFormatter.formatInitialResponse(response);
+  }
+  /**
+   * Handles audio input from the user
+   */
+  async handleAudioInput(audioData) {
+    const sessionWithTransport = this.session;
+    const transport = sessionWithTransport.transport;
+    if (!transport) {
+      throw new Error("Realtime transport not available");
+    }
+    transport.sendEvent({
+      type: "input_audio_buffer.append",
+      audio: audioData
+    });
+    transport.sendEvent({
+      type: "input_audio_buffer.commit"
+    });
+    transport.sendEvent({
+      type: "response.create"
+    });
+    const timeout = this.config.responseTimeout ?? 6e4;
+    const response = await this.eventHandler.waitForResponse(timeout);
+    this.audioEvents.emit("audioResponse", response);
+    return this.responseFormatter.formatAudioResponse(response);
+  }
+  /**
+   * Handles text input from the user
+   */
+  async handleTextInput(text) {
+    this.session.sendMessage(text);
+    const timeout = this.config.responseTimeout ?? 3e4;
+    const response = await this.eventHandler.waitForResponse(timeout);
+    this.audioEvents.emit("audioResponse", response);
+    return this.responseFormatter.formatTextResponse(response.transcript);
+  }
+  /**
+   * Subscribe to audio response events
+   *
+   * @param callback - Function called when an audio response completes
+   */
+  onAudioResponse(callback) {
+    this.audioEvents.on("audioResponse", callback);
+  }
+  /**
+   * Remove audio response listener
+   *
+   * @param callback - The callback function to remove
+   */
+  offAudioResponse(callback) {
+    this.audioEvents.off("audioResponse", callback);
+  }
+};
+// src/execution/index.ts
+var execution_exports = {}; // Namespace object for the execution module; __export registers the accessors listed below on it.
+__export(execution_exports, {
+  ScenarioExecution: () => ScenarioExecution,
+  ScenarioExecutionState: () => ScenarioExecutionState,
+  StateChangeEventType: () => StateChangeEventType
+});
+// node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/platform/node/globalThis.js
+var _globalThis = typeof globalThis === "object" ? globalThis : global; // Falls back to `global` when `globalThis` is unavailable.
+// node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/version.js
+var VERSION = "1.9.0"; // Version of the bundled @opentelemetry/api copy.
+// node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/internal/semver.js
+var re = /^(\d+)\.(\d+)\.(\d+)(-(.+))?$/; // major.minor.patch with an optional "-prerelease" suffix (group 4 includes the dash).
+function _makeCompatibilityCheck(ownVersion) {
+  var acceptedVersions = /* @__PURE__ */ new Set([ownVersion]);
+  var rejectedVersions = /* @__PURE__ */ new Set();
+  var myVersionMatch = ownVersion.match(re);
+  if (!myVersionMatch) {
+    return function() {
+      return false;
+    };
+  }
+  var ownVersionParsed = {
+    major: +myVersionMatch[1],
+    minor: +myVersionMatch[2],
+    patch: +myVersionMatch[3],
+    prerelease: myVersionMatch[4]
+  };
+  if (ownVersionParsed.prerelease != null) {
+    return function isExactmatch(globalVersion) {
+      return globalVersion === ownVersion;
+    };
+  }
+  function _reject(v) {
+    rejectedVersions.add(v);
+    return false;
+  }
+  function _accept(v) {
+    acceptedVersions.add(v);
+    return true;
+  }
+  return function isCompatible2(globalVersion) {
+    if (acceptedVersions.has(globalVersion)) {
+      return true;
+    }
+    if (rejectedVersions.has(globalVersion)) {
+      return false;
+    }
+    var globalVersionMatch = globalVersion.match(re);
+    if (!globalVersionMatch) {
+      return _reject(globalVersion);
+    }
+    var globalVersionParsed = {
+      major: +globalVersionMatch[1],
+      minor: +globalVersionMatch[2],
+      patch: +globalVersionMatch[3],
+      prerelease: globalVersionMatch[4]
+    };
+    if (globalVersionParsed.prerelease != null) {
+      return _reject(globalVersion);
+    }
+    if (ownVersionParsed.major !== globalVersionParsed.major) {
+      return _reject(globalVersion);
+    }
+    if (ownVersionParsed.major === 0) {
+      if (ownVersionParsed.minor === globalVersionParsed.minor && ownVersionParsed.patch <= globalVersionParsed.patch) {
+        return _accept(globalVersion);
+      }
+      return _reject(globalVersion);
+    }
+    if (ownVersionParsed.minor <= globalVersionParsed.minor) {
+      return _accept(globalVersion);
+    }
+    return _reject(globalVersion);
+  };
+}
+var isCompatible = _makeCompatibilityCheck(VERSION); // Compatibility predicate bound to the bundled API version.
+// node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/internal/global-utils.js
+var major = VERSION.split(".")[0]; // Major version component, as a string.
+var GLOBAL_OPENTELEMETRY_API_KEY = Symbol.for("opentelemetry.js.api." + major); // Registry-wide symbol, so every copy with the same major shares one slot.
+var _global = _globalThis; // Object on which the shared API registry is stored.
+function registerGlobal(type, instance, diag, allowOverride) {
+  var _a;
+  if (allowOverride === void 0) {
+    allowOverride = false;
+  }
+  var api = _global[GLOBAL_OPENTELEMETRY_API_KEY] = (_a = _global[GLOBAL_OPENTELEMETRY_API_KEY]) !== null && _a !== void 0 ? _a : {
+    version: VERSION
+  };
+  if (!allowOverride && api[type]) {
+    var err = new Error("@opentelemetry/api: Attempted duplicate registration of API: " + type);
+    diag.error(err.stack || err.message);
+    return false;
+  }
+  if (api.version !== VERSION) {
+    var err = new Error("@opentelemetry/api: Registration of version v" + api.version + " for " + type + " does not match previously registered API v" + VERSION);
+    diag.error(err.stack || err.message);
+    return false;
+  }
+  api[type] = instance;
+  diag.debug("@opentelemetry/api: Registered a global for " + type + " v" + VERSION + ".");
+  return true;
+}
+function getGlobal(type) {
+  var _a, _b;
+  var globalVersion = (_a = _global[GLOBAL_OPENTELEMETRY_API_KEY]) === null || _a === void 0 ? void 0 : _a.version;
+  if (!globalVersion || !isCompatible(globalVersion)) {
+    return;
+  }
+  return (_b = _global[GLOBAL_OPENTELEMETRY_API_KEY]) === null || _b === void 0 ? void 0 : _b[type];
+}
+function unregisterGlobal(type, diag) {
+  diag.debug("@opentelemetry/api: Unregistering a global for " + type + " v" + VERSION + ".");
+  var api = _global[GLOBAL_OPENTELEMETRY_API_KEY];
+  if (api) {
+    delete api[type];
+  }
+}
+// node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/diag/ComponentLogger.js
+var __read = function(o, n) { // Reads up to n values (all of them when n is undefined) from iterable o into a new array.
+  var m = typeof Symbol === "function" && o[Symbol.iterator];
+  if (!m) return o; // Not iterable: hand the input back untouched.
+  var i = m.call(o), r, ar = [], e;
+  try {
+    while ((n === void 0 || n-- > 0) && !(r = i.next()).done) ar.push(r.value);
+  } catch (error) {
+    e = { error }; // Remember the failure so the iterator can be closed before rethrowing.
+  } finally {
+    try {
+      if (r && !r.done && (m = i["return"])) m.call(i); // Stopped before exhaustion: let the iterator clean up.
+    } finally {
+      if (e) throw e.error;
+    }
+  }
+  return ar;
+};
+var __spreadArray = function(to, from, pack) { // Returns `to` concatenated with the elements of `from`.
+  if (pack || arguments.length === 2) for (var i = 0, l = from.length, ar; i < l; i++) { // Hole-filling pass, only when `pack` is truthy or omitted.
+    if (ar || !(i in from)) {
+      if (!ar) ar = Array.prototype.slice.call(from, 0, i); // First hole found: start a copy of everything before it.
+      ar[i] = from[i];
+    }
+  }
+  return to.concat(ar || Array.prototype.slice.call(from)); // Without a hole-filled copy, append a plain slice of `from`.
+};
+var DiagComponentLogger = (
+  /** @class */
+  (function() {
+    function DiagComponentLogger2(props) {
+      this._namespace = props.namespace || "DiagComponentLogger";
+    }
+    DiagComponentLogger2.prototype.debug = function() {
+      var args = [];
+      for (var _i = 0; _i < arguments.length; _i++) {
+        args[_i] = arguments[_i];
+      }
+      return logProxy("debug", this._namespace, args);
+    };
+    DiagComponentLogger2.prototype.error = function() {
+      var args = [];
+      for (var _i = 0; _i < arguments.length; _i++) {
+        args[_i] = arguments[_i];
+      }
+      return logProxy("error", this._namespace, args);
+    };
+    DiagComponentLogger2.prototype.info = function() {
+      var args = [];
+      for (var _i = 0; _i < arguments.length; _i++) {
+        args[_i] = arguments[_i];
+      }
+      return logProxy("info", this._namespace, args);
+    };
+    DiagComponentLogger2.prototype.warn = function() {
+      var args = [];
+      for (var _i = 0; _i < arguments.length; _i++) {
+        args[_i] = arguments[_i];
+      }
+      return logProxy("warn", this._namespace, args);
+    };
+    DiagComponentLogger2.prototype.verbose = function() {
+      var args = [];
+      for (var _i = 0; _i < arguments.length; _i++) {
+        args[_i] = arguments[_i];
+      }
+      return logProxy("verbose", this._namespace, args);
+    };
+    return DiagComponentLogger2;
+  })()
+);
+function logProxy(funcName, namespace, args) {
+  var logger2 = getGlobal("diag");
+  if (!logger2) {
+    return;
+  }
+  args.unshift(namespace);
+  return logger2[funcName].apply(logger2, __spreadArray([], __read(args), false));
+}
+// node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/diag/types.js
+var DiagLogLevel; // Numeric diagnostic log levels; a logger emits a message when its max level is >= the message's level.
+(function(DiagLogLevel2) { // Fills a two-way map: name -> number and number -> name.
+  DiagLogLevel2[DiagLogLevel2["NONE"] = 0] = "NONE";
+  DiagLogLevel2[DiagLogLevel2["ERROR"] = 30] = "ERROR";
+  DiagLogLevel2[DiagLogLevel2["WARN"] = 50] = "WARN";
+  DiagLogLevel2[DiagLogLevel2["INFO"] = 60] = "INFO";
+  DiagLogLevel2[DiagLogLevel2["DEBUG"] = 70] = "DEBUG";
+  DiagLogLevel2[DiagLogLevel2["VERBOSE"] = 80] = "VERBOSE";
+  DiagLogLevel2[DiagLogLevel2["ALL"] = 9999] = "ALL";
+})(DiagLogLevel || (DiagLogLevel = {}));
+// node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/diag/internal/logLevelLogger.js
+function createLogLevelDiagLogger(maxLevel, logger2) {
+  if (maxLevel < DiagLogLevel.NONE) {
+    maxLevel = DiagLogLevel.NONE;
+  } else if (maxLevel > DiagLogLevel.ALL) {
+    maxLevel = DiagLogLevel.ALL;
+  }
+  logger2 = logger2 || {};
+  function _filterFunc(funcName, theLevel) {
+    var theFunc = logger2[funcName];
+    if (typeof theFunc === "function" && maxLevel >= theLevel) {
+      return theFunc.bind(logger2);
+    }
+    return function() {
+    };
+  }
+  return {
+    error: _filterFunc("error", DiagLogLevel.ERROR),
+    warn: _filterFunc("warn", DiagLogLevel.WARN),
+    info: _filterFunc("info", DiagLogLevel.INFO),
+    debug: _filterFunc("debug", DiagLogLevel.DEBUG),
+    verbose: _filterFunc("verbose", DiagLogLevel.VERBOSE)
+  };
+}
+// node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/api/diag.js
+var __read2 = function(o, n) { // Reads up to n values (all of them when n is undefined) from iterable o into a new array.
+  var m = typeof Symbol === "function" && o[Symbol.iterator];
+  if (!m) return o; // Not iterable: hand the input back untouched.
+  var i = m.call(o), r, ar = [], e;
+  try {
+    while ((n === void 0 || n-- > 0) && !(r = i.next()).done) ar.push(r.value);
+  } catch (error) {
+    e = { error }; // Remember the failure so the iterator can be closed before rethrowing.
+  } finally {
+    try {
+      if (r && !r.done && (m = i["return"])) m.call(i); // Stopped before exhaustion: let the iterator clean up.
+    } finally {
+      if (e) throw e.error;
+    }
+  }
+  return ar;
+};
+var __spreadArray2 = function(to, from, pack) { // Returns `to` concatenated with the elements of `from`.
+  if (pack || arguments.length === 2) for (var i = 0, l = from.length, ar; i < l; i++) { // Hole-filling pass, only when `pack` is truthy or omitted.
+    if (ar || !(i in from)) {
+      if (!ar) ar = Array.prototype.slice.call(from, 0, i); // First hole found: start a copy of everything before it.
+      ar[i] = from[i];
+    }
+  }
+  return to.concat(ar || Array.prototype.slice.call(from)); // Without a hole-filled copy, append a plain slice of `from`.
+};
+var API_NAME = "diag"; // Registry slot name used when disabling the diag logger.
+var DiagAPI = ( // Singleton-style diagnostics API: installs or removes the global logger and proxies log calls to it.
+  /** @class */
+  (function() {
+    function DiagAPI2() {
+      function _logProxy(funcName) { // Builds a function that forwards its arguments to the current global logger's `funcName`.
+        return function() {
+          var args = [];
+          for (var _i = 0; _i < arguments.length; _i++) {
+            args[_i] = arguments[_i];
+          }
+          var logger2 = getGlobal("diag");
+          if (!logger2)
+            return; // No logger registered: the call is silently dropped.
+          return logger2[funcName].apply(logger2, __spreadArray2([], __read2(args), false));
+        };
+      }
+      var self = this;
+      var setLogger = function(logger2, optionsOrLogLevel) { // Installs `logger2` as the global logger; returns false when it is rejected.
+        var _a, _b, _c;
+        if (optionsOrLogLevel === void 0) {
+          optionsOrLogLevel = { logLevel: DiagLogLevel.INFO };
+        }
+        if (logger2 === self) { // Using this API object as its own logger would recurse forever.
+          var err = new Error("Cannot use diag as the logger for itself. Please use a DiagLogger implementation like ConsoleDiagLogger or a custom implementation");
+          self.error((_a = err.stack) !== null && _a !== void 0 ? _a : err.message);
+          return false;
+        }
+        if (typeof optionsOrLogLevel === "number") { // A bare number is shorthand for { logLevel }.
+          optionsOrLogLevel = {
+            logLevel: optionsOrLogLevel
+          };
+        }
+        var oldLogger = getGlobal("diag");
+        var newLogger = createLogLevelDiagLogger((_b = optionsOrLogLevel.logLevel) !== null && _b !== void 0 ? _b : DiagLogLevel.INFO, logger2);
+        if (oldLogger && !optionsOrLogLevel.suppressOverrideMessage) { // Warn on both loggers that a replacement is happening.
+          var stack = (_c = new Error().stack) !== null && _c !== void 0 ? _c : "<failed to generate stacktrace>";
+          oldLogger.warn("Current logger will be overwritten from " + stack);
+          newLogger.warn("Current logger will overwrite one already registered from " + stack);
+        }
+        return registerGlobal("diag", newLogger, self, true); // allowOverride = true: replacing an existing logger is permitted.
+      };
+      self.setLogger = setLogger;
+      self.disable = function() { // Removes the global logger.
+        unregisterGlobal(API_NAME, self);
+      };
+      self.createComponentLogger = function(options) {
+        return new DiagComponentLogger(options);
+      };
+      self.verbose = _logProxy("verbose");
+      self.debug = _logProxy("debug");
+      self.info = _logProxy("info");
+      self.warn = _logProxy("warn");
+      self.error = _logProxy("error");
+    }
+    DiagAPI2.instance = function() { // Creates the shared instance on first use and returns it afterwards.
+      if (!this._instance) {
+        this._instance = new DiagAPI2();
+      }
+      return this._instance;
+    };
+    return DiagAPI2;
+  })()
+);
+// node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/context/context.js
+function createContextKey(description) { // Returns Symbol.for(description), so equal descriptions yield the same symbol.
+  return Symbol.for(description);
+}
+var BaseContext = (
+  /** @class
+   * Key/value context backed by a Map. setValue and deleteValue do not modify
+   * the receiver: each builds a new BaseContext from a copy of the current
+   * entries, applies the change to that copy, and returns it.
+   */
+  /* @__PURE__ */ (function() {
+    function BaseContext2(parentContext) {
+      var self = this;
+      self._currentContext = parentContext ? new Map(parentContext) : /* @__PURE__ */ new Map(); // Copy the given entries, or start empty.
+      self.getValue = function(key) {
+        return self._currentContext.get(key);
+      };
+      self.setValue = function(key, value) { // Returns a new context that has key set to value.
+        var context2 = new BaseContext2(self._currentContext);
+        context2._currentContext.set(key, value);
+        return context2;
+      };
+      self.deleteValue = function(key) { // Returns a new context without key.
+        var context2 = new BaseContext2(self._currentContext);
+        context2._currentContext.delete(key);
+        return context2;
+      };
+    }
+    return BaseContext2;
+  })()
+);
+var ROOT_CONTEXT = new BaseContext(); // Context created with no parent entries.
+// node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/context/NoopContextManager.js
+var __read3 = function(o, n) { // Reads up to n values (all of them when n is undefined) from iterable o into a new array.
+  var m = typeof Symbol === "function" && o[Symbol.iterator];
+  if (!m) return o; // No iterator method: return the input itself.
+  var i = m.call(o), r, ar = [], e;
+  try {
+    while ((n === void 0 || n-- > 0) && !(r = i.next()).done) ar.push(r.value);
+  } catch (error) {
+    e = { error }; // Remember the failure so the iterator can be closed first.
+  } finally {
+    try {
+      if (r && !r.done && (m = i["return"])) m.call(i); // Stopped early: call the iterator's return() if it has one.
+    } finally {
+      if (e) throw e.error; // Re-throw the error captured above.
+    }
+  }
+  return ar;
+};
+var __spreadArray3 = function(to, from, pack) { // Returns a new array: `to` followed by the elements of `from`.
+  if (pack || arguments.length === 2) for (var i = 0, l = from.length, ar; i < l; i++) { // Hole scan runs only when pack is truthy or exactly two arguments were passed.
+    if (ar || !(i in from)) { // From the first missing index onward, copy into ar so every slot is assigned explicitly.
+      if (!ar) ar = Array.prototype.slice.call(from, 0, i);
+      ar[i] = from[i];
+    }
+  }
+  return to.concat(ar || Array.prototype.slice.call(from));
+};
+var NoopContextManager = (
+  /** @class
+   * Context manager that tracks nothing: active() always returns
+   * ROOT_CONTEXT, with() only invokes the callback, bind() returns its target
+   * unchanged, and enable()/disable() return the manager itself.
+   */
+  (function() {
+    function NoopContextManager2() {
+    }
+    NoopContextManager2.prototype.active = function() {
+      return ROOT_CONTEXT;
+    };
+    NoopContextManager2.prototype.with = function(_context, fn, thisArg) { // _context is ignored; returns whatever fn returns.
+      var args = []; // Arguments after thisArg are forwarded to fn.
+      for (var _i = 3; _i < arguments.length; _i++) {
+        args[_i - 3] = arguments[_i];
+      }
+      return fn.call.apply(fn, __spreadArray3([thisArg], __read3(args), false)); // Same effect as fn.call(thisArg, ...args).
+    };
+    NoopContextManager2.prototype.bind = function(_context, target) {
+      return target;
+    };
+    NoopContextManager2.prototype.enable = function() {
+      return this;
+    };
+    NoopContextManager2.prototype.disable = function() {
+      return this;
+    };
+    return NoopContextManager2;
+  })()
+);
+// node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/api/context.js
+var __read4 = function(o, n) { // Reads up to n values (all of them when n is undefined) from iterable o into a new array.
+  var m = typeof Symbol === "function" && o[Symbol.iterator];
+  if (!m) return o; // No iterator method: return the input itself.
+  var i = m.call(o), r, ar = [], e;
+  try {
+    while ((n === void 0 || n-- > 0) && !(r = i.next()).done) ar.push(r.value);
+  } catch (error) {
+    e = { error }; // Remember the failure so the iterator can be closed first.
+  } finally {
+    try {
+      if (r && !r.done && (m = i["return"])) m.call(i); // Stopped early: call the iterator's return() if it has one.
+    } finally {
+      if (e) throw e.error; // Re-throw the error captured above.
+    }
+  }
+  return ar;
+};
+var __spreadArray4 = function(to, from, pack) { // Returns a new array: `to` followed by the elements of `from`.
+  if (pack || arguments.length === 2) for (var i = 0, l = from.length, ar; i < l; i++) { // Hole scan runs only when pack is truthy or exactly two arguments were passed.
+    if (ar || !(i in from)) { // From the first missing index onward, copy into ar so every slot is assigned explicitly.
+      if (!ar) ar = Array.prototype.slice.call(from, 0, i);
+      ar[i] = from[i];
+    }
+  }
+  return to.concat(ar || Array.prototype.slice.call(from));
+};
+var API_NAME2 = "context"; // Key used with registerGlobal/getGlobal/unregisterGlobal for the context manager.
+var NOOP_CONTEXT_MANAGER = new NoopContextManager(); // Fallback used when getGlobal(API_NAME2) yields nothing.
+var ContextAPI = (
+  /** @class
+   * Facade over the registered context manager. Every call is delegated to
+   * getGlobal("context"), or to NOOP_CONTEXT_MANAGER when that is falsy.
+   * ContextAPI.getInstance() creates the object on first use and caches it.
+   */
+  (function() {
+    function ContextAPI2() {
+    }
+    ContextAPI2.getInstance = function() { // Lazily created, cached on `this._instance`.
+      if (!this._instance) {
+        this._instance = new ContextAPI2();
+      }
+      return this._instance;
+    };
+    ContextAPI2.prototype.setGlobalContextManager = function(contextManager) { // Returns registerGlobal()'s result.
+      return registerGlobal(API_NAME2, contextManager, DiagAPI.instance());
+    };
+    ContextAPI2.prototype.active = function() {
+      return this._getContextManager().active();
+    };
+    ContextAPI2.prototype.with = function(context2, fn, thisArg) {
+      var _a;
+      var args = []; // Arguments after thisArg are forwarded unchanged.
+      for (var _i = 3; _i < arguments.length; _i++) {
+        args[_i - 3] = arguments[_i];
+      }
+      return (_a = this._getContextManager()).with.apply(_a, __spreadArray4([context2, fn, thisArg], __read4(args), false)); // Same effect as manager.with(context2, fn, thisArg, ...args).
+    };
+    ContextAPI2.prototype.bind = function(context2, target) {
+      return this._getContextManager().bind(context2, target);
+    };
+    ContextAPI2.prototype._getContextManager = function() {
+      return getGlobal(API_NAME2) || NOOP_CONTEXT_MANAGER;
+    };
+    ContextAPI2.prototype.disable = function() { // Disables the current manager, then removes the global registration.
+      this._getContextManager().disable();
+      unregisterGlobal(API_NAME2, DiagAPI.instance());
+    };
+    return ContextAPI2;
+  })()
+);
+// node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/trace_flags.js
+var TraceFlags; // Two-way lookup object: TraceFlags.NONE === 0 and TraceFlags[0] === "NONE" (likewise SAMPLED / 1).
+(function(TraceFlags2) {
+  TraceFlags2[TraceFlags2["NONE"] = 0] = "NONE";
+  TraceFlags2[TraceFlags2["SAMPLED"] = 1] = "SAMPLED";
+})(TraceFlags || (TraceFlags = {}));
+// node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/invalid-span-constants.js
+var INVALID_SPANID = "0000000000000000"; // All-zero span id; isValidSpanId() rejects it.
+var INVALID_TRACEID = "00000000000000000000000000000000"; // All-zero trace id; isValidTraceId() rejects it.
+var INVALID_SPAN_CONTEXT = { // Default span context of a NonRecordingSpan constructed without an argument.
+  traceId: INVALID_TRACEID,
+  spanId: INVALID_SPANID,
+  traceFlags: TraceFlags.NONE
+};
+// node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/NonRecordingSpan.js
+var NonRecordingSpan = (
+  /** @class
+   * Span that records nothing. It only carries the span context given to the
+   * constructor (INVALID_SPAN_CONTEXT when omitted). The setters and add*
+   * methods return `this`, end() and recordException() have empty bodies, and
+   * isRecording() is always false.
+   */
+  (function() {
+    function NonRecordingSpan2(_spanContext) {
+      if (_spanContext === void 0) { // Only an omitted/undefined argument triggers the default.
+        _spanContext = INVALID_SPAN_CONTEXT;
+      }
+      this._spanContext = _spanContext;
+    }
+    NonRecordingSpan2.prototype.spanContext = function() {
+      return this._spanContext;
+    };
+    NonRecordingSpan2.prototype.setAttribute = function(_key, _value) {
+      return this;
+    };
+    NonRecordingSpan2.prototype.setAttributes = function(_attributes) {
+      return this;
+    };
+    NonRecordingSpan2.prototype.addEvent = function(_name, _attributes) {
+      return this;
+    };
+    NonRecordingSpan2.prototype.addLink = function(_link) {
+      return this;
+    };
+    NonRecordingSpan2.prototype.addLinks = function(_links) {
+      return this;
+    };
+    NonRecordingSpan2.prototype.setStatus = function(_status) {
+      return this;
+    };
+    NonRecordingSpan2.prototype.updateName = function(_name) {
+      return this;
+    };
+    NonRecordingSpan2.prototype.end = function(_endTime) {
+    };
+    NonRecordingSpan2.prototype.isRecording = function() {
+      return false;
+    };
+    NonRecordingSpan2.prototype.recordException = function(_exception, _time) {
+    };
+    return NonRecordingSpan2;
+  })()
+);
-<goal>
-Your goal (assistant) is to interact with the Agent Under Test (user) as if you were a human user to see if it can complete the scenario successfully.
-</goal>
+// node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/context-utils.js
+var SPAN_KEY = createContextKey("OpenTelemetry Context Key SPAN"); // Context key under which the span is stored.
+function getSpan(context2) { // Span stored in context2, or undefined when the stored value is falsy.
+  return context2.getValue(SPAN_KEY) || void 0;
+}
+function getActiveSpan() { // Span stored in the currently active context, if any.
+  return getSpan(ContextAPI.getInstance().active());
+}
+function setSpan(context2, span) { // Returns the result of context2.setValue(); context2 itself is not assigned to here.
+  return context2.setValue(SPAN_KEY, span);
+}
+function deleteSpan(context2) { // Returns the result of context2.deleteValue() for the span key.
+  return context2.deleteValue(SPAN_KEY);
+}
+function setSpanContext(context2, spanContext) { // Stores spanContext wrapped in a NonRecordingSpan.
+  return setSpan(context2, new NonRecordingSpan(spanContext));
+}
+function getSpanContext(context2) { // Span context of the stored span; undefined when no span is stored.
+  var _a;
+  return (_a = getSpan(context2)) === null || _a === void 0 ? void 0 : _a.spanContext();
+}
-<scenario>
-${description}
-</scenario>
+// node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/spancontext-utils.js
+var VALID_TRACEID_REGEX = /^([0-9a-f]{32})$/i; // Exactly 32 hex digits, case-insensitive.
+var VALID_SPANID_REGEX = /^[0-9a-f]{16}$/i; // Exactly 16 hex digits, case-insensitive.
+function isValidTraceId(traceId) { // Well-formed and not the all-zero INVALID_TRACEID.
+  return VALID_TRACEID_REGEX.test(traceId) && traceId !== INVALID_TRACEID;
+}
+function isValidSpanId(spanId) { // Well-formed and not the all-zero INVALID_SPANID.
+  return VALID_SPANID_REGEX.test(spanId) && spanId !== INVALID_SPANID;
+}
+function isSpanContextValid(spanContext) { // True only when both the trace id and the span id are valid.
+  return isValidTraceId(spanContext.traceId) && isValidSpanId(spanContext.spanId);
+}
+function wrapSpanContext(spanContext) { // Wraps spanContext in a NonRecordingSpan.
+  return new NonRecordingSpan(spanContext);
+}
-<rules>
-- DO NOT carry over any requests yourself, YOU ARE NOT the assistant today, you are the user
-</rules>
-`.trim();
+// node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/NoopTracer.js
+var contextApi = ContextAPI.getInstance(); // Shared ContextAPI instance used by NoopTracer below.
+var NoopTracer = (
+  /** @class
+   * Tracer whose spans are always NonRecordingSpan instances. startSpan
+   * carries over the parent span context found in the given context when it
+   * is valid; startActiveSpan additionally runs a callback with the new span
+   * stored in the context.
+   */
+  (function() {
+    function NoopTracer2() {
+    }
+    NoopTracer2.prototype.startSpan = function(name, options, context2) {
+      if (context2 === void 0) { // Only an omitted/undefined context defaults to the active one.
+        context2 = contextApi.active();
+      }
+      var root = Boolean(options === null || options === void 0 ? void 0 : options.root);
+      if (root) { // Root spans ignore any parent and get the default span context.
+        return new NonRecordingSpan();
+      }
+      var parentFromContext = context2 && getSpanContext(context2);
+      if (isSpanContext(parentFromContext) && isSpanContextValid(parentFromContext)) { // Valid parent: the new span reuses its span context.
+        return new NonRecordingSpan(parentFromContext);
+      } else {
+        return new NonRecordingSpan();
+      }
+    };
+    NoopTracer2.prototype.startActiveSpan = function(name, arg2, arg3, arg4) { // Accepts (name, fn), (name, opts, fn) or (name, opts, ctx, fn).
+      var opts;
+      var ctx;
+      var fn;
+      if (arguments.length < 2) { // No callback supplied: nothing to run, returns undefined.
+        return;
+      } else if (arguments.length === 2) {
+        fn = arg2;
+      } else if (arguments.length === 3) {
+        opts = arg2;
+        fn = arg3;
+      } else {
+        opts = arg2;
+        ctx = arg3;
+        fn = arg4;
+      }
+      var parentContext = ctx !== null && ctx !== void 0 ? ctx : contextApi.active();
+      var span = this.startSpan(name, opts, parentContext);
+      var contextWithSpanSet = setSpan(parentContext, span);
+      return contextApi.with(contextWithSpanSet, fn, void 0, span); // fn receives the span as its argument; its return value is passed through.
+    };
+    return NoopTracer2;
+  })()
+);
+function isSpanContext(spanContext) { // Duck-type check: an object with string spanId/traceId and a numeric traceFlags.
+  return typeof spanContext === "object" && typeof spanContext["spanId"] === "string" && typeof spanContext["traceId"] === "string" && typeof spanContext["traceFlags"] === "number"; // NOTE(review): typeof null === "object", so a null argument would reach the property reads and throw - confirm callers never pass null.
 }
-var UserSimulatorAgent = class extends UserSimulatorAgentAdapter {
-  constructor(cfg) {
-    super();
-    this.cfg = cfg;
-  }
-  logger = new Logger(this.constructor.name);
-  call = async (input) => {
-    const config2 = this.cfg;
-    const systemPrompt = (config2 == null ? void 0 : config2.systemPrompt) ?? buildSystemPrompt2(input.scenarioConfig.description);
-    const messages = [
-      { role: "system", content: systemPrompt },
-      { role: "assistant", content: "Hello, how can I help you today" },
-      ...input.messages
-    ];
-    const projectConfig = await getProjectConfig();
-    const mergedConfig = mergeAndValidateConfig(config2 ?? {}, projectConfig);
-    if (!mergedConfig.model) {
-      throw new Error("Model is required for the user simulator agent");
+// node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/ProxyTracer.js
+var NOOP_TRACER = new NoopTracer(); // Fallback used while the provider has no delegate tracer to hand out.
+var ProxyTracer = (
+  /** @class
+   * Tracer that forwards to a delegate obtained from its provider via
+   * getDelegateTracer(name, version, options). Until the provider returns
+   * one, calls go to NOOP_TRACER; once obtained, the delegate is cached.
+   */
+  (function() {
+    function ProxyTracer2(_provider, name, version, options) {
+      this._provider = _provider;
+      this.name = name;
+      this.version = version;
+      this.options = options;
     }
-    const reversedMessages = messageRoleReversal(messages);
-    const completion = await this.generateText({
-      model: mergedConfig.model,
-      messages: reversedMessages,
-      temperature: mergedConfig.temperature ?? DEFAULT_TEMPERATURE,
-      maxTokens: mergedConfig.maxTokens
-    });
-    const messageContent = completion.text;
-    if (!messageContent) {
-      throw new Error("No response content from LLM");
+    ProxyTracer2.prototype.startSpan = function(name, options, context2) {
+      return this._getTracer().startSpan(name, options, context2);
+    };
+    ProxyTracer2.prototype.startActiveSpan = function(_name, _options, _context, _fn) {
+      var tracer = this._getTracer();
+      return Reflect.apply(tracer.startActiveSpan, tracer, arguments); // Forwards the exact arguments received, whatever their count.
+    };
+    ProxyTracer2.prototype._getTracer = function() {
+      if (this._delegate) { // Already resolved: reuse it.
+        return this._delegate;
+      }
+      var tracer = this._provider.getDelegateTracer(this.name, this.version, this.options);
+      if (!tracer) { // Not available yet: fall back without caching, so the next call asks again.
+        return NOOP_TRACER;
+      }
+      this._delegate = tracer;
+      return this._delegate;
+    };
+    return ProxyTracer2;
+  })()
+);
+// node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/NoopTracerProvider.js
+var NoopTracerProvider = (
+  /** @class
+   * Provider whose getTracer() ignores its arguments and returns a new
+   * NoopTracer on every call.
+   */
+  (function() {
+    function NoopTracerProvider2() {
     }
-    return { role: "user", content: messageContent };
-  };
-  async generateText(input) {
-    try {
-      return await (0, import_ai2.generateText)(input);
-    } catch (error) {
-      this.logger.error("Error generating text", { error });
-      throw error;
+    NoopTracerProvider2.prototype.getTracer = function(_name, _version, _options) {
+      return new NoopTracer();
+    };
+    return NoopTracerProvider2;
+  })()
+);
+// node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/ProxyTracerProvider.js
+var NOOP_TRACER_PROVIDER = new NoopTracerProvider(); // Returned by getDelegate() while no delegate has been set.
+var ProxyTracerProvider = (
+  /** @class
+   * Provider that forwards to a delegate set through setDelegate(). While no
+   * delegate is set, getTracer() returns ProxyTracer objects that ask this
+   * provider for a delegate tracer each time they are used.
+   */
+  (function() {
+    function ProxyTracerProvider2() {
     }
-  }
-};
-var userSimulatorAgent = (config2) => {
-  return new UserSimulatorAgent(config2);
-};
+    ProxyTracerProvider2.prototype.getTracer = function(name, version, options) {
+      var _a;
+      return (_a = this.getDelegateTracer(name, version, options)) !== null && _a !== void 0 ? _a : new ProxyTracer(this, name, version, options); // Delegate's tracer when available, else a proxy bound to this provider.
+    };
+    ProxyTracerProvider2.prototype.getDelegate = function() {
+      var _a;
+      return (_a = this._delegate) !== null && _a !== void 0 ? _a : NOOP_TRACER_PROVIDER;
+    };
+    ProxyTracerProvider2.prototype.setDelegate = function(delegate) {
+      this._delegate = delegate;
+    };
+    ProxyTracerProvider2.prototype.getDelegateTracer = function(name, version, options) { // undefined while no delegate is set.
+      var _a;
+      return (_a = this._delegate) === null || _a === void 0 ? void 0 : _a.getTracer(name, version, options);
+    };
+    return ProxyTracerProvider2;
+  })()
+);
-// src/execution/index.ts
-var execution_exports = {};
-__export(execution_exports, {
-  ScenarioExecution: () => ScenarioExecution,
-  ScenarioExecutionState: () => ScenarioExecutionState,
-  StateChangeEventType: () => StateChangeEventType
-});
+// node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/context-api.js
+var context = ContextAPI.getInstance(); // Shared ContextAPI instance.
+// node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/api/trace.js
+var API_NAME3 = "trace"; // Key used with registerGlobal/getGlobal/unregisterGlobal for the tracer provider.
+var TraceAPI = (
+  /** @class
+   * Entry point for tracing. It owns a ProxyTracerProvider and exposes the
+   * span/context helper functions as instance properties.
+   * TraceAPI.getInstance() creates the object on first use and caches it.
+   */
+  (function() {
+    function TraceAPI2() {
+      this._proxyTracerProvider = new ProxyTracerProvider();
+      this.wrapSpanContext = wrapSpanContext;
+      this.isSpanContextValid = isSpanContextValid;
+      this.deleteSpan = deleteSpan;
+      this.getSpan = getSpan;
+      this.getActiveSpan = getActiveSpan;
+      this.getSpanContext = getSpanContext;
+      this.setSpan = setSpan;
+      this.setSpanContext = setSpanContext;
+    }
+    TraceAPI2.getInstance = function() { // Lazily created, cached on `this._instance`.
+      if (!this._instance) {
+        this._instance = new TraceAPI2();
+      }
+      return this._instance;
+    };
+    TraceAPI2.prototype.setGlobalTracerProvider = function(provider) { // Returns registerGlobal()'s result.
+      var success = registerGlobal(API_NAME3, this._proxyTracerProvider, DiagAPI.instance()); // The proxy is what gets registered, not `provider` itself.
+      if (success) { // Point the proxy at the real provider only when registration succeeded.
+        this._proxyTracerProvider.setDelegate(provider);
+      }
+      return success;
+    };
+    TraceAPI2.prototype.getTracerProvider = function() {
+      return getGlobal(API_NAME3) || this._proxyTracerProvider;
+    };
+    TraceAPI2.prototype.getTracer = function(name, version) {
+      return this.getTracerProvider().getTracer(name, version);
+    };
+    TraceAPI2.prototype.disable = function() { // Removes the global registration and starts over with a fresh proxy provider.
+      unregisterGlobal(API_NAME3, DiagAPI.instance());
+      this._proxyTracerProvider = new ProxyTracerProvider();
+    };
+    return TraceAPI2;
+  })()
+);
+// node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace-api.js
+var trace = TraceAPI.getInstance(); // Shared TraceAPI instance.
 // src/execution/scenario-execution.ts
+var import_langwatch = require("langwatch");
+var import_observability3 = require("langwatch/observability");
 var import_rxjs2 = require("rxjs");
 // src/execution/scenario-execution-state.ts
@@ -711,9 +2274,13 @@ var ScenarioExecutionState = class {
    * Adds a message to the conversation history.
    *
    * @param message - The message to add.
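+   * NOTE: the visible signature is addMessage(message) with no separate traceId argument; presumably an optional trace ID travels on the message object itself - confirm.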
    */
   addMessage(message2) {
-    const messageWithId = { ...message2, id: generateMessageId() };
+    const messageWithId = {
+      ...message2,
+      id: generateMessageId() // Listed after the spread, so it replaces any id already present on message2.
+    };
     this._messages.push(messageWithId);
     this.eventSubject.next({ type: "MESSAGE_ADDED" /* MESSAGE_ADDED */ });
   }
@@ -769,7 +2336,7 @@ var ScenarioExecutionState = class {
 // src/events/schema.ts
 var import_core = require("@ag-ui/core");
-var import_zod4 = require("zod");
+var import_zod = require("zod");
 var Verdict = /* @__PURE__ */ ((Verdict2) => {
   Verdict2["SUCCESS"] = "success";
   Verdict2["FAILURE"] = "failure";
@@ -785,68 +2352,69 @@ var ScenarioRunStatus = /* @__PURE__ */ ((ScenarioRunStatus2) => {
   ScenarioRunStatus2["FAILED"] = "FAILED";
   return ScenarioRunStatus2;
 })(ScenarioRunStatus || {});
-var baseEventSchema = import_zod4.z.object({
-  type: import_zod4.z.nativeEnum(import_core.EventType),
-  timestamp: import_zod4.z.number(),
-  rawEvent: import_zod4.z.any().optional()
+var baseEventSchema = import_zod.z.object({ // Fields common to every event: type, timestamp, optional rawEvent.
+  type: import_zod.z.nativeEnum(import_core.EventType),
+  timestamp: import_zod.z.number(),
+  rawEvent: import_zod.z.any().optional()
 });
-var batchRunIdSchema = import_zod4.z.string();
-var scenarioRunIdSchema = import_zod4.z.string();
-var scenarioIdSchema = import_zod4.z.string();
+var batchRunIdSchema = import_zod.z.string();
+var scenarioRunIdSchema = import_zod.z.string();
+var scenarioIdSchema = import_zod.z.string();
 var baseScenarioEventSchema = baseEventSchema.extend({
   batchRunId: batchRunIdSchema,
   scenarioId: scenarioIdSchema,
   scenarioRunId: scenarioRunIdSchema,
-  scenarioSetId: import_zod4.z.string().optional().default("default")
+  scenarioSetId: import_zod.z.string().optional().default("default") // Falls back to "default" when omitted.
 });
 var scenarioRunStartedSchema = baseScenarioEventSchema.extend({
-  type: import_zod4.z.literal("SCENARIO_RUN_STARTED" /* RUN_STARTED */),
-  metadata: import_zod4.z.object({
-    name: import_zod4.z.string().optional(),
-    description: import_zod4.z.string().optional()
+  type: import_zod.z.literal("SCENARIO_RUN_STARTED" /* RUN_STARTED */),
+  metadata: import_zod.z.object({ // Both metadata fields are optional.
+    name: import_zod.z.string().optional(),
+    description: import_zod.z.string().optional()
   })
 });
-var scenarioResultsSchema = import_zod4.z.object({
-  verdict: import_zod4.z.nativeEnum(Verdict),
-  reasoning: import_zod4.z.string().optional(),
-  metCriteria: import_zod4.z.array(import_zod4.z.string()),
-  unmetCriteria: import_zod4.z.array(import_zod4.z.string()),
-  error: import_zod4.z.string().optional()
+var scenarioResultsSchema = import_zod.z.object({ // verdict and the two criteria arrays are required; reasoning and error are optional.
+  verdict: import_zod.z.nativeEnum(Verdict),
+  reasoning: import_zod.z.string().optional(),
+  metCriteria: import_zod.z.array(import_zod.z.string()),
+  unmetCriteria: import_zod.z.array(import_zod.z.string()),
+  error: import_zod.z.string().optional()
 });
 var scenarioRunFinishedSchema = baseScenarioEventSchema.extend({
-  type: import_zod4.z.literal("SCENARIO_RUN_FINISHED" /* RUN_FINISHED */),
-  status: import_zod4.z.nativeEnum(ScenarioRunStatus),
+  type: import_zod.z.literal("SCENARIO_RUN_FINISHED" /* RUN_FINISHED */),
+  status: import_zod.z.nativeEnum(ScenarioRunStatus),
   results: scenarioResultsSchema.optional().nullable()
 });
 var scenarioMessageSnapshotSchema = import_core.MessagesSnapshotEventSchema.merge(
   baseScenarioEventSchema.extend({
-    type: import_zod4.z.literal("SCENARIO_MESSAGE_SNAPSHOT" /* MESSAGE_SNAPSHOT */)
+    type: import_zod.z.literal("SCENARIO_MESSAGE_SNAPSHOT" /* MESSAGE_SNAPSHOT */)
   })
 );
-var scenarioEventSchema = import_zod4.z.discriminatedUnion("type", [
+var scenarioEventSchema = import_zod.z.discriminatedUnion("type", [ // The "type" literal selects which of the three event schemas applies.
   scenarioRunStartedSchema,
   scenarioRunFinishedSchema,
   scenarioMessageSnapshotSchema
 ]);
-var successSchema = import_zod4.z.object({ success: import_zod4.z.boolean() });
-var errorSchema = import_zod4.z.object({ error: import_zod4.z.string() });
-var stateSchema = import_zod4.z.object({
-  state: import_zod4.z.object({
-    messages: import_zod4.z.array(import_zod4.z.any()),
-    status: import_zod4.z.string()
+var successSchema = import_zod.z.object({ success: import_zod.z.boolean() });
+var errorSchema = import_zod.z.object({ error: import_zod.z.string() });
+var stateSchema = import_zod.z.object({
+  state: import_zod.z.object({
+    messages: import_zod.z.array(import_zod.z.any()), // Message elements are not validated (z.any()).
+    status: import_zod.z.string()
   })
 });
-var runsSchema = import_zod4.z.object({ runs: import_zod4.z.array(import_zod4.z.string()) });
-var eventsSchema = import_zod4.z.object({ events: import_zod4.z.array(scenarioEventSchema) });
+var runsSchema = import_zod.z.object({ runs: import_zod.z.array(import_zod.z.string()) });
+var eventsSchema = import_zod.z.object({ events: import_zod.z.array(scenarioEventSchema) }); // Each element is validated against scenarioEventSchema.
 // src/utils/convert-core-messages-to-agui-messages.ts
-function convertCoreMessagesToAguiMessages(coreMessages) {
+function convertModelMessagesToAguiMessages(modelMessages) {
   const aguiMessages = [];
-  for (const msg of coreMessages) {
+  for (const msg of modelMessages) {
     const id = "id" in msg && typeof msg.id === "string" ? msg.id : generateMessageId();
     switch (true) {
       case msg.role === "system":
         aguiMessages.push({
+          trace_id: msg.traceId,
           id,
           role: "system",
           content: msg.content
@@ -854,6 +2422,7 @@ function convertCoreMessagesToAguiMessages(coreMessages) {
         break;
       case (msg.role === "user" && typeof msg.content === "string"):
         aguiMessages.push({
+          trace_id: msg.traceId,
           id,
           role: "user",
           content: msg.content
@@ -862,6 +2431,7 @@ function convertCoreMessagesToAguiMessages(coreMessages) {
       // Handle any other user message content format
       case (msg.role === "user" && Array.isArray(msg.content)):
         aguiMessages.push({
+          trace_id: msg.traceId,
           id,
           role: "user",
           content: JSON.stringify(msg.content)
@@ -869,6 +2439,7 @@ function convertCoreMessagesToAguiMessages(coreMessages) {
         break;
       case (msg.role === "assistant" && typeof msg.content === "string"):
         aguiMessages.push({
+          trace_id: msg.traceId,
           id,
           role: "assistant",
           content: msg.content
@@ -878,6 +2449,7 @@ function convertCoreMessagesToAguiMessages(coreMessages) {
         const toolCalls = msg.content.filter((p) => p.type === "tool-call");
         const nonToolCalls = msg.content.filter((p) => p.type !== "tool-call");
         aguiMessages.push({
+          trace_id: msg.traceId,
           id,
           role: "assistant",
           content: JSON.stringify(nonToolCalls),
@@ -886,7 +2458,7 @@ function convertCoreMessagesToAguiMessages(coreMessages) {
             type: "function",
             function: {
               name: c.toolName,
-              arguments: JSON.stringify(c.args)
+              arguments: JSON.stringify(c.input)
             }
           }))
         });
@@ -894,11 +2466,13 @@ function convertCoreMessagesToAguiMessages(coreMessages) {
       }
       case msg.role === "tool":
         msg.content.map((p, i) => {
+          var _a;
           aguiMessages.push({
+            trace_id: msg.traceId,
             id: `${id}-${i}`,
             role: "tool",
             toolCallId: p.toolCallId,
-            content: JSON.stringify(p.result)
+            content: JSON.stringify((_a = p.output) == null ? void 0 : _a.value)
           });
         });
         break;
@@ -908,12 +2482,16 @@ function convertCoreMessagesToAguiMessages(coreMessages) {
   }
   return aguiMessages;
 }
-var convert_core_messages_to_agui_messages_default = convertCoreMessagesToAguiMessages;
+var convert_core_messages_to_agui_messages_default = convertModelMessagesToAguiMessages;
 // src/execution/scenario-execution.ts
 var ScenarioExecution = class {
+  /** LangWatch tracer for scenario execution */
+  tracer = (0, import_langwatch.getLangWatchTracer)("@langwatch/scenario");
   /** The current state of the scenario execution */
   state;
+  /** The final result of the scenario execution, set when a conclusion is reached */
+  _result;
   /** Logger for debugging and monitoring */
   logger = new Logger("scenario.execution.ScenarioExecution");
   /** Finalized configuration with all defaults applied */
@@ -932,10 +2510,10 @@ var ScenarioExecution = class {
    * Key: agent index, Value: array of pending messages for that agent
    */
   pendingMessages = /* @__PURE__ */ new Map();
-  /** Intermediate result set by agents that make final decisions */
-  partialResult = null;
   /** Accumulated execution time for each agent (for performance tracking) */
   agentTimes = /* @__PURE__ */ new Map();
+  /** Current turn span for trace context management */
+  currentTurnSpan;
   /** Timestamp when execution started (for total time calculation) */
   totalStartTime = 0;
   /** Event stream for monitoring scenario progress */
@@ -974,7 +2552,7 @@ var ScenarioExecution = class {
   /**
    * Gets the complete conversation history as an array of messages.
    *
-   * @returns Array of CoreMessage objects representing the full conversation
+   * @returns Array of ModelMessage objects representing the full conversation
    */
   get messages() {
     return this.state.messages;
@@ -988,6 +2566,41 @@ var ScenarioExecution = class {
   get threadId() {
     return this.state.threadId;
   }
+  /**
+   * Gets the result of the scenario execution if it has been set.
+   *
+   * @returns The scenario result or undefined if not yet set
+   */
+  get result() { // Read-only view of _result, which setResult() assigns.
+    return this._result;
+  }
+  /**
+   * Sets the result of the scenario execution.
+   * This is called when the scenario reaches a conclusion (success or failure).
+   * Automatically includes messages, totalTime, and agentTime from the current execution context.
+   *
+   * @param result - The final scenario result (without messages/timing, which will be added automatically)
+   */
+  setResult(result) {
+    const agentRoleAgentsIdx = this.agents.map((agent2, i) => ({ agent: agent2, idx: i })).filter(({ agent: agent2 }) => agent2.role === "Agent" /* AGENT */).map(({ idx }) => idx); // Positions in this.agents of the agents whose role is "Agent".
+    const agentTimes = agentRoleAgentsIdx.map(
+      (i) => this.agentTimes.get(i) || 0 // Agents with no recorded time count as 0.
+    );
+    const totalAgentTime = agentTimes.reduce((sum, time) => sum + time, 0);
+    this._result = { // Keys listed after the spread replace same-named keys on `result`.
+      ...result,
+      messages: this.state.messages,
+      totalTime: this.totalTime,
+      agentTime: totalAgentTime
+    };
+    this.logger.debug(`[${this.config.id}] Result set`, {
+      success: result.success,
+      reasoning: result.reasoning,
+      totalTime: this.totalTime,
+      agentTime: totalAgentTime,
+      messageCount: this.state.messages.length
+    });
+  }
   /**
    * The total elapsed time for the scenario execution.
    */
@@ -1021,8 +2634,14 @@ var ScenarioExecution = class {
    * ```
    */
   async execute() {
+    this.logger.debug(`[${this.config.id}] Starting scenario execution`, {
+      name: this.config.name,
+      maxTurns: this.config.maxTurns,
+      scriptLength: this.config.script.length
+    });
     this.reset();
     const scenarioRunId = generateScenarioRunId();
+    this.logger.debug(`[${this.config.id}] Generated run ID: ${scenarioRunId}`);
     this.emitRunStarted({ scenarioRunId });
     const subscription = this.state.events$.pipe(
       (0, import_rxjs2.filter)((event) => event.type === "MESSAGE_ADDED" /* MESSAGE_ADDED */)
@@ -1032,18 +2651,17 @@ var ScenarioExecution = class {
     try {
       for (let i = 0; i < this.config.script.length; i++) {
         const scriptStep = this.config.script[i];
-        const result = await this.executeScriptStep(scriptStep, i);
-        if (result && typeof result === "object" && "success" in result) {
+        await this.executeScriptStep(scriptStep, i);
+        if (this.result) {
           this.emitRunFinished({
             scenarioRunId,
-            status: result.success ? "SUCCESS" /* SUCCESS */ : "FAILED" /* FAILED */,
-            result
+            status: this.result.success ? "SUCCESS" /* SUCCESS */ : "FAILED" /* FAILED */,
+            result: this.result
           });
-          return result;
+          return this.result;
         }
       }
-      this.emitRunFinished({ scenarioRunId, status: "FAILED" /* FAILED */ });
-      return this.reachedMaxTurns(
+      this.reachedMaxTurns(
         [
           "Reached end of script without conclusion, add one of the following to the end of the script:",
           "- `Scenario.proceed()` to let the simulation continue to play out",
@@ -1051,20 +2669,21 @@ var ScenarioExecution = class {
           "- `Scenario.succeed()` or `Scenario.fail()` to end the test with an explicit result"
         ].join("\n")
       );
+      this.emitRunFinished({ scenarioRunId, status: "FAILED" /* FAILED */ });
+      return this.result;
     } catch (error) {
       const errorInfo = extractErrorInfo(error);
-      const errorResult = {
+      this.setResult({
         success: false,
-        messages: this.state.messages,
         reasoning: `Scenario failed with error: ${errorInfo.message}`,
         metCriteria: [],
         unmetCriteria: [],
         error: JSON.stringify(errorInfo)
-      };
+      });
       this.emitRunFinished({
         scenarioRunId,
         status: "ERROR" /* ERROR */,
-        result: errorResult
+        result: this.result
       });
       throw error;
     } finally {
@@ -1082,50 +2701,66 @@ var ScenarioExecution = class {
    * - Progress to the next turn if needed
    * - Find the next agent that should act
    * - Execute that agent's response
-   * - Return either new messages or a final scenario result
+   * - Set the result if the scenario concludes
    *
    * Note: This method is primarily for debugging or custom execution flows. Most users
    * will use `execute()` to run the entire scenario automatically.
    *
-   * @returns A promise that resolves with either:
-   *   - Array of new messages added during the agent interaction, or
-   *   - A final ScenarioResult if the interaction concludes the scenario
-   * @throws Error if no result is returned from the step
+   * After calling this method, check `this.result` to see if the scenario has concluded.
    *
    * @example
    * ```typescript
    * const execution = new ScenarioExecution(config, script);
    *
    * // Execute one agent interaction at a time
-   * const messages = await execution.step();
-   * if (Array.isArray(messages)) {
-   *   console.log('New messages:', messages);
-   * } else {
-   *   console.log('Scenario finished:', messages.success);
+   * await execution.step();
+   * if (execution.result) {
+   *   console.log('Scenario finished:', execution.result.success);
    * }
    * ```
    */
   async step() {
-    const result = await this._step();
-    if (result === null) throw new Error("No result from step");
-    return result;
+    await this._step();
   }
   async _step(goToNextTurn = true, onTurn) {
+    this.logger.debug(`[${this.config.id}] _step called`, {
+      goToNextTurn,
+      pendingRoles: this.pendingRolesOnTurn,
+      currentTurn: this.state.currentTurn
+    });
     if (this.pendingRolesOnTurn.length === 0) {
-      if (!goToNextTurn) return null;
+      if (!goToNextTurn) {
+        this.logger.debug(
+          `[${this.config.id}] No pending roles, not advancing turn`
+        );
+        return;
+      }
       this.newTurn();
       if (onTurn) await onTurn(this.state);
-      if (this.state.currentTurn >= this.config.maxTurns)
-        return this.reachedMaxTurns();
+      if (this.state.currentTurn >= this.config.maxTurns) {
+        this.logger.debug(
+          `[${this.config.id}] Reached max turns: ${this.state.currentTurn}`
+        );
+        this.reachedMaxTurns();
+        return;
+      }
     }
     const currentRole = this.pendingRolesOnTurn[0];
     const { idx, agent: nextAgent } = this.nextAgentForRole(currentRole);
     if (!nextAgent) {
+      this.logger.debug(
+        `[${this.config.id}] No agent for role ${currentRole}, removing role`
+      );
       this.removePendingRole(currentRole);
       return this._step(goToNextTurn, onTurn);
     }
+    this.logger.debug(`[${this.config.id}] Calling agent`, {
+      role: currentRole,
+      agentIdx: idx,
+      agentName: nextAgent.name ?? nextAgent.constructor.name
+    });
     this.removePendingAgent(nextAgent);
-    return await this.callAgent(idx, currentRole);
+    await this.callAgent(idx, currentRole);
   }
   /**
    * Calls a specific agent to generate a response or make a decision.
@@ -1144,19 +2779,25 @@ var ScenarioExecution = class {
    * After the agent responds:
    * - Performance timing is recorded
    * - Pending messages for this agent are cleared (they've been processed)
-   * - If the agent returns a ScenarioResult, it's returned immediately
+   * - If the agent returns a ScenarioResult, it's set on this.result
    * - Otherwise, the agent's messages are added to the conversation and broadcast
    *
    * @param idx - The index of the agent in the agents array
    * @param role - The role the agent is being asked to play (USER, AGENT, or JUDGE)
    * @param judgmentRequest - Whether this is a judgment request (for judge agents)
-   * @returns A promise that resolves with either:
-   *   - Array of messages if the agent generated a response, or
-   *   - ScenarioResult if the agent made a final decision
    * @throws Error if the agent call fails
    */
   async callAgent(idx, role, judgmentRequest = false) {
+    var _a;
     const agent2 = this.agents[idx];
+    const agentName = agent2.name ?? agent2.constructor.name;
+    this.logger.debug(`[${this.config.id}] callAgent started`, {
+      agentIdx: idx,
+      role,
+      judgmentRequest,
+      agentName,
+      pendingMessagesCount: ((_a = this.pendingMessages.get(idx)) == null ? void 0 : _a.length) ?? 0
+    });
     const startTime = Date.now();
     const agentInput = {
       threadId: this.state.threadId,
@@ -1167,35 +2808,75 @@ var ScenarioExecution = class {
       scenarioState: this.state,
       scenarioConfig: this.config
     };
+    const agentContext = this.currentTurnSpan ? trace.setSpan(context.active(), this.currentTurnSpan) : context.active();
+    const agentSpanName = `${agentName !== Object.prototype.constructor.name ? agent2.constructor.name : "Agent"}.call`;
     try {
-      const agentResponse = await agent2.call(agentInput);
-      const endTime = Date.now();
-      this.addAgentTime(idx, endTime - startTime);
-      this.pendingMessages.delete(idx);
-      if (agentResponse && typeof agentResponse === "object" && "success" in agentResponse) {
-        return agentResponse;
-      }
-      const currentAgentTime = this.agentTimes.get(idx) ?? 0;
-      this.agentTimes.set(idx, currentAgentTime + (Date.now() - startTime));
-      const messages = convertAgentReturnTypesToMessages(
-        agentResponse,
-        role === "User" /* USER */ ? "user" : "assistant"
-      );
-      for (const message2 of messages) {
-        this.state.addMessage(message2);
-        this.broadcastMessage(message2, idx);
-      }
-      return messages;
-    } catch (error) {
-      this.logger.error(
-        `[${this.config.id}] Error calling agent ${agent2.constructor.name}`,
+      await this.tracer.withActiveSpan(
+        agentSpanName,
         {
-          error: error instanceof Error ? error.message : String(error),
-          agent: agent2.constructor.name,
-          agentInput
+          attributes: {
+            [import_observability3.attributes.ATTR_LANGWATCH_THREAD_ID]: this.state.threadId
+          }
+        },
+        agentContext,
+        async (agentSpan) => {
+          agentSpan.setType("agent");
+          agentSpan.setInput("chat_messages", this.state.messages);
+          const agentResponse = await agent2.call(agentInput);
+          const endTime = Date.now();
+          const duration = endTime - startTime;
+          this.logger.debug(`[${this.config.id}] Agent responded`, {
+            agentIdx: idx,
+            duration,
+            responseType: typeof agentResponse,
+            isScenarioResult: agentResponse && typeof agentResponse === "object" && "success" in agentResponse
+          });
+          this.addAgentTime(idx, duration);
+          this.pendingMessages.delete(idx);
+          if (agentResponse && typeof agentResponse === "object" && "success" in agentResponse) {
+            this.logger.debug(
+              `[${this.config.id}] Agent returned ScenarioResult`,
+              {
+                success: agentResponse.success
+              }
+            );
+            this.setResult(agentResponse);
+            return;
+          }
+          const messages = convertAgentReturnTypesToMessages(
+            agentResponse,
+            role === "User" /* USER */ ? "user" : "assistant"
+          );
+          if (messages.length > 0) {
+            agentSpan.setOutput("chat_messages", messages);
+          }
+          const metrics = {
+            duration: endTime - startTime
+          };
+          if (agentResponse && typeof agentResponse === "object") {
+            const usage = agentResponse.usage;
+            if (usage) {
+              if (usage.prompt_tokens !== void 0)
+                metrics.promptTokens = usage.prompt_tokens;
+              if (usage.completion_tokens !== void 0)
+                metrics.completionTokens = usage.completion_tokens;
+              if (usage.total_tokens !== void 0)
+                metrics.totalTokens = usage.total_tokens;
+            }
+          }
+          agentSpan.setMetrics(metrics);
+          const traceId = agentSpan.spanContext().traceId.toString();
+          for (const message2 of messages) {
+            this.state.addMessage({
+              ...message2,
+              traceId
+            });
+            this.broadcastMessage(message2, idx);
+          }
         }
       );
-      throw error;
+    } catch (error) {
+      throw new Error(`[${agentName}] ${error}`, { cause: error });
     }
   }
   /**
@@ -1207,7 +2888,7 @@ var ScenarioExecution = class {
    * - "assistant" messages are routed to AGENT role agents
    * - Other message types are added directly to the conversation
    *
-   * @param message - The CoreMessage to add to the conversation
+   * @param message - The ModelMessage to add to the conversation
    *
    * @example
    * ```typescript
@@ -1236,7 +2917,7 @@ var ScenarioExecution = class {
    *
    * This method is part of the ScenarioExecutionLike interface used by script steps.
    *
-   * @param content - Optional content for the user's message. Can be a string or CoreMessage.
+   * @param content - Optional content for the user's message. Can be a string or ModelMessage.
    *                 If not provided, the user simulator agent will generate the content.
    *
    * @example
@@ -1247,7 +2928,7 @@ var ScenarioExecution = class {
    * // Let user simulator generate content
    * await execution.user();
    *
-   * // Use a CoreMessage object
+   * // Use a ModelMessage object
    * await execution.user({
    *   role: "user",
    *   content: "Tell me a joke"
@@ -1266,7 +2947,7 @@ var ScenarioExecution = class {
    *
    * This method is part of the ScenarioExecutionLike interface used by script steps.
    *
-   * @param content - Optional content for the agent's response. Can be a string or CoreMessage.
+   * @param content - Optional content for the agent's response. Can be a string or ModelMessage.
    *                 If not provided, the agent under test will generate the response.
    *
    * @example
@@ -1277,7 +2958,7 @@ var ScenarioExecution = class {
    * // Use provided content
    * await execution.agent("The weather is sunny today!");
    *
-   * // Use a CoreMessage object
+   * // Use a ModelMessage object
    * await execution.agent({
    *   role: "assistant",
    *   content: "I'm here to help you with weather information."
@@ -1358,17 +3039,22 @@ var ScenarioExecution = class {
    * ```
    */
   async proceed(turns, onTurn, onStep) {
+    this.logger.debug(`[${this.config.id}] proceed called`, {
+      turns,
+      currentTurn: this.state.currentTurn
+    });
     let initialTurn = this.state.currentTurn;
     while (true) {
       const goToNextTurn = turns === void 0 || initialTurn === null || this.state.currentTurn != null && this.state.currentTurn + 1 < initialTurn + turns;
-      const nextMessage = await this._step(goToNextTurn, onTurn);
+      await this._step(goToNextTurn, onTurn);
       if (initialTurn === null) initialTurn = this.state.currentTurn;
-      if (nextMessage === null) {
-        return null;
+      if (this.result) {
+        return this.result;
       }
       if (onStep) await onStep(this.state);
-      if (nextMessage !== null && typeof nextMessage === "object" && "success" in nextMessage)
-        return nextMessage;
+      if (!goToNextTurn) {
+        return null;
+      }
     }
   }
   /**
@@ -1395,13 +3081,13 @@ var ScenarioExecution = class {
    * ```
    */
   async succeed(reasoning) {
-    return {
+    this.setResult({
       success: true,
-      messages: this.state.messages,
       reasoning: reasoning || "Scenario marked as successful with Scenario.succeed()",
       metCriteria: [],
       unmetCriteria: []
-    };
+    });
+    return this.result;
   }
   /**
    * Immediately ends the scenario with a failure verdict.
@@ -1427,13 +3113,13 @@ var ScenarioExecution = class {
    * ```
    */
   async fail(reasoning) {
-    return {
+    this.setResult({
       success: false,
-      messages: this.state.messages,
       reasoning: reasoning || "Scenario marked as failed with Scenario.fail()",
       metCriteria: [],
       unmetCriteria: []
-    };
+    });
+    return this.result;
   }
   /**
    * Adds execution time for a specific agent to the performance tracking.
@@ -1456,53 +3142,6 @@ var ScenarioExecution = class {
     const currentTime = this.agentTimes.get(agentIdx) || 0;
     this.agentTimes.set(agentIdx, currentTime + time);
   }
-  /**
-   * Checks if a partial result has been set for the scenario.
-   *
-   * This method is used internally to determine if a scenario has already reached
-   * a conclusion (success or failure) but hasn't been finalized yet. Partial results
-   * are typically set by agents that make final decisions (like judge agents) and
-   * are later finalized with the complete message history.
-   *
-   * @returns True if a partial result exists, false otherwise
-   *
-   * @example
-   * ```typescript
-   * // This is typically used internally by the execution engine
-   * if (execution.hasResult()) {
-   *   console.log('Scenario has reached a conclusion');
-   * }
-   * ```
-   */
-  hasResult() {
-    return this.partialResult !== null;
-  }
-  /**
-   * Sets a partial result for the scenario.
-   *
-   * This method is used internally to store intermediate results that may be
-   * finalized later with the complete message history. Partial results are typically
-   * created by agents that make final decisions (like judge agents) and contain
-   * the success/failure status, reasoning, and criteria evaluation, but not the
-   * complete message history.
-   *
-   * @param result - The partial result without the messages field. Should include
-   *                success status, reasoning, and criteria evaluation.
-   *
-   * @example
-   * ```typescript
-   * // This is typically called internally by agents that make final decisions
-   * execution.setResult({
-   *   success: true,
-   *   reasoning: "Agent provided accurate weather information",
-   *   metCriteria: ["Provides accurate weather data"],
-   *   unmetCriteria: []
-   * });
-   * ```
-   */
-  setResult(result) {
-    this.partialResult = result;
-  }
   /**
    * Internal method to handle script step calls to agents.
    *
@@ -1515,7 +3154,7 @@ var ScenarioExecution = class {
    * - Progress to a new turn if no agent is available
    * - Execute the agent with the provided content or let it generate content
    * - Handle judgment requests for judge agents
-   * - Return a final result if the agent makes a decision
+   * - Set the result if the agent makes a decision
    *
    * @param role - The role of the agent to call (USER, AGENT, or JUDGE)
    * @param content - Optional content to use instead of letting the agent generate it
@@ -1525,6 +3164,11 @@ var ScenarioExecution = class {
    * @throws Error if no agent is found for the specified role
    */
   async scriptCallAgent(role, content, judgmentRequest = false) {
+    this.logger.debug(`[${this.config.id}] scriptCallAgent`, {
+      role,
+      hasContent: content !== void 0,
+      judgmentRequest
+    });
     this.consumeUntilRole(role);
     let index = -1;
     let agent2 = null;
@@ -1569,11 +3213,8 @@ var ScenarioExecution = class {
       this.broadcastMessage(message2, index);
       return null;
     }
-    const result = await this.callAgent(index, role, judgmentRequest);
-    if (result && typeof result === "object" && "success" in result) {
-      return result;
-    }
-    return null;
+    await this.callAgent(index, role, judgmentRequest);
+    return this.result ?? null;
   }
   /**
    * Resets the scenario execution to its initial state.
@@ -1589,8 +3230,14 @@ var ScenarioExecution = class {
    * - Starts the first turn
    * - Records the start time for performance tracking
    * - Clears any pending messages
+   * - Clears the result from any previous execution
    */
   reset() {
+    this.logger.debug(`[${this.config.id}] Resetting scenario execution`);
+    if (this.currentTurnSpan) {
+      this.currentTurnSpan.end();
+      this.currentTurnSpan = void 0;
+    }
     this.state = new ScenarioExecutionState(this.config);
     this.state.threadId = this.config.threadId || generateThreadId();
     this.setAgents(this.config.agents);
@@ -1598,6 +3245,11 @@ var ScenarioExecution = class {
     this.state.currentTurn = 0;
     this.totalStartTime = Date.now();
     this.pendingMessages.clear();
+    this._result = void 0;
+    this.logger.debug(`[${this.config.id}] Reset complete`, {
+      threadId: this.state.threadId,
+      agentCount: this.agents.length
+    });
   }
   nextAgentForRole(role) {
     for (const agent2 of this.agents) {
@@ -1618,6 +3270,11 @@ var ScenarioExecution = class {
    * multiple agent interactions as agents respond to each other's messages.
    */
   newTurn() {
+    const previousTurn = this.state.currentTurn;
+    if (this.currentTurnSpan) {
+      this.currentTurnSpan.end();
+      this.currentTurnSpan = void 0;
+    }
     this.pendingAgentsOnTurn = new Set(this.agents);
     this.pendingRolesOnTurn = [
       "User" /* USER */,
@@ -1629,6 +3286,19 @@ var ScenarioExecution = class {
     } else {
       this.state.currentTurn++;
     }
+    this.logger.debug(`[${this.config.id}] New turn started`, {
+      previousTurn,
+      currentTurn: this.state.currentTurn,
+      agentCount: this.agents.length
+    });
+    this.currentTurnSpan = this.tracer.startSpan("Scenario Turn", {
+      attributes: {
+        "scenario.name": this.config.name,
+        "scenario.id": this.config.id,
+        [import_observability3.attributes.ATTR_LANGWATCH_THREAD_ID]: this.state.threadId,
+        "scenario.turn": this.state.currentTurn
+      }
+    });
   }
   removePendingRole(role) {
     const index = this.pendingRolesOnTurn.indexOf(role);
@@ -1664,7 +3334,7 @@ var ScenarioExecution = class {
    *
    * This method is called when the scenario execution reaches the maximum number
    * of turns without reaching a conclusion. It creates a failure result with
-   * appropriate reasoning and includes performance metrics.
+   * appropriate reasoning and includes performance metrics, then sets it on this.result.
    *
    * The result includes:
    * - All messages from the conversation
@@ -1674,24 +3344,15 @@ var ScenarioExecution = class {
    * - Total execution time and agent response times
    *
    * @param errorMessage - Optional custom error message to use instead of the default
-   * @returns A ScenarioResult indicating failure due to reaching max turns
    */
   reachedMaxTurns(errorMessage) {
     var _a;
-    const agentRoleAgentsIdx = this.agents.map((agent2, i) => ({ agent: agent2, idx: i })).filter(({ agent: agent2 }) => agent2.role === "Agent" /* AGENT */).map(({ idx }) => idx);
-    const agentTimes = agentRoleAgentsIdx.map(
-      (i) => this.agentTimes.get(i) || 0
-    );
-    const totalAgentTime = agentTimes.reduce((sum, time) => sum + time, 0);
-    return {
+    this.setResult({
       success: false,
-      messages: this.state.messages,
       reasoning: errorMessage || `Reached maximum turns (${this.config.maxTurns || 10}) without conclusion`,
       metCriteria: [],
-      unmetCriteria: ((_a = this.getJudgeAgent()) == null ? void 0 : _a.criteria) ?? [],
-      totalTime: this.totalTime,
-      agentTime: totalAgentTime
-    };
+      unmetCriteria: ((_a = this.getJudgeAgent()) == null ? void 0 : _a.criteria) ?? []
+    });
   }
   getJudgeAgent() {
     return this.agents.find((agent2) => agent2 instanceof JudgeAgentAdapter) ?? null;
@@ -1763,6 +3424,10 @@ var ScenarioExecution = class {
     };
     this.emitEvent(event);
     this.eventSubject.complete();
+    if (this.currentTurnSpan) {
+      this.currentTurnSpan.end();
+      this.currentTurnSpan = void 0;
+    }
   }
   /**
    * Distributes a message to all other agents in the scenario.
@@ -1794,13 +3459,20 @@ var ScenarioExecution = class {
    * ```
    */
   broadcastMessage(message2, fromAgentIdx) {
+    const recipients = [];
     for (let idx = 0; idx < this.agents.length; idx++) {
       if (idx === fromAgentIdx) continue;
       if (!this.pendingMessages.has(idx)) {
         this.pendingMessages.set(idx, []);
       }
       this.pendingMessages.get(idx).push(message2);
+      recipients.push(idx);
     }
+    this.logger.debug(`[${this.config.id}] Broadcast message`, {
+      role: message2.role,
+      fromAgentIdx,
+      recipients
+    });
   }
   /**
    * Executes a single script step with proper error handling and logging.
@@ -1859,7 +3531,8 @@ function convertAgentReturnTypesToMessages(response, role) {
   if (typeof response === "string")
     return [{ role, content: response }];
   if (Array.isArray(response)) return response;
-  if (typeof response === "object" && "role" in response) return [response];
+  if (response && typeof response === "object" && "role" in response)
+    return [response];
   return [];
 }
 function extractErrorInfo(error) {
@@ -1886,9 +3559,27 @@ __export(runner_exports, {
 var import_rxjs3 = require("rxjs");
 // src/events/event-alert-message-logger.ts
+var fs2 = __toESM(require("fs"));
+var os = __toESM(require("os"));
+var path2 = __toESM(require("path"));
 var import_open = __toESM(require("open"));
-var EventAlertMessageLogger = class _EventAlertMessageLogger {
-  static shownBatchIds = /* @__PURE__ */ new Set();
+var EventAlertMessageLogger = class {
+  /**
+   * Creates a coordination file to prevent duplicate messages across processes.
+   * Returns true if this process should show the message (first one to create the file).
+   */
+  createCoordinationFile(type) {
+    try {
+      const batchId = getBatchRunId();
+      const tmpDir = os.tmpdir();
+      const fileName = `scenario-${type}-${batchId}`;
+      const filePath = path2.join(tmpDir, fileName);
+      fs2.writeFileSync(filePath, process.pid.toString(), { flag: "wx" });
+      return true;
+    } catch {
+      return false;
+    }
+  }
   /**
    * Shows a fancy greeting message about simulation reporting status.
    * Only shows once per batch run to avoid spam.
@@ -1897,10 +3588,9 @@ var EventAlertMessageLogger = class _EventAlertMessageLogger {
     if (this.isGreetingDisabled()) {
       return;
     }
-    if (_EventAlertMessageLogger.shownBatchIds.has(getBatchRunId())) {
+    if (!this.createCoordinationFile("greeting")) {
       return;
     }
-    _EventAlertMessageLogger.shownBatchIds.add(getBatchRunId());
     this.displayGreeting();
   }
   /**
@@ -1911,6 +3601,9 @@ var EventAlertMessageLogger = class _EventAlertMessageLogger {
     if (this.isGreetingDisabled()) {
       return;
     }
+    if (!this.createCoordinationFile(`watch-${params.scenarioSetId}`)) {
+      return;
+    }
     await this.displayWatchMessage(params);
   }
   isGreetingDisabled() {
@@ -1998,6 +3691,7 @@ var EventReporter = class {
       } else {
         const errorText = await response.text();
         this.logger.error(`[${event.type}] Event POST failed:`, {
+          endpoint: this.eventsEndpoint.href,
           status: response.status,
           statusText: response.statusText,
           error: errorText,
@@ -2155,19 +3849,27 @@ var agent = (content) => {
   return (_state, executor) => executor.agent(content);
 };
 var judge = (content) => {
-  return (_state, executor) => executor.judge(content);
+  return async (_state, executor) => {
+    await executor.judge(content);
+  };
 };
 var user = (content) => {
   return (_state, executor) => executor.user(content);
 };
 var proceed = (turns, onTurn, onStep) => {
-  return (_state, executor) => executor.proceed(turns, onTurn, onStep);
+  return async (_state, executor) => {
+    await executor.proceed(turns, onTurn, onStep);
+  };
 };
 var succeed = (reasoning) => {
-  return (_state, executor) => executor.succeed(reasoning);
+  return async (_state, executor) => {
+    await executor.succeed(reasoning);
+  };
 };
 var fail = (reasoning) => {
-  return (_state, executor) => executor.fail(reasoning);
+  return async (_state, executor) => {
+    await executor.fail(reasoning);
+  };
 };
 // src/runner/run.ts
@@ -2178,7 +3880,7 @@ async function run(cfg) {
   if (!cfg.description) {
     throw new Error("Scenario description is required");
   }
-  if ((cfg.maxTurns || 10) < 1) {
+  if (cfg.maxTurns && cfg.maxTurns < 1) {
     throw new Error("Max turns must be at least 1");
   }
   if (cfg.agents.length === 0) {
@@ -2200,10 +3902,10 @@ async function run(cfg) {
   let eventBus = null;
   let subscription = null;
   try {
-    const envConfig = getEnv();
+    const envConfig2 = getEnv();
     eventBus = new EventBus({
-      endpoint: envConfig.LANGWATCH_ENDPOINT,
-      apiKey: envConfig.LANGWATCH_API_KEY
+      endpoint: envConfig2.LANGWATCH_ENDPOINT,
+      apiKey: envConfig2.LANGWATCH_API_KEY
     });
     eventBus.listen();
     subscription = eventBus.subscribeTo(execution.events$);
@@ -2254,14 +3956,13 @@ function formatPart(part) {
     case "file":
       return `(file): ${part.filename} ${typeof part.data === "string" ? `url:${part.data}` : "base64:omitted"}`;
     case "tool-call":
-      return `(tool call): ${part.toolName} id:${part.toolCallId} args:(${JSON.stringify(part.args)})`;
+      return `(tool call): ${part.toolName} id:${part.toolCallId} args:(${JSON.stringify(part.input)})`;
     case "tool-result":
-      return `(tool result): ${part.toolName} id:${part.toolCallId} result:(${JSON.stringify(part.result)})`;
+      return `(tool result): ${part.toolName} id:${part.toolCallId} result:(${JSON.stringify(part.output)})`;
     case "reasoning":
       return `(reasoning): ${part.text}`;
-    case "redacted-reasoning":
-      return `(redacted reasoning): ${part.data}`;
     default:
+      part;
       return `Unknown content: ${JSON.stringify(part)}`;
   }
 }
@@ -2280,9 +3981,11 @@ var index_default = scenario;
   AgentAdapter,
   AgentRole,
   DEFAULT_MAX_TURNS,
-  DEFAULT_TEMPERATURE,
   DEFAULT_VERBOSE,
   JudgeAgentAdapter,
+  JudgeSpanCollector,
+  JudgeSpanDigestFormatter,
+  RealtimeAgentAdapter,
   ScenarioExecution,
   ScenarioExecutionState,
   StateChangeEventType,
@@ -2293,6 +3996,8 @@ var index_default = scenario;
   fail,
   judge,
   judgeAgent,
+  judgeSpanCollector,
+  judgeSpanDigestFormatter,
   message,
   proceed,
   run,