npm - @langwatch/scenario - Versions diffs - 0.2.0-prerelease.0 → 0.2.0 - Mend

@langwatch/scenario 0.2.0-prerelease.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16)

package/README.md +72 -17
package/dist/chunk-7P6ASYW6.mjs +9 -0
package/dist/chunk-ORWSJC5F.mjs +309 -0
package/dist/index.d.mts +642 -515
package/dist/index.d.ts +642 -515
package/dist/index.js +977 -907
package/dist/index.mjs +845 -1073
package/dist/integrations/vitest/reporter.d.mts +9 -0
package/dist/integrations/vitest/reporter.d.ts +9 -0
package/dist/integrations/vitest/reporter.js +168 -0
package/dist/integrations/vitest/reporter.mjs +139 -0
package/dist/integrations/vitest/setup.d.mts +2 -0
package/dist/integrations/vitest/setup.d.ts +2 -0
package/dist/integrations/vitest/setup.js +377 -0
package/dist/integrations/vitest/setup.mjs +51 -0
package/package.json +17 -5

package/dist/index.mjs CHANGED

@@ -1,28 +1,33 @@
-// src/script/index.ts
-var message = (message2) => {
-  return (_state, executor) => executor.message(message2);
-};
-var agent = (content) => {
-  return (_state, executor) => executor.agent(content);
-};
-var judge = (content) => {
-  return (_state, executor) => executor.judge(content);
-};
-var user = (content) => {
-  return (_state, executor) => executor.user(content);
-};
-var proceed = (turns, onTurn, onStep) => {
-  return (_state, executor) => executor.proceed(turns, onTurn, onStep);
-};
-var succeed = (reasoning) => {
-  return (_state, executor) => executor.succeed(reasoning);
-};
-var fail = (reasoning) => {
-  return (_state, executor) => executor.fail(reasoning);
-};
+import {
+  EventBus,
+  Logger
+} from "./chunk-ORWSJC5F.mjs";
+import {
+  __export
+} from "./chunk-7P6ASYW6.mjs";
-// src/execution/scenario-execution.ts
-import { Subject } from "rxjs";
+// src/agents/index.ts
+var agents_exports = {};
+__export(agents_exports, {
+  judgeAgent: () => judgeAgent,
+  userSimulatorAgent: () => userSimulatorAgent
+});
+// src/agents/judge-agent.ts
+import { generateText, tool } from "ai";
+import { z as z2 } from "zod";
+// src/domain/index.ts
+var domain_exports = {};
+__export(domain_exports, {
+  AgentAdapter: () => AgentAdapter,
+  AgentRole: () => AgentRole,
+  JudgeAgentAdapter: () => JudgeAgentAdapter,
+  UserSimulatorAgentAdapter: () => UserSimulatorAgentAdapter,
+  allAgentRoles: () => allAgentRoles,
+  defineConfig: () => defineConfig,
+  scenarioProjectConfigSchema: () => scenarioProjectConfigSchema
+});
 // src/domain/core/config.ts
 import { z } from "zod";
@@ -66,347 +71,448 @@ var JudgeAgentAdapter = class {
   }
 };
-// src/utils/ids.ts
-import { generate, parse } from "xksuid";
-var batchRunId = null;
-function generateThreadId() {
-  return `thread_${generate()}`;
-}
-function generateScenarioRunId() {
-  return `scenariorun_${generate()}`;
-}
-function generateScenarioId() {
-  return `scenario_${generate()}`;
-}
-function getBatchRunId() {
-  if (!batchRunId) {
-    batchRunId = process.env.SCENARIO_BATCH_RUN_ID ?? `scenariobatchrun_${generate()}`;
-  }
-  return batchRunId;
-}
-function generateMessageId() {
-  return `scenariomsg_${generate()}`;
-}
-// src/execution/scenario-execution-state.ts
-var ScenarioExecutionState = class {
-  _history = [];
-  _turn = 0;
-  _partialResult = null;
-  _threadId = "";
-  _agents = [];
-  _pendingMessages = /* @__PURE__ */ new Map();
-  _pendingRolesOnTurn = [];
-  _pendingAgentsOnTurn = /* @__PURE__ */ new Set();
-  _agentTimes = /* @__PURE__ */ new Map();
-  _totalStartTime = 0;
-  /**
-   * Creates a new ScenarioExecutionState.
-   */
-  constructor() {
-    this._totalStartTime = Date.now();
-  }
-  setThreadId(threadId) {
-    this._threadId = threadId;
-  }
-  setAgents(agents) {
-    this._agents = agents;
-    this._pendingMessages.clear();
-    this._agentTimes.clear();
-  }
-  appendMessage(role, content) {
-    const message2 = { role, content };
-    this._history.push({ ...message2, id: generateMessageId() });
-  }
-  appendUserMessage(content) {
-    this.appendMessage("user", content);
-  }
-  appendAssistantMessage(content) {
-    this.appendMessage("assistant", content);
-  }
-  addMessage(message2, fromAgentIdx) {
-    this._history.push({ ...message2, id: generateMessageId() });
-    for (let idx = 0; idx < this._agents.length; idx++) {
-      if (idx === fromAgentIdx) continue;
-      if (!this._pendingMessages.has(idx)) {
-        this._pendingMessages.set(idx, []);
-      }
-      this._pendingMessages.get(idx).push(message2);
-    }
-  }
-  addMessages(messages, fromAgentIdx) {
-    for (const message2 of messages) {
-      this.addMessage(message2, fromAgentIdx);
-    }
-  }
-  getPendingMessages(agentIdx) {
-    return this._pendingMessages.get(agentIdx) || [];
-  }
-  clearPendingMessages(agentIdx) {
-    this._pendingMessages.set(agentIdx, []);
-  }
-  newTurn() {
-    this._pendingAgentsOnTurn = new Set(this._agents);
-    this._pendingRolesOnTurn = [
-      "User" /* USER */,
-      "Agent" /* AGENT */,
-      "Judge" /* JUDGE */
-    ];
-    if (this._turn === null) {
-      this._turn = 1;
-    } else {
-      this._turn++;
-    }
-  }
-  removePendingRole(role) {
-    const index = this._pendingRolesOnTurn.indexOf(role);
-    if (index > -1) {
-      this._pendingRolesOnTurn.splice(index, 1);
+// src/agents/utils.ts
+var toolMessageRole = "tool";
+var assistantMessageRole = "assistant";
+var userMessageRole = "user";
+var groupMessagesByToolBoundaries = (messages) => {
+  const segments = [];
+  let currentSegment = [];
+  for (const message2 of messages) {
+    currentSegment.push(message2);
+    if (message2.role === toolMessageRole) {
+      segments.push(currentSegment);
+      currentSegment = [];
     }
   }
-  removePendingAgent(agent2) {
-    this._pendingAgentsOnTurn.delete(agent2);
+  if (currentSegment.length > 0) {
+    segments.push(currentSegment);
   }
-  getNextAgentForRole(role) {
-    for (let i = 0; i < this._agents.length; i++) {
-      const agent2 = this._agents[i];
-      if (agent2.role === role && this._pendingAgentsOnTurn.has(agent2)) {
-        return { index: i, agent: agent2 };
-      }
+  return segments;
+};
+var segmentHasToolMessages = (segment) => {
+  return segment.some((message2) => {
+    if (message2.role === toolMessageRole) return true;
+    if (message2.role === assistantMessageRole && Array.isArray(message2.content)) {
+      return message2.content.some((part) => part.type === "tool-call");
     }
-    return null;
-  }
-  addAgentTime(agentIdx, time) {
-    const currentTime = this._agentTimes.get(agentIdx) || 0;
-    this._agentTimes.set(agentIdx, currentTime + time);
-  }
-  hasResult() {
-    return this._partialResult !== null;
-  }
-  setResult(result) {
-    this._partialResult = result;
-  }
-  get lastMessage() {
-    return this._history[this._history.length - 1];
-  }
-  get lastUserMessage() {
-    return this._history.findLast((message2) => message2.role === "user");
-  }
-  get lastAssistantMessage() {
-    return this._history.findLast((message2) => message2.role === "assistant");
-  }
-  get lastToolCall() {
-    return this._history.findLast((message2) => message2.role === "tool");
-  }
-  getLastToolCallByToolName(toolName) {
-    const toolMessage = this._history.findLast(
-      (message2) => message2.role === "tool" && message2.content.find(
-        (part) => part.type === "tool-result" && part.toolName === toolName
-      )
-    );
-    return toolMessage;
-  }
-  hasToolCall(toolName) {
-    return this._history.some(
-      (message2) => message2.role === "tool" && message2.content.find(
-        (part) => part.type === "tool-result" && part.toolName === toolName
-      )
-    );
-  }
-  get history() {
-    return this._history;
-  }
-  get historyWithoutLastMessage() {
-    return this._history.slice(0, -1);
-  }
-  get historyWithoutLastUserMessage() {
-    const lastUserMessageIndex = this._history.findLastIndex((message2) => message2.role === "user");
-    if (lastUserMessageIndex === -1) return this._history;
-    return this._history.slice(0, lastUserMessageIndex);
-  }
-  get turn() {
-    return this._turn;
-  }
-  set turn(turn) {
-    this._turn = turn;
-  }
-  get threadId() {
-    return this._threadId;
-  }
-  get agents() {
-    return this._agents;
-  }
-  get pendingRolesOnTurn() {
-    return this._pendingRolesOnTurn;
-  }
-  set pendingRolesOnTurn(roles) {
-    this._pendingRolesOnTurn = roles;
-  }
-  get pendingAgentsOnTurn() {
-    return Array.from(this._pendingAgentsOnTurn);
-  }
-  set pendingAgentsOnTurn(agents) {
-    this._pendingAgentsOnTurn = new Set(agents);
-  }
-  get partialResult() {
-    return this._partialResult;
-  }
-  get totalTime() {
-    return Date.now() - this._totalStartTime;
-  }
-  get agentTimes() {
-    return new Map(this._agentTimes);
-  }
-  removeLastPendingRole() {
-    this._pendingRolesOnTurn.pop();
-  }
+    return false;
+  });
+};
+var reverseSegmentRoles = (segment) => {
+  return segment.map((message2) => {
+    const hasStringContent = typeof message2.content === "string";
+    if (!hasStringContent) return message2;
+    const roleMap = {
+      [userMessageRole]: assistantMessageRole,
+      [assistantMessageRole]: userMessageRole
+    };
+    const newRole = roleMap[message2.role];
+    if (!newRole) return message2;
+    return {
+      role: newRole,
+      content: message2.content
+    };
+  });
+};
+var messageRoleReversal = (messages) => {
+  const segments = groupMessagesByToolBoundaries(messages);
+  const processedSegments = segments.map(
+    (segment) => segmentHasToolMessages(segment) ? segment : reverseSegmentRoles(segment)
+  );
+  return processedSegments.flat();
+};
+var criterionToParamName = (criterion) => {
+  return criterion.replace(/"/g, "").replace(/[^a-zA-Z0-9]/g, "_").replace(/ /g, "_").toLowerCase().substring(0, 70);
 };
-// src/events/schema.ts
-import { EventType, MessagesSnapshotEventSchema } from "@ag-ui/core";
-import { z as z2 } from "zod";
-var ScenarioRunStatus = /* @__PURE__ */ ((ScenarioRunStatus2) => {
-  ScenarioRunStatus2["SUCCESS"] = "SUCCESS";
-  ScenarioRunStatus2["ERROR"] = "ERROR";
-  ScenarioRunStatus2["CANCELLED"] = "CANCELLED";
-  ScenarioRunStatus2["IN_PROGRESS"] = "IN_PROGRESS";
-  ScenarioRunStatus2["PENDING"] = "PENDING";
-  ScenarioRunStatus2["FAILED"] = "FAILED";
-  return ScenarioRunStatus2;
-})(ScenarioRunStatus || {});
-var baseEventSchema = z2.object({
-  type: z2.nativeEnum(EventType),
-  timestamp: z2.number().optional(),
-  rawEvent: z2.any().optional()
-});
-var baseScenarioEventSchema = baseEventSchema.extend({
-  batchRunId: z2.string(),
-  scenarioId: z2.string(),
-  scenarioRunId: z2.string()
-});
-var scenarioRunStartedSchema = baseScenarioEventSchema.extend({
-  type: z2.literal("SCENARIO_RUN_STARTED" /* RUN_STARTED */),
-  metadata: z2.object({
-    name: z2.string(),
-    description: z2.string().optional()
-    // config: z.record(z.unknown()).optional(),
-  })
-});
-var scenarioRunFinishedSchema = baseScenarioEventSchema.extend({
-  type: z2.literal("SCENARIO_RUN_FINISHED" /* RUN_FINISHED */),
-  status: z2.nativeEnum(ScenarioRunStatus)
-  //   error: z
-  //     .object({
-  //       message: z.string(),
-  //       code: z.string().optional(),
-  //       stack: z.string().optional(),
-  //     })
-  //     .optional(),
-  //   metrics: z.record(z.number()).optional(),
-});
-var scenarioMessageSnapshotSchema = MessagesSnapshotEventSchema.merge(
-  baseScenarioEventSchema.extend({
-    type: z2.literal("SCENARIO_MESSAGE_SNAPSHOT" /* MESSAGE_SNAPSHOT */)
-  })
-);
-var scenarioEventSchema = z2.discriminatedUnion("type", [
-  scenarioRunStartedSchema,
-  scenarioRunFinishedSchema,
-  scenarioMessageSnapshotSchema
-]);
-var successSchema = z2.object({ success: z2.boolean() });
-var errorSchema = z2.object({ error: z2.string() });
-var stateSchema = z2.object({
-  state: z2.object({
-    messages: z2.array(z2.any()),
-    status: z2.string()
-  })
-});
-var runsSchema = z2.object({ runs: z2.array(z2.string()) });
-var eventsSchema = z2.object({ events: z2.array(scenarioEventSchema) });
-// src/utils/logger.ts
-var Logger = class _Logger {
-  constructor(context) {
-    this.context = context;
-  }
-  /**
-   * Creates a logger with context (e.g., class name)
-   */
-  static create(context) {
-    return new _Logger(context);
-  }
-  /**
-   * Checks if logging should occur based on LOG_LEVEL env var
-   */
-  shouldLog(level) {
-    const logLevel = (process.env.SCENARIO_LOG_LEVEL || "").toLowerCase();
-    const levels = ["error", "warn", "info", "debug"];
-    const currentLevelIndex = levels.indexOf(logLevel);
-    const requestedLevelIndex = levels.indexOf(level);
-    return currentLevelIndex >= 0 && requestedLevelIndex <= currentLevelIndex;
-  }
-  formatMessage(message2) {
-    return this.context ? `[${this.context}] ${message2}` : message2;
-  }
-  error(message2, data) {
-    if (this.shouldLog("error")) {
-      const formattedMessage = this.formatMessage(message2);
-      if (data) {
-        console.error(formattedMessage, data);
-      } else {
-        console.error(formattedMessage);
+// src/config/load.ts
+import fs from "node:fs/promises";
+import path from "node:path";
+import { pathToFileURL } from "node:url";
+async function loadScenarioProjectConfig() {
+  const cwd = process.cwd();
+  const configNames = [
+    "scenario.config.js",
+    "scenario.config.mjs"
+  ];
+  for (const name of configNames) {
+    const fullPath = path.join(cwd, name);
+    try {
+      await fs.access(fullPath);
+      const configModule = await import(pathToFileURL(fullPath).href);
+      const config2 = configModule.default || configModule;
+      const parsed = scenarioProjectConfigSchema.safeParse(config2);
+      if (!parsed.success) {
+        throw new Error(
+          `Invalid config file ${name}: ${JSON.stringify(parsed.error.format(), null, 2)}`
+        );
       }
-    }
-  }
-  warn(message2, data) {
-    if (this.shouldLog("warn")) {
-      const formattedMessage = this.formatMessage(message2);
-      if (data) {
-        console.warn(formattedMessage, data);
-      } else {
-        console.warn(formattedMessage);
+      return parsed.data;
+    } catch (error) {
+      if (error instanceof Error && "code" in error && error.code === "ENOENT") {
+        continue;
       }
+      throw error;
     }
   }
-  info(message2, data) {
-    if (this.shouldLog("info")) {
-      const formattedMessage = this.formatMessage(message2);
-      if (data) {
-        console.info(formattedMessage, data);
-      } else {
-        console.info(formattedMessage);
-      }
-    }
+  return await scenarioProjectConfigSchema.parseAsync({});
+}
+// src/config/index.ts
+var logger = new Logger("scenario.config");
+var configLoaded = false;
+var config = null;
+var configLoadPromise = null;
+async function loadProjectConfig() {
+  if (configLoaded) {
+    return;
   }
-  debug(message2, data) {
-    if (this.shouldLog("debug")) {
-      const formattedMessage = this.formatMessage(message2);
-      if (data) {
-        console.log(formattedMessage, data);
-      } else {
-        console.log(formattedMessage);
-      }
-    }
+  if (configLoadPromise) {
+    return configLoadPromise;
   }
-};
+  configLoadPromise = (async () => {
+    try {
+      config = await loadScenarioProjectConfig();
+      logger.info("loaded scenario project config", { config });
+    } catch (error) {
+      logger.error("error loading scenario project config", { error });
+    } finally {
+      configLoaded = true;
+    }
+  })();
+  return configLoadPromise;
+}
+async function getProjectConfig() {
+  await loadProjectConfig();
+  return config;
+}
-// src/execution/scenario-execution.ts
-var batchRunId2 = getBatchRunId();
-function convertAgentReturnTypesToMessages(response, role) {
-  if (typeof response === "string")
-    return [{ role, content: response }];
-  if (Array.isArray(response))
-    return response;
-  if (typeof response === "object" && "role" in response)
-    return [response];
-  return [];
+// src/utils/config.ts
+function mergeConfig(config2, projectConfig) {
+  if (!projectConfig) {
+    return config2;
+  }
+  return {
+    ...projectConfig.defaultModel,
+    ...config2
+  };
+}
+function mergeAndValidateConfig(config2, projectConfig) {
+  var _a;
+  const mergedConfig = mergeConfig(config2, projectConfig);
+  mergedConfig.model = mergedConfig.model ?? ((_a = projectConfig == null ? void 0 : projectConfig.defaultModel) == null ? void 0 : _a.model);
+  if (!mergedConfig.model) {
+    throw new Error("Model is required");
+  }
+  return mergedConfig;
+}
+// src/agents/judge-agent.ts
+function buildSystemPrompt(criteria, description) {
+  const criteriaList = (criteria == null ? void 0 : criteria.map((criterion, idx) => `${idx + 1}. ${criterion}`).join("\n")) || "No criteria provided";
+  return `
+<role>
+You are an LLM as a judge watching a simulated conversation as it plays out live to determine if the agent under test meets the criteria or not.
+</role>
+<goal>
+Your goal is to determine if you already have enough information to make a verdict of the scenario below, or if the conversation should continue for longer.
+If you do have enough information, use the finish_test tool to determine if all the criteria have been met, if not, use the continue_test tool to let the next step play out.
+</goal>
+<scenario>
+${description}
+</scenario>
+<criteria>
+${criteriaList}
+</criteria>
+<rules>
+- Be strict, do not let the conversation continue if the agent already broke one of the "do not" or "should not" criteria.
+- DO NOT make any judgment calls that are not explicitly listed in the success or failure criteria, withhold judgement if necessary
+</rules>
+`.trim();
+}
+function buildContinueTestTool() {
+  return tool({
+    description: "Continue the test with the next step",
+    parameters: z2.object({})
+  });
+}
+function buildFinishTestTool(criteria) {
+  const criteriaNames = criteria.map(criterionToParamName);
+  return tool({
+    description: "Complete the test with a final verdict",
+    parameters: z2.object({
+      criteria: z2.object(
+        Object.fromEntries(
+          criteriaNames.map((name, idx) => [
+            name,
+            z2.enum(["true", "false", "inconclusive"]).describe(criteria[idx])
+          ])
+        )
+      ).strict().describe("Strict verdict for each criterion"),
+      reasoning: z2.string().describe("Explanation of what the final verdict should be"),
+      verdict: z2.enum(["success", "failure", "inconclusive"]).describe("The final verdict of the test")
+    })
+  });
 }
+var judgeAgent = (cfg) => {
+  return {
+    role: "Judge" /* JUDGE */,
+    criteria: cfg.criteria,
+    call: async (input) => {
+      var _a;
+      const systemPrompt = cfg.systemPrompt ?? buildSystemPrompt(cfg.criteria, input.scenarioConfig.description);
+      const messages = [
+        { role: "system", content: systemPrompt },
+        ...input.messages
+      ];
+      const isLastMessage = input.scenarioState.currentTurn === input.scenarioConfig.maxTurns;
+      const projectConfig = await getProjectConfig();
+      const mergedConfig = mergeAndValidateConfig(cfg, projectConfig);
+      if (!mergedConfig.model) {
+        throw new Error("Model is required for the judge agent");
+      }
+      const tools = {
+        continue_test: buildContinueTestTool(),
+        finish_test: buildFinishTestTool(cfg.criteria)
+      };
+      const enforceJudgement = input.judgmentRequest;
+      const hasCriteria = cfg.criteria.length && cfg.criteria.length > 0;
+      if (enforceJudgement && !hasCriteria) {
+        return {
+          success: false,
+          messages: [],
+          reasoning: "JudgeAgent: No criteria was provided to be judged against",
+          metCriteria: [],
+          unmetCriteria: []
+        };
+      }
+      const toolChoice = (isLastMessage || enforceJudgement) && hasCriteria ? { type: "tool", toolName: "finish_test" } : "required";
+      const completion = await generateText({
+        model: mergedConfig.model,
+        messages,
+        temperature: mergedConfig.temperature ?? 0,
+        maxTokens: mergedConfig.maxTokens,
+        tools,
+        toolChoice
+      });
+      let args;
+      if ((_a = completion.toolCalls) == null ? void 0 : _a.length) {
+        const toolCall = completion.toolCalls[0];
+        switch (toolCall.toolName) {
+          case "finish_test": {
+            args = toolCall.args;
+            const verdict = args.verdict || "inconclusive";
+            const reasoning = args.reasoning || "No reasoning provided";
+            const criteria = args.criteria || {};
+            const criteriaValues = Object.values(criteria);
+            const metCriteria = cfg.criteria.filter((_, i) => criteriaValues[i] === "true");
+            const unmetCriteria = cfg.criteria.filter((_, i) => criteriaValues[i] !== "true");
+            return {
+              success: verdict === "success",
+              messages: input.messages,
+              reasoning,
+              metCriteria,
+              unmetCriteria
+            };
+          }
+          case "continue_test":
+            return [];
+          default:
+            return {
+              success: false,
+              messages: input.messages,
+              reasoning: `JudgeAgent: Unknown tool call: ${toolCall.toolName}`,
+              metCriteria: [],
+              unmetCriteria: cfg.criteria
+            };
+        }
+      }
+      return {
+        success: false,
+        messages: input.messages,
+        reasoning: `JudgeAgent: No tool call found in LLM output`,
+        metCriteria: [],
+        unmetCriteria: cfg.criteria
+      };
+    }
+  };
+};
+// src/agents/user-simulator-agent.ts
+import { generateText as generateText2 } from "ai";
+function buildSystemPrompt2(description) {
+  return `
+<role>
+You are pretending to be a user, you are testing an AI Agent (shown as the user role) based on a scenario.
+Approach this naturally, as a human user would, with very short inputs, few words, all lowercase, imperative, not periods, like when they google or talk to chatgpt.
+</role>
+<goal>
+Your goal (assistant) is to interact with the Agent Under Test (user) as if you were a human user to see if it can complete the scenario successfully.
+</goal>
+<scenario>
+${description}
+</scenario>
+<rules>
+- DO NOT carry over any requests yourself, YOU ARE NOT the assistant today, you are the user
+</rules>
+`.trim();
+}
+var userSimulatorAgent = (config2) => {
+  return {
+    role: "User" /* USER */,
+    call: async (input) => {
+      const systemPrompt = buildSystemPrompt2(input.scenarioConfig.description);
+      const messages = [
+        { role: "system", content: systemPrompt },
+        { role: "assistant", content: "Hello, how can I help you today" },
+        ...input.messages
+      ];
+      const projectConfig = await getProjectConfig();
+      const mergedConfig = mergeAndValidateConfig(config2 ?? {}, projectConfig);
+      if (!mergedConfig.model) {
+        throw new Error("Model is required for the user simulator agent");
+      }
+      const reversedMessages = messageRoleReversal(messages);
+      const completion = await generateText2({
+        model: mergedConfig.model,
+        messages: reversedMessages,
+        temperature: mergedConfig.temperature ?? 0,
+        maxTokens: mergedConfig.maxTokens
+      });
+      const messageContent = completion.text;
+      if (!messageContent) {
+        throw new Error("No response content from LLM");
+      }
+      return { role: "user", content: messageContent };
+    }
+  };
+};
+// src/execution/index.ts
+var execution_exports = {};
+__export(execution_exports, {
+  ScenarioExecution: () => ScenarioExecution,
+  ScenarioExecutionState: () => ScenarioExecutionState
+});
+// src/execution/scenario-execution.ts
+import { Subject } from "rxjs";
+// src/utils/ids.ts
+import { generate, parse } from "xksuid";
+var batchRunId = null;
+function generateThreadId() {
+  return `thread_${generate()}`;
+}
+function generateScenarioRunId() {
+  return `scenariorun_${generate()}`;
+}
+function generateScenarioId() {
+  return `scenario_${generate()}`;
+}
+function getBatchRunId() {
+  if (!batchRunId) {
+    batchRunId = process.env.SCENARIO_BATCH_RUN_ID ?? `scenariobatchrun_${generate()}`;
+  }
+  return batchRunId;
+}
+function generateMessageId() {
+  return `scenariomsg_${generate()}`;
+}
+// src/execution/scenario-execution-state.ts
+var ScenarioExecutionState = class {
+  _messages = [];
+  _currentTurn = 0;
+  _threadId = "";
+  description;
+  config;
+  constructor(config2) {
+    this.config = config2;
+    this.description = config2.description;
+  }
+  get messages() {
+    return this._messages;
+  }
+  get currentTurn() {
+    return this._currentTurn;
+  }
+  set currentTurn(turn) {
+    this._currentTurn = turn;
+  }
+  get threadId() {
+    return this._threadId;
+  }
+  set threadId(value) {
+    this._threadId = value;
+  }
+  /**
+   * Adds a message to the conversation history.
+   *
+   * @param message - The message to add.
+   */
+  addMessage(message2) {
+    this._messages.push({ ...message2, id: generateMessageId() });
+  }
+  lastMessage() {
+    if (this._messages.length === 0) {
+      throw new Error("No messages in history");
+    }
+    return this._messages[this._messages.length - 1];
+  }
+  lastUserMessage() {
+    if (this._messages.length === 0) {
+      throw new Error("No messages in history");
+    }
+    const lastMessage = this._messages.findLast((message2) => message2.role === "user");
+    if (!lastMessage) {
+      throw new Error("No user message in history");
+    }
+    return lastMessage;
+  }
+  lastToolCall(toolName) {
+    if (this._messages.length === 0) {
+      throw new Error("No messages in history");
+    }
+    const lastMessage = this._messages.findLast((message2) => message2.role === "tool" && message2.content.find(
+      (part) => part.type === "tool-result" && part.toolName === toolName
+    ));
+    if (!lastMessage) {
+      throw new Error("No tool call message in history");
+    }
+    return lastMessage;
+  }
+  hasToolCall(toolName) {
+    return this._messages.some(
+      (message2) => message2.role === "tool" && message2.content.find(
+        (part) => part.type === "tool-result" && part.toolName === toolName
+      )
+    );
+  }
+};
+// src/execution/scenario-execution.ts
+var batchRunId2 = getBatchRunId();
 var ScenarioExecution = class {
-  state = new ScenarioExecutionState();
+  state;
   eventSubject = new Subject();
   logger = new Logger("scenario.execution.ScenarioExecution");
   config;
+  agents = [];
+  pendingRolesOnTurn = [];
+  pendingAgentsOnTurn = /* @__PURE__ */ new Set();
+  pendingMessages = /* @__PURE__ */ new Map();
+  partialResult = null;
+  agentTimes = /* @__PURE__ */ new Map();
+  totalStartTime = 0;
   /**
    * An observable stream of events that occur during the scenario execution.
    * Subscribe to this to monitor the progress of the scenario in real-time.
@@ -426,15 +532,17 @@ var ScenarioExecution = class {
       script,
       verbose: config2.verbose ?? false,
       maxTurns: config2.maxTurns ?? 10,
-      threadId: config2.threadId ?? generateThreadId()
+      threadId: config2.threadId ?? generateThreadId(),
+      setId: config2.setId
     };
+    this.state = new ScenarioExecutionState(this.config);
     this.reset();
   }
   /**
    * The history of messages in the conversation.
    */
-  get history() {
-    return this.state.history;
+  get messages() {
+    return this.state.messages;
   }
   /**
    * The unique identifier for the conversation thread.
@@ -442,6 +550,12 @@ var ScenarioExecution = class {
   get threadId() {
     return this.state.threadId;
   }
+  /**
+   * The total elapsed time for the scenario execution.
+   */
+  get totalTime() {
+    return Date.now() - this.totalStartTime;
+  }
   /**
    * Executes the entire scenario from start to finish.
    * This will run through the script and any automatic proceeding logic until a
@@ -462,7 +576,8 @@ var ScenarioExecution = class {
         if (result && typeof result === "object" && "success" in result) {
           this.emitRunFinished({
             scenarioRunId,
-            status: result.success ? "SUCCESS" /* SUCCESS */ : "FAILED" /* FAILED */
+            status: result.success ? "SUCCESS" /* SUCCESS */ : "FAILED" /* FAILED */,
+            result
           });
           return result;
         }
@@ -475,11 +590,20 @@ var ScenarioExecution = class {
         "- `Scenario.succeed()` or `Scenario.fail()` to end the test with an explicit result"
       ].join("\n"));
     } catch (error) {
+      const errorResult = {
+        success: false,
+        messages: this.state.messages,
+        reasoning: `Scenario failed with error: ${error instanceof Error ? error.message : String(error)}`,
+        metCriteria: [],
+        unmetCriteria: [],
+        error: error instanceof Error ? error.message : String(error)
+      };
       this.emitRunFinished({
         scenarioRunId,
-        status: "ERROR" /* ERROR */
+        status: "ERROR" /* ERROR */,
+        result: errorResult
       });
-      throw error;
+      return errorResult;
     }
   }
   /**
@@ -494,29 +618,29 @@ var ScenarioExecution = class {
     return result;
   }
   async _step(goToNextTurn = true, onTurn) {
-    if (this.state.pendingRolesOnTurn.length === 0) {
+    if (this.pendingRolesOnTurn.length === 0) {
       if (!goToNextTurn) return null;
-      this.state.newTurn();
+      this.newTurn();
       if (onTurn) await onTurn(this.state);
-      if (this.state.turn != null && this.state.turn >= this.config.maxTurns)
+      if (this.state.currentTurn >= this.config.maxTurns)
         return this.reachedMaxTurns();
     }
-    const currentRole = this.state.pendingRolesOnTurn[0];
+    const currentRole = this.pendingRolesOnTurn[0];
     const { idx, agent: nextAgent } = this.nextAgentForRole(currentRole);
     if (!nextAgent) {
-      this.state.removePendingRole(currentRole);
+      this.removePendingRole(currentRole);
       return this._step(goToNextTurn, onTurn);
     }
-    this.state.removePendingAgent(nextAgent);
+    this.removePendingAgent(nextAgent);
     return await this.callAgent(idx, currentRole);
   }
   async callAgent(idx, role, judgmentRequest = false) {
-    const agent2 = this.state.agents[idx];
+    const agent2 = this.agents[idx];
     const startTime = Date.now();
     const agentInput = {
       threadId: this.state.threadId,
-      messages: this.state.history,
-      newMessages: this.state.getPendingMessages(idx),
+      messages: this.state.messages,
+      newMessages: this.pendingMessages.get(idx) ?? [],
       requestedRole: role,
       judgmentRequest,
       scenarioState: this.state,
@@ -524,106 +648,22 @@ var ScenarioExecution = class {
     };
     const agentResponse = await agent2.call(agentInput);
     const endTime = Date.now();
-    this.state.addAgentTime(idx, endTime - startTime);
-    this.state.clearPendingMessages(idx);
-    if (typeof agentResponse === "object" && agentResponse && "success" in agentResponse) {
+    this.addAgentTime(idx, endTime - startTime);
+    this.pendingMessages.delete(idx);
+    if (agentResponse && typeof agentResponse === "object" && "success" in agentResponse) {
       return agentResponse;
     }
+    const currentAgentTime = this.agentTimes.get(idx) ?? 0;
+    this.agentTimes.set(idx, currentAgentTime + (Date.now() - startTime));
     const messages = convertAgentReturnTypesToMessages(
       agentResponse,
       role === "User" /* USER */ ? "user" : "assistant"
     );
-    this.state.addMessages(messages, idx);
-    return messages;
-  }
-  nextAgentForRole(role) {
-    for (const agent2 of this.state.agents) {
-      if (agent2.role === role && this.state.pendingAgentsOnTurn.includes(agent2) && this.state.pendingRolesOnTurn.includes(role)) {
-        return { idx: this.state.agents.indexOf(agent2), agent: agent2 };
-      }
-    }
-    return { idx: -1, agent: null };
-  }
-  reachedMaxTurns(errorMessage) {
-    var _a;
-    const agentRoleAgentsIdx = this.state.agents.map((agent2, i) => ({ agent: agent2, idx: i })).filter(({ agent: agent2 }) => agent2.role === "Agent" /* AGENT */).map(({ idx }) => idx);
-    const agentTimes = agentRoleAgentsIdx.map((i) => this.state.agentTimes.get(i) || 0);
-    const totalAgentTime = agentTimes.reduce((sum, time) => sum + time, 0);
-    return {
-      success: false,
-      messages: this.state.history,
-      reasoning: errorMessage || `Reached maximum turns (${this.config.maxTurns || 10}) without conclusion`,
-      passedCriteria: [],
-      failedCriteria: ((_a = this.getJudgeAgent()) == null ? void 0 : _a.criteria) ?? [],
-      totalTime: this.state.totalTime,
-      agentTime: totalAgentTime
-    };
-  }
-  getJudgeAgent() {
-    return this.state.agents.find((agent2) => agent2 instanceof JudgeAgentAdapter) ?? null;
-  }
-  consumeUntilRole(role) {
-    while (this.state.pendingRolesOnTurn.length > 0) {
-      const nextRole = this.state.pendingRolesOnTurn[0];
-      if (nextRole === role) break;
-      this.state.pendingRolesOnTurn.pop();
-    }
-  }
-  async scriptCallAgent(role, content, judgmentRequest = false) {
-    this.consumeUntilRole(role);
-    let index = -1;
-    let agent2 = null;
-    const nextAgent = this.state.getNextAgentForRole(role);
-    if (!nextAgent) {
-      this.state.newTurn();
-      this.consumeUntilRole(role);
-      const nextAgent2 = this.state.getNextAgentForRole(role);
-      if (!nextAgent2) {
-        let roleClass = "";
-        switch (role) {
-          case "User" /* USER */:
-            roleClass = "a scenario.userSimulatorAgent()";
-            break;
-          case "Agent" /* AGENT */:
-            roleClass = "a scenario.agent()";
-            break;
-          case "Judge" /* JUDGE */:
-            roleClass = "a scenario.judgeAgent()";
-            break;
-          default:
-            roleClass = "your agent";
-        }
-        if (content)
-          throw new Error(
-            `Cannot generate a message for role \`${role}\` with content \`${content}\` because no agent with this role was found, please add ${roleClass} to the scenario \`agents\` list`
-          );
-        throw new Error(
-          `Cannot generate a message for role \`${role}\` because no agent with this role was found, please add ${roleClass} to the scenario \`agents\` list`
-        );
-      }
-      index = nextAgent2.index;
-      agent2 = nextAgent2.agent;
-    } else {
-      index = nextAgent.index;
-      agent2 = nextAgent.agent;
-    }
-    this.state.removePendingAgent(agent2);
-    if (content) {
-      if (typeof content === "string") {
-        if (role === "User" /* USER */) {
-          this.state.addMessage({ role: "user", content });
-        } else {
-          this.state.addMessage({ role: "assistant", content });
-        }
-      } else {
-        this.state.addMessage(content);
-      }
-      return null;
+    for (const message2 of messages) {
+      this.state.addMessage(message2);
+      this.broadcastMessage(message2, idx);
     }
-    const result = await this.callAgent(index, role, judgmentRequest);
-    if (Array.isArray(result))
-      return null;
-    return result;
+    return messages;
   }
   /**
    * Adds a message to the conversation history.
@@ -637,6 +677,7 @@ var ScenarioExecution = class {
       await this.scriptCallAgent("Agent" /* AGENT */, message2);
     } else {
       this.state.addMessage(message2);
+      this.broadcastMessage(message2);
     }
   }
   /**
@@ -678,12 +719,12 @@ var ScenarioExecution = class {
    * @returns A promise that resolves with the scenario result if a conclusion is reached.
    */
   async proceed(turns, onTurn, onStep) {
-    let initialTurn = this.state.turn;
+    let initialTurn = this.state.currentTurn;
     while (true) {
-      const goToNextTurn = turns === void 0 || initialTurn === null || this.state.turn != null && this.state.turn + 1 < initialTurn + turns;
+      const goToNextTurn = turns === void 0 || initialTurn === null || this.state.currentTurn != null && this.state.currentTurn + 1 < initialTurn + turns;
       const nextMessage = await this._step(goToNextTurn, onTurn);
       if (initialTurn === null)
-        initialTurn = this.state.turn;
+        initialTurn = this.state.currentTurn;
       if (nextMessage === null) {
         return null;
       }
@@ -701,10 +742,10 @@ var ScenarioExecution = class {
   async succeed(reasoning) {
     return {
       success: true,
-      messages: this.state.history,
+      messages: this.state.messages,
       reasoning: reasoning || "Scenario marked as successful with Scenario.succeed()",
-      passedCriteria: [],
-      failedCriteria: []
+      metCriteria: [],
+      unmetCriteria: []
     };
   }
   /**
@@ -716,656 +757,385 @@ var ScenarioExecution = class {
   async fail(reasoning) {
     return {
       success: false,
-      messages: this.state.history,
+      messages: this.state.messages,
       reasoning: reasoning || "Scenario marked as failed with Scenario.fail()",
-      passedCriteria: [],
-      failedCriteria: []
+      metCriteria: [],
+      unmetCriteria: []
     };
   }
-  reset() {
-    this.state = new ScenarioExecutionState();
-    this.state.setThreadId(this.config.threadId || generateThreadId());
-    this.state.setAgents(this.config.agents);
-    this.state.newTurn();
-    this.state.turn = 0;
-  }
-  // =====================================================
-  // Event Emission Methods
-  // =====================================================
-  // These methods handle the creation and emission of
-  // scenario events for external consumption and monitoring
-  // =====================================================
-  /**
-   * Emits an event to the event stream for external consumption.
-   */
-  emitEvent(event) {
-    this.eventSubject.next(event);
-  }
-  /**
-   * Creates base event properties shared across all scenario events.
-   */
-  makeBaseEvent({ scenarioRunId }) {
-    return {
-      batchRunId: batchRunId2,
-      scenarioId: this.config.id,
-      scenarioRunId,
-      timestamp: Date.now(),
-      rawEvent: void 0
-    };
-  }
-  /**
-   * Emits a run started event to indicate scenario execution has begun.
-   */
-  emitRunStarted({ scenarioRunId }) {
-    this.emitEvent({
-      ...this.makeBaseEvent({ scenarioRunId }),
-      type: "SCENARIO_RUN_STARTED" /* RUN_STARTED */,
-      metadata: {
-        name: this.config.name,
-        description: this.config.description
-      }
-    });
+  /**
+   * Adds `time` to the total already recorded for the agent at `agentIdx`.
+   * A missing (or zero) entry counts as 0 before the addition.
+   */
+  addAgentTime(agentIdx, time) {
+    const recordedSoFar = this.agentTimes.get(agentIdx) || 0;
+    const updatedTotal = recordedSoFar + time;
+    this.agentTimes.set(agentIdx, updatedTotal);
   }
-  /**
-   * Emits a message snapshot event containing current conversation history.
-   */
-  emitMessageSnapshot({ scenarioRunId }) {
-    this.emitEvent({
-      ...this.makeBaseEvent({ scenarioRunId }),
-      type: "SCENARIO_MESSAGE_SNAPSHOT" /* MESSAGE_SNAPSHOT */,
-      messages: this.state.history
-      // Add any other required fields from MessagesSnapshotEventSchema
-    });
+  /**
+   * Reports whether a result has been stored on this execution.
+   *
+   * @returns `true` when `partialResult` is anything other than `null`.
+   */
+  hasResult() {
+    return this.partialResult !== null;
   }
-  /**
-   * Emits a run finished event with the final execution status.
-   */
-  emitRunFinished({
-    scenarioRunId,
-    status
-  }) {
-    this.emitEvent({
-      ...this.makeBaseEvent({ scenarioRunId }),
-      type: "SCENARIO_RUN_FINISHED" /* RUN_FINISHED */,
-      status
-      // Add error/metrics fields if needed
-    });
+  /**
+   * Stores `result` as this execution's `partialResult`.
+   */
+  setResult(result) {
+    this.partialResult = result;
   }
-};
-// src/config/load.ts
-import fs from "node:fs/promises";
-import path from "node:path";
-import { pathToFileURL } from "node:url";
-async function loadScenarioProjectConfig() {
-  const cwd = process.cwd();
-  const configNames = [
-    "scenario.config.js",
-    "scenario.config.mjs"
-  ];
-  for (const name of configNames) {
-    const fullPath = path.join(cwd, name);
-    try {
-      await fs.access(fullPath);
-      const configModule = await import(pathToFileURL(fullPath).href);
-      const config2 = configModule.default || configModule;
-      const parsed = scenarioProjectConfigSchema.safeParse(config2);
-      if (!parsed.success) {
-        throw new Error(
-          `Invalid config file ${name}: ${JSON.stringify(parsed.error.format(), null, 2)}`
-        );
-      }
-      return parsed.data;
-    } catch (error) {
-      if (error instanceof Error && "code" in error && error.code === "ENOENT") {
-        continue;
-      }
-      throw error;
+  async scriptCallAgent(role, content, judgmentRequest = false) {
+    this.consumeUntilRole(role);
+    let index = -1;
+    let agent2 = null;
+    let nextAgent = this.getNextAgentForRole(role);
+    if (!nextAgent) {
+      this.newTurn();
+      this.consumeUntilRole(role);
+      nextAgent = this.getNextAgentForRole(role);
     }
-  }
-  return await scenarioProjectConfigSchema.parseAsync({});
-}
-// src/events/event-bus.ts
-import { concatMap, EMPTY, catchError, Subject as Subject2 } from "rxjs";
-// src/events/event-reporter.ts
-var EventReporter = class {
-  eventsEndpoint;
-  apiKey;
-  logger = new Logger("scenario.events.EventReporter");
-  constructor(config2) {
-    this.eventsEndpoint = new URL("/api/scenario-events", config2.endpoint);
-    this.apiKey = config2.apiKey ?? "";
-    if (!process.env.SCENARIO_DISABLE_SIMULATION_REPORT_INFO) {
-      console.log("=== Scenario Simulation Reporting ===");
-      if (!this.apiKey) {
-        console.warn("LangWatch API key not configured, simulations will be local");
-        console.warn(`To enable simulation reporting in the LangWatch dashboard, configure your LangWatch API key (via LANGWATCH_API_KEY, or scenario.config.js)`);
-      } else {
-        console.log("Simulation reporting is enabled");
-        console.log(`Endpoint: ${config2.endpoint} -> ${this.eventsEndpoint.href}`);
-        console.log(`API Key: ${!this.apiKey ? "not configured" : "configured"}`);
+    if (!nextAgent) {
+      let roleClass = "";
+      switch (role) {
+        case "User" /* USER */:
+          roleClass = "a scenario.userSimulatorAgent()";
+          break;
+        case "Agent" /* AGENT */:
+          roleClass = "a scenario.agent()";
+          break;
+        case "Judge" /* JUDGE */:
+          roleClass = "a scenario.judgeAgent()";
+          break;
+        default:
+          roleClass = "your agent";
       }
-      console.log("=== Scenario Simulation Reporting ===");
-    }
-  }
-  /**
-   * Posts an event to the configured endpoint.
-   * Logs success/failure but doesn't throw - event posting shouldn't break scenario execution.
-   */
-  async postEvent(event) {
-    this.logger.debug(`[${event.type}] Posting event`, {
-      event
-    });
-    if (!this.eventsEndpoint) {
-      this.logger.warn(
-        "No LANGWATCH_ENDPOINT configured, skipping event posting"
+      if (content)
+        throw new Error(
+          `Cannot generate a message for role \`${role}\` with content \`${content}\` because no agent with this role was found, please add ${roleClass} to the scenario \`agents\` list`
+        );
+      throw new Error(
+        `Cannot generate a message for role \`${role}\` because no agent with this role was found, please add ${roleClass} to the scenario \`agents\` list`
       );
-      return;
     }
-    try {
-      const response = await fetch(this.eventsEndpoint.href, {
-        method: "POST",
-        body: JSON.stringify(event),
-        headers: {
-          "Content-Type": "application/json",
-          "X-Auth-Token": this.apiKey
-        }
-      });
-      this.logger.debug(
-        `[${event.type}] Event POST response status: ${response.status}`
-      );
-      if (response.ok) {
-        const data = await response.json();
-        this.logger.debug(`[${event.type}] Event POST response:`, data);
-      } else {
-        const errorText = await response.text();
-        this.logger.error(`[${event.type}] Event POST failed:`, {
-          status: response.status,
-          statusText: response.statusText,
-          error: errorText,
-          event
-        });
-      }
-    } catch (error) {
-      this.logger.error(`[${event.type}] Event POST error:`, {
-        error,
-        event,
-        endpoint: this.eventsEndpoint
-      });
-    }
-  }
-};
-// src/events/event-bus.ts
-var EventBus = class {
-  events$ = new Subject2();
-  eventReporter;
-  processingPromise = null;
-  logger = new Logger("scenario.events.EventBus");
-  constructor(config2) {
-    this.eventReporter = new EventReporter(config2);
-  }
-  /**
-   * Publishes an event into the processing pipeline.
-   */
-  publish(event) {
-    this.logger.debug(`[${event.type}] Publishing event`, {
-      event
-    });
-    this.events$.next(event);
-  }
-  /**
-   * Begins listening for and processing events.
-   * Returns a promise that resolves when a RUN_FINISHED event is fully processed.
-   */
-  listen() {
-    this.logger.debug("Listening for events");
-    if (this.processingPromise) {
-      return this.processingPromise;
-    }
-    this.processingPromise = new Promise((resolve, reject) => {
-      this.events$.pipe(
-        concatMap(async (event) => {
-          this.logger.debug(`[${event.type}] Processing event`, {
-            event
-          });
-          await this.eventReporter.postEvent(event);
-          return event;
-        }),
-        catchError((error) => {
-          this.logger.error("Error in event stream:", error);
-          return EMPTY;
-        })
-      ).subscribe({
-        next: (event) => {
-          this.logger.debug(`[${event.type}] Event processed`, {
-            event
-          });
-          if (event.type === "SCENARIO_RUN_FINISHED" /* RUN_FINISHED */) {
-            resolve();
-          }
-        },
-        error: (error) => {
-          this.logger.error("Error in event stream:", error);
-          reject(error);
-        }
-      });
-    });
-    return this.processingPromise;
-  }
-  /**
-   * Stops accepting new events and drains the processing queue.
-   */
-  async drain() {
-    this.logger.debug("Draining event stream");
-    this.events$.unsubscribe();
-    if (this.processingPromise) {
-      await this.processingPromise;
-    }
-  }
-  /**
-   * Subscribes to an event stream.
-   * @param source$ - The event stream to subscribe to.
-   */
-  subscribeTo(source$) {
-    this.logger.debug("Subscribing to event stream");
-    return source$.subscribe(this.events$);
-  }
-};
-// src/runner/run.ts
-async function run(cfg) {
-  if (!cfg.name) {
-    throw new Error("Scenario name is required");
-  }
-  if (!cfg.description) {
-    throw new Error("Scenario description is required");
-  }
-  if ((cfg.maxTurns || 10) < 1) {
-    throw new Error("Max turns must be at least 1");
-  }
-  if (cfg.agents.length === 0) {
-    throw new Error("At least one agent is required");
-  }
-  if (!cfg.agents.find((agent2) => agent2.role === "Agent" /* AGENT */)) {
-    throw new Error("At least one non-user/non-judge agent is required");
-  }
-  cfg.agents.forEach((agent2, i) => {
-    if (!allAgentRoles.includes(agent2.role)) {
-      throw new Error(`Agent ${i} has invalid role: ${agent2.role}`);
-    }
-  });
-  if (!cfg.threadId) {
-    cfg.threadId = generateThreadId();
-  }
-  const steps = cfg.script || [proceed()];
-  const execution = new ScenarioExecution(cfg, steps);
-  let eventBus = null;
-  let subscription = null;
-  try {
-    const projectConfig = await loadScenarioProjectConfig();
-    eventBus = new EventBus({
-      endpoint: projectConfig.langwatchEndpoint ?? process.env.LANGWATCH_ENDPOINT ?? "https://app.langwatch.ai",
-      apiKey: projectConfig.langwatchApiKey ?? process.env.LANGWATCH_API_KEY
-    });
-    eventBus.listen();
-    subscription = eventBus.subscribeTo(execution.events$);
-    const result = await execution.execute();
-    if (cfg.verbose && !result.success) {
-      console.log(`Scenario failed: ${cfg.name}`);
-      console.log(`Reasoning: ${result.reasoning}`);
-      console.log("--------------------------------");
-      console.log(`Passed criteria: ${result.passedCriteria.join("\n- ")}`);
-      console.log(`Failed criteria: ${result.failedCriteria.join("\n- ")}`);
-      console.log(result.messages.map(formatMessage).join("\n"));
+    index = nextAgent.index;
+    agent2 = nextAgent.agent;
+    this.removePendingAgent(agent2);
+    if (content) {
+      const message2 = typeof content === "string" ? { role: role === "User" /* USER */ ? "user" : "assistant", content } : content;
+      this.state.addMessage(message2);
+      this.broadcastMessage(message2, index);
+      return null;
     }
-    return result;
-  } finally {
-    await (eventBus == null ? void 0 : eventBus.drain());
-    subscription == null ? void 0 : subscription.unsubscribe();
+    const result = await this.callAgent(index, role, judgmentRequest);
+    if (result && typeof result === "object" && "success" in result) {
+      return result;
+    }
+    return null;
   }
-}
-function formatMessage(m) {
-  switch (m.role) {
-    case "user":
-      return `User: ${m.content}`;
-    case "assistant":
-      return `Assistant: ${formatParts(m.content)}`;
-    case "tool":
-      return `Tool: ${formatParts(m.content)}`;
-    default:
-      return `${m.role}: ${m.content}`;
+  /**
+   * Re-initialises this execution: fresh state object, thread id, agent list,
+   * turn bookkeeping, start timestamp and per-agent pending messages.
+   */
+  reset() {
+    this.state = new ScenarioExecutionState(this.config);
+    // Reuse the configured thread id when there is one, otherwise generate one.
+    this.state.threadId = this.config.threadId || generateThreadId();
+    this.setAgents(this.config.agents);
+    // newTurn() refills the pending agents/roles but also moves currentTurn
+    // forward, so the counter is set back to 0 right after.
+    this.newTurn();
+    this.state.currentTurn = 0;
+    this.totalStartTime = Date.now();
+    this.pendingMessages.clear();
   }
-}
-function formatParts(part) {
-  if (typeof part === "string") {
-    return part;
+  /**
+   * Looks up the first agent that has the given role, is still pending on the
+   * current turn, and whose role is still in the turn's pending-role list.
+   *
+   * @param role - The role to look for.
+   * @returns `{ idx, agent }` for the match, or `{ idx: -1, agent: null }` when there is none.
+   */
+  nextAgentForRole(role) {
+    const idx = this.agents.findIndex(
+      (candidate) => candidate.role === role && this.pendingAgentsOnTurn.has(candidate) && this.pendingRolesOnTurn.includes(role)
+    );
+    if (idx === -1) {
+      return { idx: -1, agent: null };
+    }
+    return { idx, agent: this.agents[idx] };
   }
-  if (Array.isArray(part)) {
-    if (part.length === 1) {
-      return formatPart(part[0]);
+  newTurn() {
+    this.pendingAgentsOnTurn = new Set(this.agents);
+    this.pendingRolesOnTurn = [
+      "User" /* USER */,
+      "Agent" /* AGENT */,
+      "Judge" /* JUDGE */
+    ];
+    if (this.state.currentTurn === null) {
+      this.state.currentTurn = 1;
+    } else {
+      this.state.currentTurn++;
     }
-    return `
-${part.map(formatPart).join("\n")}`;
   }
-  return "Unknown content: " + JSON.stringify(part);
-}
-function formatPart(part) {
-  switch (part.type) {
-    case "text":
-      return part.text;
-    case "file":
-      return `(file): ${part.filename} ${typeof part.data === "string" ? `url:${part.data}` : "base64:omitted"}`;
-    case "tool-call":
-      return `(tool call): ${part.toolName} id:${part.toolCallId} args:(${JSON.stringify(part.args)})`;
-    case "tool-result":
-      return `(tool result): ${part.toolName} id:${part.toolCallId} result:(${JSON.stringify(part.result)})`;
-    case "reasoning":
-      return `(reasoning): ${part.text}`;
-    case "redacted-reasoning":
-      return `(redacted reasoning): ${part.data}`;
-    default:
-      return `Unknown content: ${JSON.stringify(part)}`;
+  /**
+   * Removes the first occurrence of `role` from the current turn's pending
+   * roles; does nothing when the role is not in the list.
+   */
+  removePendingRole(role) {
+    const position = this.pendingRolesOnTurn.indexOf(role);
+    if (position < 0) return;
+    this.pendingRolesOnTurn.splice(position, 1);
   }
-}
-// src/agents/judge-agent.ts
-import { generateText, tool } from "ai";
-import { z as z3 } from "zod";
-// src/agents/utils.ts
-var toolMessageRole = "tool";
-var assistantMessageRole = "assistant";
-var userMessageRole = "user";
-var groupMessagesByToolBoundaries = (messages) => {
-  const segments = [];
-  let currentSegment = [];
-  for (const message2 of messages) {
-    currentSegment.push(message2);
-    if (message2.role === toolMessageRole) {
-      segments.push(currentSegment);
-      currentSegment = [];
+  /**
+   * Takes `agent2` out of the set of agents still pending on the current turn.
+   */
+  removePendingAgent(agent2) {
+    this.pendingAgentsOnTurn.delete(agent2);
+  }
+  /**
+   * Finds the first agent with the given role that is still pending on the
+   * current turn.
+   *
+   * @param role - The role to look for.
+   * @returns `{ index, agent }` for the match, or `null` when there is none.
+   */
+  getNextAgentForRole(role) {
+    const index = this.agents.findIndex(
+      (candidate) => candidate.role === role && this.pendingAgentsOnTurn.has(candidate)
+    );
+    return index === -1 ? null : { index, agent: this.agents[index] };
   }
-  if (currentSegment.length > 0) {
-    segments.push(currentSegment);
+  /**
+   * Replaces the agent list and clears every entry in `agentTimes`.
+   */
+  setAgents(agents) {
+    this.agents = agents;
+    this.agentTimes.clear();
   }
-  return segments;
-};
-var segmentHasToolMessages = (segment) => {
-  return segment.some((message2) => {
-    if (message2.role === toolMessageRole) return true;
-    if (message2.role === assistantMessageRole && Array.isArray(message2.content)) {
-      return message2.content.some((part) => part.type === "tool-call");
+  /**
+   * Drops pending roles from the front of the current turn's queue until
+   * `role` is at the head, or until the queue is empty when `role` is not pending.
+   *
+   * @param role - The role that should be next in line.
+   */
+  consumeUntilRole(role) {
+    while (this.pendingRolesOnTurn.length > 0) {
+      const nextRole = this.pendingRolesOnTurn[0];
+      if (nextRole === role) break;
+      // Remove the head that was just inspected; pop() would discard the tail
+      // instead and end up emptying the whole queue.
+      this.pendingRolesOnTurn.shift();
     }
-    return false;
-  });
-};
-var reverseSegmentRoles = (segment) => {
-  return segment.map((message2) => {
-    const hasStringContent = typeof message2.content === "string";
-    if (!hasStringContent) return message2;
-    const roleMap = {
-      [userMessageRole]: assistantMessageRole,
-      [assistantMessageRole]: userMessageRole
-    };
-    const newRole = roleMap[message2.role];
-    if (!newRole) return message2;
+  }
+  reachedMaxTurns(errorMessage) {
+    var _a;
+    const agentRoleAgentsIdx = this.agents.map((agent2, i) => ({ agent: agent2, idx: i })).filter(({ agent: agent2 }) => agent2.role === "Agent" /* AGENT */).map(({ idx }) => idx);
+    const agentTimes = agentRoleAgentsIdx.map((i) => this.agentTimes.get(i) || 0);
+    const totalAgentTime = agentTimes.reduce((sum, time) => sum + time, 0);
     return {
-      role: newRole,
-      content: message2.content
+      success: false,
+      messages: this.state.messages,
+      reasoning: errorMessage || `Reached maximum turns (${this.config.maxTurns || 10}) without conclusion`,
+      metCriteria: [],
+      unmetCriteria: ((_a = this.getJudgeAgent()) == null ? void 0 : _a.criteria) ?? [],
+      totalTime: this.totalTime,
+      agentTime: totalAgentTime
     };
-  });
-};
-var messageRoleReversal = (messages) => {
-  const segments = groupMessagesByToolBoundaries(messages);
-  const processedSegments = segments.map(
-    (segment) => segmentHasToolMessages(segment) ? segment : reverseSegmentRoles(segment)
-  );
-  return processedSegments.flat();
-};
-var criterionToParamName = (criterion) => {
-  return criterion.replace(/"/g, "").replace(/[^a-zA-Z0-9]/g, "_").replace(/ /g, "_").toLowerCase().substring(0, 70);
-};
-// src/config/index.ts
-var logger = new Logger("scenario.config");
-var configLoaded = false;
-var config = null;
-var configLoadPromise = null;
-async function loadProjectConfig() {
-  if (configLoaded) {
-    return;
   }
-  if (configLoadPromise) {
-    return configLoadPromise;
+  /**
+   * Returns the first agent that is an instance of `JudgeAgentAdapter`,
+   * or `null` when the scenario has none.
+   */
+  getJudgeAgent() {
+    const judge = this.agents.find((candidate) => candidate instanceof JudgeAgentAdapter);
+    return judge ?? null;
   }
-  configLoadPromise = (async () => {
-    try {
-      config = await loadScenarioProjectConfig();
-      logger.info("loaded scenario project config", { config });
-    } catch (error) {
-      logger.error("error loading scenario project config", { error });
-    } finally {
-      configLoaded = true;
-    }
-  })();
-  return configLoadPromise;
-}
-async function getProjectConfig() {
-  await loadProjectConfig();
-  return config;
-}
-// src/utils/config.ts
-function mergeConfig(config2, projectConfig) {
-  if (!projectConfig) {
-    return config2;
+  /**
+   * Emits an event to the event stream for external consumption.
+   *
+   * @param event - The event object pushed onto `eventSubject`.
+   */
+  emitEvent(event) {
+    this.eventSubject.next(event);
   }
-  return {
-    ...projectConfig.defaultModel,
-    ...config2
-  };
-}
-function mergeAndValidateConfig(config2, projectConfig) {
-  var _a;
-  const mergedConfig = mergeConfig(config2, projectConfig);
-  mergedConfig.model = mergedConfig.model ?? ((_a = projectConfig == null ? void 0 : projectConfig.defaultModel) == null ? void 0 : _a.model);
-  if (!mergedConfig.model) {
-    throw new Error("Model is required");
+  /**
+   * Builds the fields every scenario event has in common.
+   *
+   * @param scenarioRunId - Identifier of the run the event belongs to.
+   * @returns An event skeleton; callers overwrite `type` with the real event type.
+   */
+  makeBaseEvent({ scenarioRunId }) {
+    const { id: scenarioId, setId: scenarioSetId } = this.config;
+    const baseEvent = {
+      // Stand-in value, overwritten by the specific event type
+      type: "placeholder",
+      timestamp: Date.now(),
+      batchRunId: batchRunId2,
+      scenarioId,
+      scenarioRunId,
+      scenarioSetId
+    };
+    return baseEvent;
   }
-  return mergedConfig;
-}
-// src/agents/judge-agent.ts
-function buildSystemPrompt(criteria, description) {
-  const criteriaList = (criteria == null ? void 0 : criteria.map((criterion, idx) => `${idx + 1}. ${criterion}`).join("\n")) || "No criteria provided";
-  return `
-<role>
-You are an LLM as a judge watching a simulated conversation as it plays out live to determine if the agent under test meets the criteria or not.
-</role>
-<goal>
-Your goal is to determine if you already have enough information to make a verdict of the scenario below, or if the conversation should continue for longer.
-If you do have enough information, use the finish_test tool to determine if all the criteria have been met, if not, use the continue_test tool to let the next step play out.
-</goal>
-<scenario>
-${description}
-</scenario>
-<criteria>
-${criteriaList}
-</criteria>
-<rules>
-- Be strict, do not let the conversation continue if the agent already broke one of the "do not" or "should not" criteria.
-- DO NOT make any judgment calls that are not explicitly listed in the success or failure criteria, withhold judgement if necessary
-</rules>
-`.trim();
-}
-function buildContinueTestTool() {
-  return tool({
-    description: "Continue the test with the next step",
-    parameters: z3.object({})
-  });
-}
-function buildFinishTestTool(criteria) {
-  const criteriaNames = criteria.map(criterionToParamName);
-  return tool({
-    description: "Complete the test with a final verdict",
-    parameters: z3.object({
-      criteria: z3.object(
-        Object.fromEntries(
-          criteriaNames.map((name, idx) => [
-            name,
-            z3.enum(["true", "false", "inconclusive"]).describe(criteria[idx])
-          ])
-        )
-      ).strict().describe("Strict verdict for each criterion"),
-      reasoning: z3.string().describe("Explanation of what the final verdict should be"),
-      verdict: z3.enum(["success", "failure", "inconclusive"]).describe("The final verdict of the test")
-    })
-  });
-}
-var judgeAgent = (cfg) => {
-  return {
-    role: "Judge" /* JUDGE */,
-    criteria: cfg.criteria,
-    call: async (input) => {
-      var _a;
-      const systemPrompt = cfg.systemPrompt ?? buildSystemPrompt(cfg.criteria, input.scenarioConfig.description);
-      const messages = [
-        { role: "system", content: systemPrompt },
-        ...input.messages
-      ];
-      const isLastMessage = input.scenarioState.turn == input.scenarioConfig.maxTurns;
-      const projectConfig = await getProjectConfig();
-      const mergedConfig = mergeAndValidateConfig(cfg, projectConfig);
-      if (!mergedConfig.model) {
-        throw new Error("Model is required for the judge agent");
+  /**
+   * Emits a run started event to indicate scenario execution has begun.
+   */
+  emitRunStarted({ scenarioRunId }) {
+    this.emitEvent({
+      ...this.makeBaseEvent({ scenarioRunId }),
+      type: "SCENARIO_RUN_STARTED" /* RUN_STARTED */,
+      metadata: {
+        name: this.config.name,
+        description: this.config.description
       }
-      const tools = {
-        continue_test: buildContinueTestTool(),
-        finish_test: buildFinishTestTool(cfg.criteria)
-      };
-      const enforceJudgement = input.judgmentRequest;
-      const hasCriteria = cfg.criteria.length && cfg.criteria.length > 0;
-      if (enforceJudgement && !hasCriteria) {
-        return {
-          success: false,
-          messages: [],
-          reasoning: "JudgeAgent: No criteria was provided to be judged against",
-          passedCriteria: [],
-          failedCriteria: []
-        };
+    });
+  }
+  /**
+   * Publishes the conversation so far as a message snapshot event.
+   *
+   * @param scenarioRunId - Identifier of the run the snapshot belongs to.
+   */
+  emitMessageSnapshot({ scenarioRunId }) {
+    const baseEvent = this.makeBaseEvent({ scenarioRunId });
+    const snapshotEvent = {
+      ...baseEvent,
+      type: "SCENARIO_MESSAGE_SNAPSHOT" /* MESSAGE_SNAPSHOT */,
+      // Add any other required fields from MessagesSnapshotEventSchema
+      messages: this.state.messages
+    };
+    this.emitEvent(snapshotEvent);
+  }
+  /**
+   * Emits a run finished event with the final execution status.
+   */
+  emitRunFinished({
+    scenarioRunId,
+    status,
+    result
+  }) {
+    const event = {
+      ...this.makeBaseEvent({ scenarioRunId }),
+      scenarioSetId: this.config.setId ?? "default",
+      type: "SCENARIO_RUN_FINISHED" /* RUN_FINISHED */,
+      status,
+      results: {
+        verdict: (result == null ? void 0 : result.success) ? "success" /* SUCCESS */ : "failure" /* FAILURE */,
+        metCriteria: (result == null ? void 0 : result.metCriteria) ?? [],
+        unmetCriteria: (result == null ? void 0 : result.unmetCriteria) ?? [],
+        reasoning: result == null ? void 0 : result.reasoning,
+        error: result == null ? void 0 : result.error
       }
-      const toolChoice = (isLastMessage || enforceJudgement) && hasCriteria ? { type: "tool", toolName: "finish_test" } : "required";
-      const completion = await generateText({
-        model: mergedConfig.model,
-        messages,
-        temperature: mergedConfig.temperature ?? 0,
-        maxTokens: mergedConfig.maxTokens,
-        tools,
-        toolChoice
-      });
-      let args;
-      if ((_a = completion.toolCalls) == null ? void 0 : _a.length) {
-        const toolCall = completion.toolCalls[0];
-        switch (toolCall.toolName) {
-          case "finish_test": {
-            args = toolCall.args;
-            const verdict = args.verdict || "inconclusive";
-            const reasoning = args.reasoning || "No reasoning provided";
-            const criteria = args.criteria || {};
-            const criteriaValues = Object.values(criteria);
-            const passedCriteria = cfg.criteria.filter((_, i) => criteriaValues[i] === "true");
-            const failedCriteria = cfg.criteria.filter((_, i) => criteriaValues[i] !== "true");
-            return {
-              success: verdict === "success",
-              messages: input.messages,
-              reasoning,
-              passedCriteria,
-              failedCriteria
-            };
-          }
-          case "continue_test":
-            return [];
-          default:
-            return {
-              success: false,
-              messages: input.messages,
-              reasoning: `JudgeAgent: Unknown tool call: ${toolCall.toolName}`,
-              passedCriteria: [],
-              failedCriteria: cfg.criteria
-            };
-        }
+    };
+    this.emitEvent(event);
+    this.eventSubject.complete();
+  }
+  /**
+   * Distributes a message to all other agents in the scenario.
+   *
+   * @param message - The message to broadcast.
+   * @param fromAgentIdx - The index of the agent that sent the message, to avoid echoing.
+   */
+  broadcastMessage(message2, fromAgentIdx) {
+    for (let idx = 0; idx < this.agents.length; idx++) {
+      if (idx === fromAgentIdx) continue;
+      if (!this.pendingMessages.has(idx)) {
+        this.pendingMessages.set(idx, []);
       }
-      return {
-        success: false,
-        messages: input.messages,
-        reasoning: `JudgeAgent: No tool call found in LLM output`,
-        passedCriteria: [],
-        failedCriteria: cfg.criteria
-      };
+      this.pendingMessages.get(idx).push(message2);
     }
-  };
+  }
 };
+/**
+ * Normalizes whatever an agent returned into a flat list of messages.
+ *
+ * @param response - A plain string, an array of messages, a single message
+ *   object (anything with a `role` property), or any other value.
+ * @param role - Role given to the message that wraps a string response.
+ * @returns An array of messages. A string becomes one `{ role, content }`
+ *   message, an array is returned as-is, a message object is wrapped in an
+ *   array, and every other value (including `null`/`undefined`) yields `[]`.
+ */
+function convertAgentReturnTypesToMessages(response, role) {
+  if (typeof response === "string")
+    return [{ role, content: response }];
+  if (Array.isArray(response))
+    return response;
+  // `typeof null` is "object" and `"role" in null` throws a TypeError,
+  // so null has to be excluded before the `in` check.
+  if (response !== null && typeof response === "object" && "role" in response)
+    return [response];
+  return [];
+}
-// src/agents/user-simulator-agent.ts
-import { generateText as generateText2 } from "ai";
-function buildSystemPrompt2(description) {
-  return `
-<role>
-You are pretending to be a user, you are testing an AI Agent (shown as the user role) based on a scenario.
-Approach this naturally, as a human user would, with very short inputs, few words, all lowercase, imperative, not periods, like when they google or talk to chatgpt.
-</role>
-<goal>
-Your goal (assistant) is to interact with the Agent Under Test (user) as if you were a human user to see if it can complete the scenario successfully.
-</goal>
+// src/runner/index.ts
+var runner_exports = {};
+__export(runner_exports, {
+  run: () => run
+});
-<scenario>
-${description}
-</scenario>
+// src/script/index.ts
+var script_exports = {};
+__export(script_exports, {
+  agent: () => agent,
+  fail: () => fail,
+  judge: () => judge,
+  message: () => message,
+  proceed: () => proceed,
+  succeed: () => succeed,
+  user: () => user
+});
+// Script step factories. Each one captures its arguments and returns a step
+// function of `(state, executor)` that ignores the state and forwards the
+// captured arguments to the executor method of the same name, returning
+// whatever that method returns.
+var message = (message2) => (_state, executor) => executor.message(message2);
+var agent = (content) => (_state, executor) => executor.agent(content);
+var judge = (content) => (_state, executor) => executor.judge(content);
+var user = (content) => (_state, executor) => executor.user(content);
+var proceed = (turns, onTurn, onStep) => (_state, executor) => executor.proceed(turns, onTurn, onStep);
+var succeed = (reasoning) => (_state, executor) => executor.succeed(reasoning);
+var fail = (reasoning) => (_state, executor) => executor.fail(reasoning);
-<rules>
-- DO NOT carry over any requests yourself, YOU ARE NOT the assistant today, you are the user
-</rules>
-`.trim();
+// src/runner/run.ts
+/**
+ * Validates a scenario configuration, executes it, and returns its result.
+ *
+ * Side effects visible here: assigns `cfg.threadId` when it is missing,
+ * wires the execution's event stream into an `EventBus`, and, when
+ * `cfg.verbose` is set and the run did not succeed, logs the reasoning,
+ * criteria and formatted messages to the console.
+ *
+ * @param cfg - Scenario configuration (`name`, `description`, `agents`, and
+ *   optionally `maxTurns`, `script`, `threadId`, `verbose`).
+ * @returns The result produced by `ScenarioExecution.execute()`.
+ * @throws {Error} When the name or description is missing, `maxTurns` is
+ *   below 1, no agents are given, no agent has the "Agent" role, or an agent
+ *   has a role outside `allAgentRoles`.
+ */
+async function run(cfg) {
+  if (!cfg.name) {
+    throw new Error("Scenario name is required");
+  }
+  if (!cfg.description) {
+    throw new Error("Scenario description is required");
+  }
+  // `??` instead of `||`: an explicit `maxTurns: 0` must be rejected here,
+  // not silently swapped for the default and waved through.
+  if ((cfg.maxTurns ?? 10) < 1) {
+    throw new Error("Max turns must be at least 1");
+  }
+  if (cfg.agents.length === 0) {
+    throw new Error("At least one agent is required");
+  }
+  if (!cfg.agents.find((agent2) => agent2.role === "Agent" /* AGENT */)) {
+    throw new Error("At least one non-user/non-judge agent is required");
+  }
+  cfg.agents.forEach((agent2, i) => {
+    if (!allAgentRoles.includes(agent2.role)) {
+      throw new Error(`Agent ${i} has invalid role: ${agent2.role}`);
+    }
+  });
+  if (!cfg.threadId) {
+    cfg.threadId = generateThreadId();
+  }
+  // Without an explicit script the scenario simply proceeds on its own.
+  const steps = cfg.script || [proceed()];
+  const execution = new ScenarioExecution(cfg, steps);
+  let eventBus = null;
+  let subscription = null;
+  try {
+    const projectConfig = await loadScenarioProjectConfig();
+    eventBus = new EventBus({
+      endpoint: projectConfig.langwatchEndpoint ?? process.env.LANGWATCH_ENDPOINT ?? "https://app.langwatch.ai",
+      apiKey: projectConfig.langwatchApiKey ?? process.env.LANGWATCH_API_KEY
+    });
+    eventBus.listen();
+    subscription = eventBus.subscribeTo(execution.events$);
+    const result = await execution.execute();
+    if (cfg.verbose && !result.success) {
+      console.log(`Scenario failed: ${cfg.name}`);
+      console.log(`Reasoning: ${result.reasoning}`);
+      console.log("--------------------------------");
+      console.log(`Met criteria: ${result.metCriteria.join("\n- ")}`);
+      console.log(`Unmet criteria: ${result.unmetCriteria.join("\n- ")}`);
+      console.log(result.messages.map(formatMessage).join("\n"));
+    }
+    return result;
+  } finally {
+    try {
+      await (eventBus == null ? void 0 : eventBus.drain());
+    } finally {
+      // Release the subscription even when draining the bus rejects.
+      subscription == null ? void 0 : subscription.unsubscribe();
+    }
+  }
 }
-var userSimulatorAgent = (config2) => {
-  return {
-    role: "User" /* USER */,
-    call: async (input) => {
-      const systemPrompt = buildSystemPrompt2(input.scenarioConfig.description);
-      const messages = [
-        { role: "system", content: systemPrompt },
-        { role: "assistant", content: "Hello, how can I help you today" },
-        ...input.messages
-      ];
-      const projectConfig = await getProjectConfig();
-      const mergedConfig = mergeAndValidateConfig(config2 ?? {}, projectConfig);
-      if (!mergedConfig.model) {
-        throw new Error("Model is required for the user simulator agent");
-      }
-      const reversedMessages = messageRoleReversal(messages);
-      const completion = await generateText2({
-        model: mergedConfig.model,
-        messages: reversedMessages,
-        temperature: mergedConfig.temperature ?? 0,
-        maxTokens: mergedConfig.maxTokens
-      });
-      const messageContent = completion.text;
-      if (!messageContent) {
-        throw new Error("No response content from LLM");
-      }
-      return { role: "user", content: messageContent };
+/**
+ * Renders one conversation message as a log line prefixed by its role.
+ *
+ * @param m - Message with a `role` and a `content` field.
+ * @returns `"User: …"`, `"Assistant: …"` or `"Tool: …"` with the content run
+ *   through `formatParts`, or `"<role>: <content>"` for any other role.
+ */
+function formatMessage(m) {
+  switch (m.role) {
+    case "user":
+      // Route user content through formatParts like the other roles:
+      // interpolating an array of parts directly prints "[object Object]".
+      return `User: ${formatParts(m.content)}`;
+    case "assistant":
+      return `Assistant: ${formatParts(m.content)}`;
+    case "tool":
+      return `Tool: ${formatParts(m.content)}`;
+    default:
+      return `${m.role}: ${m.content}`;
+  }
+}
+function formatParts(part) {
+  if (typeof part === "string") {
+    return part;
+  }
+  if (Array.isArray(part)) {
+    if (part.length === 1) {
+      return formatPart(part[0]);
     }
-  };
+    return `
+${part.map(formatPart).join("\n")}`;
+  }
+  return "Unknown content: " + JSON.stringify(part);
+}
+/**
+ * Renders a single content part as a short human-readable string.
+ *
+ * @param part - Content part; its `type` field selects the rendering.
+ * @returns The rendered text for text, file, tool-call, tool-result,
+ *   reasoning and redacted-reasoning parts; any other type is dumped as
+ *   `Unknown content: <JSON>`.
+ */
+function formatPart(part) {
+  const kind = part.type;
+  if (kind === "text") {
+    return part.text;
+  }
+  if (kind === "file") {
+    // String data is echoed after `url:`; any other data is never printed.
+    return `(file): ${part.filename} ${typeof part.data === "string" ? `url:${part.data}` : "base64:omitted"}`;
+  }
+  if (kind === "tool-call") {
+    return `(tool call): ${part.toolName} id:${part.toolCallId} args:(${JSON.stringify(part.args)})`;
+  }
+  if (kind === "tool-result") {
+    return `(tool result): ${part.toolName} id:${part.toolCallId} result:(${JSON.stringify(part.result)})`;
+  }
+  if (kind === "reasoning") {
+    return `(reasoning): ${part.text}`;
+  }
+  if (kind === "redacted-reasoning") {
+    return `(redacted reasoning): ${part.data}`;
+  }
+  return `Unknown content: ${JSON.stringify(part)}`;
+}
+// src/index.ts
+var scenario = {
+  ...agents_exports,
+  ...domain_exports,
+  ...execution_exports,
+  ...runner_exports,
+  ...script_exports
 };
+var index_default = scenario;
 export {
   AgentAdapter,
   AgentRole,
@@ -1375,6 +1145,7 @@ export {
   UserSimulatorAgentAdapter,
   agent,
   allAgentRoles,
+  index_default as default,
   defineConfig,
   fail,
   judge,
@@ -1382,6 +1153,7 @@ export {
   message,
   proceed,
   run,
+  scenario,
   scenarioProjectConfigSchema,
   succeed,
   user,