PyPI - langwatch-scenario - Versions diffs - 0.3.0__py3-none-any.whl → 0.6.0__py3-none-any.whl - Mend

langwatch-scenario: 0.3.0 (py3-none-any.whl) → 0.6.0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33):

langwatch_scenario-0.6.0.dist-info/METADATA +385 -0
langwatch_scenario-0.6.0.dist-info/RECORD +27 -0
scenario/__init__.py +128 -17
scenario/{error_messages.py → _error_messages.py} +8 -38
scenario/_utils/__init__.py +32 -0
scenario/_utils/ids.py +58 -0
scenario/_utils/message_conversion.py +103 -0
scenario/_utils/utils.py +425 -0
scenario/agent_adapter.py +115 -0
scenario/cache.py +134 -9
scenario/config.py +156 -10
scenario/events/__init__.py +66 -0
scenario/events/event_bus.py +175 -0
scenario/events/event_reporter.py +83 -0
scenario/events/events.py +169 -0
scenario/events/messages.py +84 -0
scenario/events/utils.py +86 -0
scenario/judge_agent.py +414 -0
scenario/pytest_plugin.py +177 -14
scenario/scenario_executor.py +630 -154
scenario/scenario_state.py +205 -0
scenario/script.py +361 -0
scenario/types.py +197 -20
scenario/user_simulator_agent.py +242 -0
langwatch_scenario-0.3.0.dist-info/METADATA +0 -302
langwatch_scenario-0.3.0.dist-info/RECORD +0 -16
scenario/scenario.py +0 -238
scenario/scenario_agent_adapter.py +0 -16
scenario/testing_agent.py +0 -279
scenario/utils.py +0 -264
{langwatch_scenario-0.3.0.dist-info → langwatch_scenario-0.6.0.dist-info}/WHEEL +0 -0
{langwatch_scenario-0.3.0.dist-info → langwatch_scenario-0.6.0.dist-info}/entry_points.txt +0 -0
{langwatch_scenario-0.3.0.dist-info → langwatch_scenario-0.6.0.dist-info}/top_level.txt +0 -0

scenario/scenario_executor.py (changed)

@@ -1,134 +1,456 @@
 """
-ScenarioExecutor module: holds the scenario execution logic and state, orchestrating the conversation between the testing agent and the agent under test.
+Scenario execution engine for agent testing.
+This module contains the core ScenarioExecutor class that orchestrates the execution
+of scenario tests, managing the interaction between user simulators, agents under test,
+and judge agents to determine test success or failure.
 """
 import sys
 from typing import (
-    TYPE_CHECKING,
     Awaitable,
     Callable,
     Dict,
     List,
-    Any,
     Optional,
     Set,
     Tuple,
     Union,
+    TypedDict,
 )
 import time
 import termcolor
+import asyncio
+import concurrent.futures
-from scenario.utils import (
-    await_if_awaitable,
+from scenario.config import ScenarioConfig
+from scenario._utils import (
     check_valid_return_type,
     convert_agent_return_types_to_openai_messages,
     print_openai_messages,
     show_spinner,
+    await_if_awaitable,
+    get_or_create_batch_run_id,
+    generate_scenario_run_id,
 )
 from openai.types.chat import (
     ChatCompletionMessageParam,
     ChatCompletionUserMessageParam,
-    ChatCompletionMessageToolCallParam,
+    ChatCompletionAssistantMessageParam,
 )
-from .types import AgentInput, ScenarioAgentRole, ScenarioResult, ScriptStep
-from .error_messages import agent_response_not_awaitable
+from .types import AgentInput, AgentRole, ScenarioResult, ScriptStep
+from ._error_messages import agent_response_not_awaitable
 from .cache import context_scenario
-from .scenario_agent_adapter import ScenarioAgentAdapter
+from .agent_adapter import AgentAdapter
+from .script import proceed
 from pksuid import PKSUID
-if TYPE_CHECKING:
-    from scenario.scenario import Scenario
+from .scenario_state import ScenarioState
+from .events import (
+    ScenarioEventBus,
+    ScenarioRunStartedEvent,
+    ScenarioMessageSnapshotEvent,
+    ScenarioRunFinishedEvent,
+    ScenarioRunStartedEventMetadata,
+    ScenarioRunFinishedEventResults,
+    ScenarioRunFinishedEventVerdict,
+    ScenarioRunFinishedEventStatus,
+    convert_messages_to_ag_ui_messages,
+)
 class ScenarioExecutor:
-    scenario: "Scenario"
-    messages: List[ChatCompletionMessageParam]
-    thread_id: str
-    current_turn: int
-    _context: Optional[Dict[str, Any]]
-    _script: List[ScriptStep]
-    _agents: List[ScenarioAgentAdapter]
+    """
+    Core orchestrator for scenario-based agent testing.
+    The ScenarioExecutor manages the complete lifecycle of a scenario test, including:
+    - Orchestrating conversations between user simulators, agents, and judges
+    - Managing turn-based execution flow
+    - Handling script-based scenario control
+    - Collecting and reporting test results
+    - Supporting debug mode for interactive testing
+    This class serves as both a builder (for configuration) and an executor (for running tests).
+    Most users will interact with it through the high-level `scenario.run()` function rather
+    than instantiating it directly.
+    Attributes:
+        name: Human-readable name for the scenario
+        description: Detailed description of what the scenario tests
+        agents: List of agent adapters participating in the scenario
+        script: Optional list of script steps to control scenario flow
+        config: Configuration settings for execution behavior
+    Example:
+        ```
+        # Direct instantiation (less common)
+        executor = ScenarioExecutor(
+           name="weather query test",
+           description="User asks about weather, agent should provide helpful response",
+           agents=[
+               weather_agent,
+               scenario.UserSimulatorAgent(),
+               scenario.JudgeAgent(criteria=["Agent provides helpful weather info"])
+           ],
+           max_turns=10,
+           verbose=True
+        )
+        result = await executor._run()
+        # Preferred high-level API
+        result = await scenario.run(
+           name="weather query test",
+           description="User asks about weather, agent should provide helpful response",
+           agents=[
+               weather_agent,
+               scenario.UserSimulatorAgent(),
+               scenario.JudgeAgent(criteria=["Agent provides helpful weather info"])
+           ]
+        )
+        ```
+    Note:
+        - Scenarios run in isolated thread pools to support parallel execution
+        - All agent interactions are cached when cache_key is configured
+        - Debug mode allows step-by-step execution with user intervention
+        - Results include detailed timing information and conversation history
+    """
+    name: str
+    description: str
+    agents: List[AgentAdapter]
+    script: List[ScriptStep]
+    config: ScenarioConfig
+    _state: ScenarioState
     _total_start_time: float
     _pending_messages: Dict[int, List[ChatCompletionMessageParam]]
-    _pending_roles_on_turn: List[ScenarioAgentRole] = []
-    _pending_agents_on_turn: Set[ScenarioAgentAdapter] = set()
+    _pending_roles_on_turn: List[AgentRole] = []
+    _pending_agents_on_turn: Set[AgentAdapter] = set()
     _agent_times: Dict[int, float] = {}
+    event_bus: ScenarioEventBus
+    batch_run_id: str
     def __init__(
         self,
-        scenario: "Scenario",
-        context: Optional[Dict[str, Any]] = None,
+        name: str,
+        description: str,
+        agents: List[AgentAdapter] = [],
         script: Optional[List[ScriptStep]] = None,
+        # Config
+        max_turns: Optional[int] = None,
+        verbose: Optional[Union[bool, int]] = None,
+        cache_key: Optional[str] = None,
+        debug: Optional[bool] = None,
+        event_bus: Optional[ScenarioEventBus] = None,
     ):
-        super().__init__()
+        """
+        Initialize a scenario executor.
+        Args:
+            name: Human-readable name for the scenario (used in reports and logs)
+            description: Detailed description of what the scenario tests.
+                        This guides the user simulator's behavior and provides context.
+            agents: List of agent adapters participating in the scenario.
+                   Typically includes: agent under test, user simulator, and judge.
+            script: Optional list of script steps to control scenario flow.
+                   If not provided, defaults to automatic proceeding.
+            max_turns: Maximum number of conversation turns before timeout.
+                      Overrides global configuration for this scenario.
+            verbose: Whether to show detailed output during execution.
+                    Can be True/False or integer level (2 for extra details).
+            cache_key: Cache key for deterministic behavior across runs.
+                      Overrides global configuration for this scenario.
+            debug: Whether to enable debug mode with step-by-step execution.
+                  Overrides global configuration for this scenario.
+            event_reporter: Optional event reporter for the scenario
+        Example:
+            ```python
+            executor = ScenarioExecutor(
+                name="customer service test",
+                description="Customer has a billing question and needs help",
+                agents=[
+                    customer_service_agent,
+                    scenario.UserSimulatorAgent(),
+                    scenario.JudgeAgent(criteria=[
+                        "Agent is polite and professional",
+                        "Agent addresses the billing question",
+                        "Agent provides clear next steps"
+                    ])
+                ],
+                max_turns=15,
+                verbose=True,
+                debug=False
+            )
+            ```
+        """
+        self.name = name
+        self.description = description
+        self.agents = agents
+        self.script = script or [proceed()]
+        config = ScenarioConfig(
+            max_turns=max_turns,
+            verbose=verbose,
+            cache_key=cache_key,
+            debug=debug,
+        )
+        self.config = (ScenarioConfig.default_config or ScenarioConfig()).merge(config)
-        self.scenario = scenario.model_copy()
-        self._context = context
-        self._script = script or [scenario.proceed()]
-        self.current_turn = 0
         self.reset()
+        self.event_bus = event_bus or ScenarioEventBus()
+        self.batch_run_id = get_or_create_batch_run_id()
+    @classmethod
+    async def run(
+        cls,
+        name: str,
+        description: str,
+        agents: List[AgentAdapter] = [],
+        max_turns: Optional[int] = None,
+        verbose: Optional[Union[bool, int]] = None,
+        cache_key: Optional[str] = None,
+        debug: Optional[bool] = None,
+        script: Optional[List[ScriptStep]] = None,
+    ) -> ScenarioResult:
+        """
+        High-level interface for running a scenario test.
+        This is the main entry point for executing scenario tests. It creates a
+        ScenarioExecutor instance and runs it in an isolated thread pool to support
+        parallel execution and prevent blocking.
+        Args:
+            name: Human-readable name for the scenario
+            description: Detailed description of what the scenario tests
+            agents: List of agent adapters (agent under test, user simulator, judge)
+            max_turns: Maximum conversation turns before timeout (default: 10)
+            verbose: Show detailed output during execution
+            cache_key: Cache key for deterministic behavior
+            debug: Enable debug mode for step-by-step execution
+            script: Optional script steps to control scenario flow
+        Returns:
+            ScenarioResult containing the test outcome, conversation history,
+            success/failure status, and detailed reasoning
+        Example:
+            ```
+            import scenario
+            # Simple scenario with automatic flow
+            result = await scenario.run(
+               name="help request",
+               description="User asks for help with a technical problem",
+               agents=[
+                   my_agent,
+                   scenario.UserSimulatorAgent(),
+                   scenario.JudgeAgent(criteria=["Agent provides helpful response"])
+               ]
+            )
+            # Scripted scenario with custom evaluations
+            result = await scenario.run(
+               name="custom interaction",
+               description="Test specific conversation flow",
+               agents=[
+                   my_agent,
+                   scenario.UserSimulatorAgent(),
+                   scenario.JudgeAgent(criteria=["Agent provides helpful response"])
+               ],
+               script=[
+                   scenario.user("Hello"),
+                   scenario.agent(),
+                   custom_eval,
+                   scenario.succeed()
+               ]
+            )
+            # Results analysis
+            print(f"Test {'PASSED' if result.success else 'FAILED'}")
+            print(f"Reasoning: {result.reasoning}")
+            print(f"Conversation had {len(result.messages)} messages")
+            ```
+        Note:
+            - Runs in isolated thread pool to support parallel execution
+            - Blocks until scenario completes or times out
+            - All agent calls are automatically cached when cache_key is set
+            - Exception handling ensures clean resource cleanup
+        """
+        scenario = cls(
+            name=name,
+            description=description,
+            agents=agents,
+            max_turns=max_turns,
+            verbose=verbose,
+            cache_key=cache_key,
+            debug=debug,
+            script=script,
+        )
+        # We'll use a thread pool to run the execution logic, we
+        # require a separate thread because even though asyncio is
+        # being used throughout, any user code on the callback can
+        # be blocking, preventing them from running scenarios in parallel
+        with concurrent.futures.ThreadPoolExecutor() as executor:
+            def run_in_thread():
+                loop = asyncio.new_event_loop()
+                asyncio.set_event_loop(loop)
+                try:
+                    return loop.run_until_complete(scenario._run())
+                finally:
+                    loop.run_until_complete(scenario.event_bus.drain())
+                    loop.close()
+            # Run the function in the thread pool and await its result
+            # This converts the thread's execution into a Future that the current
+            # event loop can await without blocking
+            loop = asyncio.get_event_loop()
+            result = await loop.run_in_executor(executor, run_in_thread)
+            return result
     def reset(self):
-        self.messages = []
-        self._agents = []
+        """
+        Reset the scenario executor to initial state.
+        This method reinitializes all internal state for a fresh scenario run,
+        including conversation history, turn counters, and agent timing information.
+        Called automatically during initialization and can be used to rerun scenarios.
+        """
+        self._state = ScenarioState(
+            description=self.description,
+            messages=[],
+            thread_id=str(PKSUID("thread")),
+            current_turn=0,
+            config=self.config,
+            _executor=self,
+        )
+        # Pydantic doesn't actually set the _executor field from the constructor, as it's private, so we need to do it manually
+        self._state._executor = self
         self._pending_messages = {}
-        self.thread_id = str(PKSUID("thread"))
         self._total_start_time = time.time()
         self._agent_times = {}
-        for AgentClass in self.scenario.agents:
-            self._agents.append(
-                AgentClass(
-                    input=AgentInput(
-                        thread_id=self.thread_id,
-                        messages=[],
-                        new_messages=[],
-                        context=self._context or {},
-                        requested_role=list(AgentClass.roles)[0],
-                        scenario_state=self,
-                    )
-                )
-            )
         self._new_turn()
-        self.current_turn = 0
+        self._state.current_turn = 0
-        context_scenario.set(self.scenario)
+        context_scenario.set(self)
     def add_message(
         self, message: ChatCompletionMessageParam, from_agent_idx: Optional[int] = None
     ):
-        self.messages.append(message)
+        """
+        Add a message to the conversation and broadcast to other agents.
+        This method adds a message to the conversation history and makes it available
+        to other agents in their next call. It's used internally by the executor
+        and can be called from script steps to inject custom messages.
+        Args:
+            message: OpenAI-compatible message to add to the conversation
+            from_agent_idx: Index of the agent that generated this message.
+                           Used to avoid broadcasting the message back to its creator.
+        Example:
+            ```
+            def inject_system_message(state: ScenarioState) -> None:
+                state.add_message({
+                    "role": "system",
+                    "content": "The user is now in a hurry"
+                })
+            # Use in script
+            result = await scenario.run(
+               name="system message test",
+               agents=[agent, user_sim, judge],
+               script=[
+                   scenario.user("Hello"),
+                   scenario.agent(),
+                   inject_system_message,
+                   scenario.user(),  # Will see the system message
+                   scenario.succeed()
+               ]
+            )
+            ```
+        """
+        self._state.messages.append(message)
         # Broadcast the message to other agents
-        for idx, _ in enumerate(self._agents):
+        for idx, _ in enumerate(self.agents):
             if idx == from_agent_idx:
                 continue
             if idx not in self._pending_messages:
                 self._pending_messages[idx] = []
             self._pending_messages[idx].append(message)
     def add_messages(
         self,
         messages: List[ChatCompletionMessageParam],
         from_agent_idx: Optional[int] = None,
     ):
+        """
+        Add multiple messages to the conversation.
+        Convenience method for adding multiple messages at once. Each message
+        is added individually using add_message().
+        Args:
+            messages: List of OpenAI-compatible messages to add
+            from_agent_idx: Index of the agent that generated these messages
+        Example:
+            ```
+            # Agent returns multiple messages for a complex interaction
+            messages = [
+                {"role": "assistant", "content": "Let me search for that..."},
+                {"role": "assistant", "content": "Here's what I found: ..."}
+            ]
+            executor.add_messages(messages, from_agent_idx=0)
+            ```
+        """
         for message in messages:
             self.add_message(message, from_agent_idx)
     def _new_turn(self):
-        self._pending_agents_on_turn = set(self._agents)
+        self._pending_agents_on_turn = set(self.agents)
         self._pending_roles_on_turn = [
-            ScenarioAgentRole.USER,
-            ScenarioAgentRole.AGENT,
-            ScenarioAgentRole.JUDGE,
+            AgentRole.USER,
+            AgentRole.AGENT,
+            AgentRole.JUDGE,
         ]
-        self.current_turn += 1
+        self._state.current_turn += 1
     async def step(self) -> Union[List[ChatCompletionMessageParam], ScenarioResult]:
+        """
+        Execute a single step in the scenario.
+        A step consists of calling the next agent in the current turn's sequence
+        and processing their response. This method is used internally by the
+        scenario execution flow.
+        Returns:
+            Either a list of messages (if the scenario continues) or a
+            ScenarioResult (if the scenario should end)
+        Raises:
+            ValueError: If no result is returned from the internal step method
+        Note:
+            This is primarily an internal method. Most users should use the
+            high-level run() method or script DSL functions instead.
+        """
         result = await self._step()
         if result is None:
             raise ValueError("No result from step")
@@ -139,8 +461,8 @@ class ScenarioExecutor:
         go_to_next_turn=True,
         on_turn: Optional[
             Union[
-                Callable[["ScenarioExecutor"], None],
-                Callable[["ScenarioExecutor"], Awaitable[None]],
+                Callable[["ScenarioState"], None],
+                Callable[["ScenarioState"], Awaitable[None]],
             ]
         ] = None,
     ) -> Union[List[ChatCompletionMessageParam], ScenarioResult, None]:
@@ -151,9 +473,9 @@ class ScenarioExecutor:
             self._new_turn()
             if on_turn:
-                await await_if_awaitable(on_turn(self))
+                await await_if_awaitable(on_turn(self._state))
-            if self.current_turn >= (self.scenario.max_turns or 10):
+            if self._state.current_turn >= (self.config.max_turns or 10):
                 return self._reached_max_turns()
         current_role = self._pending_roles_on_turn[0]
@@ -166,10 +488,14 @@ class ScenarioExecutor:
         return await self._call_agent(idx, role=current_role)
     def _next_agent_for_role(
-        self, role: ScenarioAgentRole
-    ) -> Tuple[int, Optional[ScenarioAgentAdapter]]:
-        for idx, agent in enumerate(self._agents):
-            if role in agent.roles and agent in self._pending_agents_on_turn:
+        self, role: AgentRole
+    ) -> Tuple[int, Optional[AgentAdapter]]:
+        for idx, agent in enumerate(self.agents):
+            if (
+                role == agent.role
+                and agent in self._pending_agents_on_turn
+                and agent.role in self._pending_roles_on_turn
+            ):
                 return idx, agent
         return -1, None
@@ -177,8 +503,8 @@ class ScenarioExecutor:
         # If we reached max turns without conclusion, fail the test
         agent_roles_agents_idx = [
             idx
-            for idx, agent in enumerate(self._agents)
-            if ScenarioAgentRole.AGENT in agent.roles
+            for idx, agent in enumerate(self.agents)
+            if agent.role == AgentRole.AGENT
         ]
         agent_times = [
             self._agent_times[idx]
@@ -189,14 +515,14 @@ class ScenarioExecutor:
         return ScenarioResult(
             success=False,
-            messages=self.messages,
+            messages=self._state.messages,
             reasoning=error_message
-            or f"Reached maximum turns ({self.scenario.max_turns or 10}) without conclusion",
+            or f"Reached maximum turns ({self.config.max_turns or 10}) without conclusion",
             total_time=time.time() - self._total_start_time,
             agent_time=agent_time,
         )
-    async def run(self) -> ScenarioResult:
+    async def _run(self) -> ScenarioResult:
         """
         Run a scenario against the agent under test.
@@ -206,37 +532,61 @@ class ScenarioExecutor:
         Returns:
             ScenarioResult containing the test outcome
         """
+        scenario_run_id = generate_scenario_run_id()
-        if self.scenario.verbose:
-            print("")  # new line
+        try:
+            await self.event_bus.listen()
+            self._emit_run_started_event(scenario_run_id)
-        self.reset()
+            if self.config.verbose:
+                print("")  # new line
-        for script_step in self._script:
-            callable = script_step(self)
-            if isinstance(callable, Awaitable):
-                result = await callable
-            else:
-                result = callable
+            self.reset()
+            for script_step in self.script:
+                callable = script_step(self._state)
+                if isinstance(callable, Awaitable):
+                    result = await callable
+                else:
+                    result = callable
+                self._emit_message_snapshot_event(scenario_run_id)
-            if isinstance(result, ScenarioResult):
-                return result
+                if isinstance(result, ScenarioResult):
+                    status = ScenarioRunFinishedEventStatus.SUCCESS if result.success else ScenarioRunFinishedEventStatus.FAILED
+                    self._emit_run_finished_event(scenario_run_id, result, status)
+                    return result
-        return self._reached_max_turns(
-            """Reached end of script without conclusion, add one of the following to the end of the script:
+            result = self._reached_max_turns(
+                """Reached end of script without conclusion, add one of the following to the end of the script:
 - `scenario.proceed()` to let the simulation continue to play out
 - `scenario.judge()` to force criteria judgement
 - `scenario.succeed()` or `scenario.fail()` to end the test with an explicit result
-            """
-        )
+                """
+            )
+            status = ScenarioRunFinishedEventStatus.SUCCESS if result.success else ScenarioRunFinishedEventStatus.FAILED
+            self._emit_run_finished_event(scenario_run_id, result, status)
+            return result
+        except Exception as e:
+            # Publish failure event before propagating the error
+            error_result = ScenarioResult(
+                success=False,
+                messages=self._state.messages,
+                reasoning=f"Scenario failed with error: {str(e)}",
+                total_time=time.time() - self._total_start_time,
+                agent_time=0,
+            )
+            self._emit_run_finished_event(scenario_run_id, error_result, ScenarioRunFinishedEventStatus.ERROR)
+            raise  # Re-raise the exception after cleanup
     async def _call_agent(
-        self, idx: int, role: ScenarioAgentRole
+        self, idx: int, role: AgentRole, request_judgment: bool = False
     ) -> Union[List[ChatCompletionMessageParam], ScenarioResult]:
-        agent = self._agents[idx]
+        agent = self.agents[idx]
-        if role == ScenarioAgentRole.USER and self.scenario.debug:
+        if role == AgentRole.USER and self.config.debug:
             print(
                 f"\n{self._scenario_name()}{termcolor.colored('[Debug Mode]', 'yellow')} Press enter to continue or type a message to send"
             )
@@ -258,28 +608,26 @@ class ScenarioExecutor:
         with show_spinner(
             text=(
                 "Judging..."
-                if role == ScenarioAgentRole.JUDGE
-                else f"{role.value if isinstance(role, ScenarioAgentRole) else role}:"
+                if role == AgentRole.JUDGE
+                else f"{role.value if isinstance(role, AgentRole) else role}:"
             ),
             color=(
                 "blue"
-                if role == ScenarioAgentRole.AGENT
-                else "green" if role == ScenarioAgentRole.USER else "yellow"
+                if role == AgentRole.AGENT
+                else "green" if role == AgentRole.USER else "yellow"
             ),
-            enabled=self.scenario.verbose,
+            enabled=self.config.verbose,
         ):
             start_time = time.time()
             agent_response = agent.call(
                 AgentInput(
                     # TODO: test thread_id
-                    thread_id=self.thread_id,
-                    messages=self.messages,
+                    thread_id=self._state.thread_id,
+                    messages=self._state.messages,
                     new_messages=self._pending_messages.get(idx, []),
-                    # TODO: test context
-                    context=self._context or {},
-                    requested_role=role,
-                    scenario_state=self,
+                    judgment_request=request_judgment,
+                    scenario_state=self._state,
                 )
             )
             if not isinstance(agent_response, Awaitable):
@@ -303,12 +651,12 @@ class ScenarioExecutor:
             else:
                 messages = convert_agent_return_types_to_openai_messages(
                     agent_response,
-                    role="user" if role == ScenarioAgentRole.USER else "assistant",
+                    role="user" if role == AgentRole.USER else "assistant",
                 )
             self.add_messages(messages, from_agent_idx=idx)
-            if messages and self.scenario.verbose:
+            if messages and self.config.verbose:
                 print_openai_messages(
                     self._scenario_name(),
                     [m for m in messages if m["role"] != "system"],
@@ -317,75 +665,51 @@ class ScenarioExecutor:
             return messages
     def _scenario_name(self):
-        if self.scenario.verbose == 2:
-            return termcolor.colored(f"[Scenario: {self.scenario.name}] ", "yellow")
+        if self.config.verbose == 2:
+            return termcolor.colored(f"[Scenario: {self.name}] ", "yellow")
         else:
             return ""
-    # State access utils
-    def last_message(self) -> ChatCompletionMessageParam:
-        if len(self.messages) == 0:
-            raise ValueError("No messages found")
-        return self.messages[-1]
-    def last_user_message(self) -> ChatCompletionUserMessageParam:
-        user_messages = [m for m in self.messages if m["role"] == "user"]
-        if not user_messages:
-            raise ValueError("No user messages found")
-        return user_messages[-1]
-    def last_tool_call(
-        self, tool_name: str
-    ) -> Optional[ChatCompletionMessageToolCallParam]:
-        for message in reversed(self.messages):
-            if message["role"] == "assistant" and "tool_calls" in message:
-                for tool_call in message["tool_calls"]:
-                    if tool_call["function"]["name"] == tool_name:
-                        return tool_call
-        return None
-    def has_tool_call(self, tool_name: str) -> bool:
-        return self.last_tool_call(tool_name) is not None
     # Scripting utils
     async def message(self, message: ChatCompletionMessageParam) -> None:
         if message["role"] == "user":
-            await self._script_call_agent(ScenarioAgentRole.USER, message)
+            await self._script_call_agent(AgentRole.USER, message)
         elif message["role"] == "assistant":
-            await self._script_call_agent(ScenarioAgentRole.AGENT, message)
+            await self._script_call_agent(AgentRole.AGENT, message)
         else:
             self.add_message(message)
     async def user(
         self, content: Optional[Union[str, ChatCompletionMessageParam]] = None
     ) -> None:
-        await self._script_call_agent(ScenarioAgentRole.USER, content)
+        await self._script_call_agent(AgentRole.USER, content)
     async def agent(
         self, content: Optional[Union[str, ChatCompletionMessageParam]] = None
     ) -> None:
-        await self._script_call_agent(ScenarioAgentRole.AGENT, content)
+        await self._script_call_agent(AgentRole.AGENT, content)
     async def judge(
         self, content: Optional[Union[str, ChatCompletionMessageParam]] = None
     ) -> Optional[ScenarioResult]:
-        return await self._script_call_agent(ScenarioAgentRole.JUDGE, content)
+        return await self._script_call_agent(
+            AgentRole.JUDGE, content, request_judgment=True
+        )
     async def proceed(
         self,
         turns: Optional[int] = None,
         on_turn: Optional[
             Union[
-                Callable[["ScenarioExecutor"], None],
-                Callable[["ScenarioExecutor"], Awaitable[None]],
+                Callable[["ScenarioState"], None],
+                Callable[["ScenarioState"], Awaitable[None]],
             ]
         ] = None,
         on_step: Optional[
             Union[
-                Callable[["ScenarioExecutor"], None],
-                Callable[["ScenarioExecutor"], Awaitable[None]],
+                Callable[["ScenarioState"], None],
+                Callable[["ScenarioState"], Awaitable[None]],
             ]
         ] = None,
     ) -> Optional[ScenarioResult]:
@@ -396,71 +720,223 @@ class ScenarioExecutor:
                 go_to_next_turn=(
                     turns is None
                     or initial_turn is None
-                    or (self.current_turn + 1 < initial_turn + turns)
+                    or (self._state.current_turn + 1 < initial_turn + turns)
                 ),
             )
             if initial_turn is None:
-                initial_turn = self.current_turn
+                initial_turn = self._state.current_turn
             if next_message is None:
                 break
             if on_step:
-                await await_if_awaitable(on_step(self))
+                await await_if_awaitable(on_step(self._state))
             if isinstance(next_message, ScenarioResult):
                 return next_message
-    async def succeed(self) -> ScenarioResult:
+    async def succeed(self, reasoning: Optional[str] = None) -> ScenarioResult:
         return ScenarioResult(
             success=True,
-            messages=self.messages,
-            reasoning="Scenario marked as successful with scenario.succeed()",
-            passed_criteria=self.scenario.criteria,
+            messages=self._state.messages,
+            reasoning=reasoning
+            or "Scenario marked as successful with scenario.succeed()",
         )
-    async def fail(self) -> ScenarioResult:
+    async def fail(self, reasoning: Optional[str] = None) -> ScenarioResult:
         return ScenarioResult(
             success=False,
-            messages=self.messages,
-            reasoning="Scenario marked as failed with scenario.fail()",
-            passed_criteria=self.scenario.criteria,
+            messages=self._state.messages,
+            reasoning=reasoning or "Scenario marked as failed with scenario.fail()",
         )
+    def _consume_until_role(self, role: AgentRole) -> None:
+        while len(self._pending_roles_on_turn) > 0:
+            next_role = self._pending_roles_on_turn[0]
+            if next_role == role:
+                break
+            self._pending_roles_on_turn.pop(0)
     async def _script_call_agent(
         self,
-        role: ScenarioAgentRole,
+        role: AgentRole,
         content: Optional[Union[str, ChatCompletionMessageParam]] = None,
+        request_judgment: bool = False,
     ) -> Optional[ScenarioResult]:
+        self._consume_until_role(role)
         idx, next_agent = self._next_agent_for_role(role)
         if not next_agent:
             self._new_turn()
+            self._consume_until_role(role)
             idx, next_agent = self._next_agent_for_role(role)
             if not next_agent:
+                role_class = (
+                    "a scenario.UserSimulatorAgent()"
+                    if role == AgentRole.USER
+                    else (
+                        "a scenario.JudgeAgent()"
+                        if role == AgentRole.JUDGE
+                        else "your agent"
+                    )
+                )
                 if content:
                     raise ValueError(
-                        f"Cannot generate a message for role `{role.value}` with content `{content}` because no agent with this role was found"
+                        f"Cannot generate a message for role `{role.value}` with content `{content}` because no agent with this role was found, please add {role_class} to the scenario `agents` list"
                     )
                 raise ValueError(
-                    f"Cannot generate a message for role `{role.value}` because no agent with this role was found"
+                    f"Cannot generate a message for role `{role.value}` because no agent with this role was found, please add {role_class} to the scenario `agents` list"
                 )
         self._pending_agents_on_turn.remove(next_agent)
-        self._pending_roles_on_turn.remove(role)
         if content:
             if isinstance(content, str):
-                message = ChatCompletionUserMessageParam(role="user", content=content)
+                message = (
+                    ChatCompletionUserMessageParam(role="user", content=content)
+                    if role == AgentRole.USER
+                    else ChatCompletionAssistantMessageParam(
+                        role="assistant", content=content
+                    )
+                )
             else:
                 message = content
             self.add_message(message)
-            if self.scenario.verbose:
+            if self.config.verbose:
                 print_openai_messages(self._scenario_name(), [message])
             return
-        result = await self._call_agent(idx, role=role)
+        result = await self._call_agent(
+            idx, role=role, request_judgment=request_judgment
+        )
         if isinstance(result, ScenarioResult):
             return result
+    # Event handling methods
+    class _CommonEventFields(TypedDict):
+        """
+        Common fields shared across all scenario events.
+        These fields provide consistent identification and timing information
+        for all events emitted during scenario execution.
+        Attributes:
+            batch_run_id: Unique identifier for the batch of scenario runs
+            scenario_run_id: Unique identifier for this specific scenario run
+            scenario_id: Human-readable name/identifier for the scenario
+            timestamp: Unix timestamp in milliseconds when the event occurred
+        """
+        batch_run_id: str
+        scenario_run_id: str
+        scenario_id: str
+        timestamp: int
+    def _create_common_event_fields(self, scenario_run_id: str) -> _CommonEventFields:
+        """
+        Create common fields used across all scenario events.
+        This method generates the standard fields that every scenario event
+        must include for proper identification and timing.
+        Args:
+            scenario_run_id: Unique identifier for the current scenario run
+        Returns:
+            Dictionary containing common event fields with current timestamp
+        """
+        return {
+            "batch_run_id": self.batch_run_id,
+            "scenario_run_id": scenario_run_id,
+            "scenario_id": self.name,
+            "timestamp": int(time.time() * 1000),
+        }
+    def _emit_run_started_event(self, scenario_run_id: str) -> None:
+        """
+        Emit a scenario run started event.
+        This event is published when a scenario begins execution. It includes
+        metadata about the scenario such as name and description, and is used
+        to track the start of scenario runs in monitoring systems.
+        Args:
+            scenario_run_id: Unique identifier for the current scenario run
+        Note:
+            This event is automatically published at the beginning of `_run()`
+            and signals the start of scenario execution to any event listeners.
+        """
+        common_fields = self._create_common_event_fields(scenario_run_id)
+        metadata = ScenarioRunStartedEventMetadata(
+            name=self.name,
+            description=self.description,
+        )
+        event = ScenarioRunStartedEvent(
+            **common_fields,
+            metadata=metadata,
+        )
+        self.event_bus.publish(event)
+    def _emit_message_snapshot_event(self, scenario_run_id: str) -> None:
+        """
+        Emit a message snapshot event.
+        This event captures the current state of the conversation during
+        scenario execution. It's published whenever messages are added to
+        the conversation, allowing real-time tracking of scenario progress.
+        Note:
+            This event is automatically published by `add_message()` and
+            `add_messages()` to provide continuous visibility into scenario
+            execution state.
+        """
+        common_fields = self._create_common_event_fields(scenario_run_id)
+        event = ScenarioMessageSnapshotEvent(
+            **common_fields,
+            messages=convert_messages_to_ag_ui_messages(self._state.messages),
+        )
+        self.event_bus.publish(event)
+    def _emit_run_finished_event(
+        self,
+        scenario_run_id: str,
+        result: ScenarioResult,
+        status: ScenarioRunFinishedEventStatus
+    ) -> None:
+        """
+        Emit a scenario run finished event.
+        This event is published when a scenario completes execution, whether
+        successfully or with an error. It includes the final results, verdict,
+        and reasoning for the scenario outcome.
+        Args:
+            scenario_run_id: Unique identifier for the current scenario run
+            result: The final scenario result containing success/failure status
+            status: The execution status (SUCCESS, FAILED, or ERROR)
+        Note:
+            This event is automatically published at the end of `_run()` and
+            signals the completion of scenario execution to any event listeners.
+            It includes detailed results for monitoring and analysis purposes.
+        """
+        common_fields = self._create_common_event_fields(scenario_run_id)
+        results = ScenarioRunFinishedEventResults(
+            verdict=ScenarioRunFinishedEventVerdict.SUCCESS if result.success else ScenarioRunFinishedEventVerdict.FAILURE,
+            reasoning=result.reasoning or "",
+            met_criteria=result.passed_criteria,
+            unmet_criteria=result.failed_criteria,
+        )
+        event = ScenarioRunFinishedEvent(
+            **common_fields,
+            status=status,
+            results=results,
+        )
+        self.event_bus.publish(event)

langwatch-scenario 0.3.0__py3-none-any.whl → 0.6.0__py3-none-any.whl

langwatch-scenario 0.3.0py3-none-any.whl → 0.6.0py3-none-any.whl