PyPI - langwatch-scenario - Versions diffs - 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl - Mend

langwatch-scenario 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17)

{langwatch_scenario-0.2.0.dist-info → langwatch_scenario-0.3.0.dist-info}/METADATA +60 -12
langwatch_scenario-0.3.0.dist-info/RECORD +16 -0
scenario/__init__.py +13 -3
scenario/config.py +18 -7
scenario/error_messages.py +81 -23
scenario/pytest_plugin.py +1 -1
scenario/scenario.py +135 -20
scenario/scenario_agent_adapter.py +16 -0
scenario/scenario_executor.py +405 -143
scenario/testing_agent.py +75 -58
scenario/types.py +96 -0
scenario/utils.py +148 -5
langwatch_scenario-0.2.0.dist-info/RECORD +0 -15
scenario/result.py +0 -74
{langwatch_scenario-0.2.0.dist-info → langwatch_scenario-0.3.0.dist-info}/WHEEL +0 -0
{langwatch_scenario-0.2.0.dist-info → langwatch_scenario-0.3.0.dist-info}/entry_points.txt +0 -0
{langwatch_scenario-0.2.0.dist-info → langwatch_scenario-0.3.0.dist-info}/top_level.txt +0 -0

scenario/scenario_executor.py CHANGED

@@ -2,40 +2,201 @@
 ScenarioExecutor module: holds the scenario execution logic and state, orchestrating the conversation between the testing agent and the agent under test.
 """
-import json
 import sys
-from typing import TYPE_CHECKING, Awaitable, Dict, List, Any, Optional, Union
+from typing import (
+    TYPE_CHECKING,
+    Awaitable,
+    Callable,
+    Dict,
+    List,
+    Any,
+    Optional,
+    Set,
+    Tuple,
+    Union,
+)
 import time
 import termcolor
-from scenario.error_messages import message_return_error_message
-from scenario.utils import print_openai_messages, safe_attr_or_key, safe_list_at, show_spinner
-from openai.types.chat import ChatCompletionMessageParam
-from .result import ScenarioResult
-from .error_messages import default_config_error_message
+from scenario.utils import (
+    await_if_awaitable,
+    check_valid_return_type,
+    convert_agent_return_types_to_openai_messages,
+    print_openai_messages,
+    show_spinner,
+)
+from openai.types.chat import (
+    ChatCompletionMessageParam,
+    ChatCompletionUserMessageParam,
+    ChatCompletionMessageToolCallParam,
+)
+from .types import AgentInput, ScenarioAgentRole, ScenarioResult, ScriptStep
+from .error_messages import agent_response_not_awaitable
 from .cache import context_scenario
+from .scenario_agent_adapter import ScenarioAgentAdapter
+from pksuid import PKSUID
 if TYPE_CHECKING:
     from scenario.scenario import Scenario
 class ScenarioExecutor:
-    def __init__(self, scenario: "Scenario"):
+    scenario: "Scenario"
+    messages: List[ChatCompletionMessageParam]
+    thread_id: str
+    current_turn: int
+    _context: Optional[Dict[str, Any]]
+    _script: List[ScriptStep]
+    _agents: List[ScenarioAgentAdapter]
+    _total_start_time: float
+    _pending_messages: Dict[int, List[ChatCompletionMessageParam]]
+    _pending_roles_on_turn: List[ScenarioAgentRole] = []
+    _pending_agents_on_turn: Set[ScenarioAgentAdapter] = set()
+    _agent_times: Dict[int, float] = {}
+    def __init__(
+        self,
+        scenario: "Scenario",
+        context: Optional[Dict[str, Any]] = None,
+        script: Optional[List[ScriptStep]] = None,
+    ):
+        super().__init__()
         self.scenario = scenario.model_copy()
+        self._context = context
+        self._script = script or [scenario.proceed()]
+        self.current_turn = 0
+        self.reset()
+    def reset(self):
+        self.messages = []
+        self._agents = []
+        self._pending_messages = {}
+        self.thread_id = str(PKSUID("thread"))
+        self._total_start_time = time.time()
+        self._agent_times = {}
+        for AgentClass in self.scenario.agents:
+            self._agents.append(
+                AgentClass(
+                    input=AgentInput(
+                        thread_id=self.thread_id,
+                        messages=[],
+                        new_messages=[],
+                        context=self._context or {},
+                        requested_role=list(AgentClass.roles)[0],
+                        scenario_state=self,
+                    )
+                )
+            )
+        self._new_turn()
+        self.current_turn = 0
-        testing_agent = scenario.testing_agent
-        if not testing_agent or not testing_agent.model:
-            raise Exception(default_config_error_message)
-        self.testing_agent = testing_agent
+        context_scenario.set(self.scenario)
+    def add_message(
+        self, message: ChatCompletionMessageParam, from_agent_idx: Optional[int] = None
+    ):
+        self.messages.append(message)
-        self.conversation: List[Dict[str, Any]] = []
+        # Broadcast the message to other agents
+        for idx, _ in enumerate(self._agents):
+            if idx == from_agent_idx:
+                continue
+            if idx not in self._pending_messages:
+                self._pending_messages[idx] = []
+            self._pending_messages[idx].append(message)
-    async def run(
+    def add_messages(
         self,
-        context: Optional[Dict[str, Any]] = None,
-    ) -> ScenarioResult:
+        messages: List[ChatCompletionMessageParam],
+        from_agent_idx: Optional[int] = None,
+    ):
+        for message in messages:
+            self.add_message(message, from_agent_idx)
+    def _new_turn(self):
+        self._pending_agents_on_turn = set(self._agents)
+        self._pending_roles_on_turn = [
+            ScenarioAgentRole.USER,
+            ScenarioAgentRole.AGENT,
+            ScenarioAgentRole.JUDGE,
+        ]
+        self.current_turn += 1
+    async def step(self) -> Union[List[ChatCompletionMessageParam], ScenarioResult]:
+        result = await self._step()
+        if result is None:
+            raise ValueError("No result from step")
+        return result
+    async def _step(
+        self,
+        go_to_next_turn=True,
+        on_turn: Optional[
+            Union[
+                Callable[["ScenarioExecutor"], None],
+                Callable[["ScenarioExecutor"], Awaitable[None]],
+            ]
+        ] = None,
+    ) -> Union[List[ChatCompletionMessageParam], ScenarioResult, None]:
+        if len(self._pending_roles_on_turn) == 0:
+            if not go_to_next_turn:
+                return None
+            self._new_turn()
+            if on_turn:
+                await await_if_awaitable(on_turn(self))
+            if self.current_turn >= (self.scenario.max_turns or 10):
+                return self._reached_max_turns()
+        current_role = self._pending_roles_on_turn[0]
+        idx, next_agent = self._next_agent_for_role(current_role)
+        if not next_agent:
+            self._pending_roles_on_turn.pop(0)
+            return await self._step(go_to_next_turn=go_to_next_turn, on_turn=on_turn)
+        self._pending_agents_on_turn.remove(next_agent)
+        return await self._call_agent(idx, role=current_role)
+    def _next_agent_for_role(
+        self, role: ScenarioAgentRole
+    ) -> Tuple[int, Optional[ScenarioAgentAdapter]]:
+        for idx, agent in enumerate(self._agents):
+            if role in agent.roles and agent in self._pending_agents_on_turn:
+                return idx, agent
+        return -1, None
+    def _reached_max_turns(self, error_message: Optional[str] = None) -> ScenarioResult:
+        # If we reached max turns without conclusion, fail the test
+        agent_roles_agents_idx = [
+            idx
+            for idx, agent in enumerate(self._agents)
+            if ScenarioAgentRole.AGENT in agent.roles
+        ]
+        agent_times = [
+            self._agent_times[idx]
+            for idx in agent_roles_agents_idx
+            if idx in self._agent_times
+        ]
+        agent_time = sum(agent_times)
+        return ScenarioResult(
+            success=False,
+            messages=self.messages,
+            reasoning=error_message
+            or f"Reached maximum turns ({self.scenario.max_turns or 10}) without conclusion",
+            total_time=time.time() - self._total_start_time,
+            agent_time=agent_time,
+        )
+    async def run(self) -> ScenarioResult:
         """
         Run a scenario against the agent under test.
@@ -49,156 +210,257 @@ class ScenarioExecutor:
         if self.scenario.verbose:
             print("")  # new line
-        # Run the initial testing agent prompt to get started
-        total_start_time = time.time()
-        context_scenario.set(self.scenario)
-        next_message = self._generate_next_message(
-            self.scenario, self.conversation, first_message=True
-        )
+        self.reset()
-        if isinstance(next_message, ScenarioResult):
-            raise Exception(
-                "Unexpectedly generated a ScenarioResult for the initial message",
-                next_message.__repr__(),
-            )
-        elif self.scenario.verbose:
-            print(self._scenario_name() + termcolor.colored("User:", "green"), next_message)
+        for script_step in self._script:
+            callable = script_step(self)
+            if isinstance(callable, Awaitable):
+                result = await callable
+            else:
+                result = callable
-        # Execute the conversation
-        current_turn = 0
-        max_turns = self.scenario.max_turns or 10
-        agent_time = 0
+            if isinstance(result, ScenarioResult):
+                return result
-        # Start the test with the initial message
-        while current_turn < max_turns:
-            # Record the testing agent's message
-            self.conversation.append({"role": "user", "content": next_message})
+        return self._reached_max_turns(
+            """Reached end of script without conclusion, add one of the following to the end of the script:
-            # Get response from the agent under test
-            start_time = time.time()
+- `scenario.proceed()` to let the simulation continue to play out
+- `scenario.judge()` to force criteria judgement
+- `scenario.succeed()` or `scenario.fail()` to end the test with an explicit result
+            """
+        )
-            context_scenario.set(self.scenario)
-            with show_spinner(text="Agent:", color="blue", enabled=self.scenario.verbose):
-                agent_response = self.scenario.agent(next_message, context)
-                if isinstance(agent_response, Awaitable):
-                    agent_response = await agent_response
+    async def _call_agent(
+        self, idx: int, role: ScenarioAgentRole
+    ) -> Union[List[ChatCompletionMessageParam], ScenarioResult]:
+        agent = self._agents[idx]
-            has_valid_message = (
-                "message" in agent_response
-                and type(agent_response["message"]) is str
-                and agent_response["message"] is not None
+        if role == ScenarioAgentRole.USER and self.scenario.debug:
+            print(
+                f"\n{self._scenario_name()}{termcolor.colored('[Debug Mode]', 'yellow')} Press enter to continue or type a message to send"
             )
-            has_valid_messages = (
-                "messages" in agent_response
-                and isinstance(agent_response["messages"], list)
-                and all(
-                    "role" in msg or hasattr(msg, "role")
-                    for msg in agent_response["messages"]
+            input_message = input(
+                self._scenario_name() + termcolor.colored("User: ", "green")
+            )
+            # Clear the input prompt lines completely
+            for _ in range(3):
+                sys.stdout.write("\033[F")  # Move up to the input line
+                sys.stdout.write("\033[2K")  # Clear the entire input line
+            sys.stdout.flush()  # Make sure the clearing is visible
+            if input_message:
+                return [
+                    ChatCompletionUserMessageParam(role="user", content=input_message)
+                ]
+        with show_spinner(
+            text=(
+                "Judging..."
+                if role == ScenarioAgentRole.JUDGE
+                else f"{role.value if isinstance(role, ScenarioAgentRole) else role}:"
+            ),
+            color=(
+                "blue"
+                if role == ScenarioAgentRole.AGENT
+                else "green" if role == ScenarioAgentRole.USER else "yellow"
+            ),
+            enabled=self.scenario.verbose,
+        ):
+            start_time = time.time()
+            agent_response = agent.call(
+                AgentInput(
+                    # TODO: test thread_id
+                    thread_id=self.thread_id,
+                    messages=self.messages,
+                    new_messages=self._pending_messages.get(idx, []),
+                    # TODO: test context
+                    context=self._context or {},
+                    requested_role=role,
+                    scenario_state=self,
                 )
             )
-            if not has_valid_message and not has_valid_messages:
-                raise Exception(message_return_error_message(agent_response))
+            if not isinstance(agent_response, Awaitable):
+                raise Exception(
+                    agent_response_not_awaitable(agent.__class__.__name__),
+                )
-            messages: list[ChatCompletionMessageParam] = []
-            if has_valid_messages and len(agent_response["messages"]) > 0:
-                messages = agent_response["messages"]
+            agent_response = await agent_response
-                # Drop the first messages both if they are system or user messages
-                if safe_attr_or_key(safe_list_at(messages, 0), "role") == "system":
-                    messages = messages[1:]
-                if safe_attr_or_key(safe_list_at(messages, 0), "role") == "user":
-                    messages = messages[1:]
+            if idx not in self._agent_times:
+                self._agent_times[idx] = 0
+            self._agent_times[idx] += time.time() - start_time
-            if has_valid_message and self.scenario.verbose:
-                print(self._scenario_name() + termcolor.colored("Agent:", "blue"), agent_response["message"])
+            self._pending_messages[idx] = []
+            check_valid_return_type(agent_response, agent.__class__.__name__)
-            if messages and self.scenario.verbose:
-                print_openai_messages(self._scenario_name(), messages)
-            if (
-                self.scenario.verbose
-                and "extra" in agent_response
-                and len(agent_response["extra"].keys()) > 0
-            ):
-                print(
-                    termcolor.colored(
-                        "Extra:" + json.dumps(agent_response["extra"]),
-                        "magenta",
-                    )
+            messages = []
+            if isinstance(agent_response, ScenarioResult):
+                # TODO: should be an event
+                return agent_response
+            else:
+                messages = convert_agent_return_types_to_openai_messages(
+                    agent_response,
+                    role="user" if role == ScenarioAgentRole.USER else "assistant",
                 )
-            response_time = time.time() - start_time
-            agent_time += response_time
-            if messages:
-                self.conversation.extend(agent_response["messages"])
-            if "message" in agent_response:
-                self.conversation.append(
-                    {"role": "assistant", "content": agent_response["message"]}
-                )
-            if "extra" in agent_response:
-                self.conversation.append(
-                    {
-                        "role": "assistant",
-                        "content": json.dumps(agent_response["extra"]),
-                    }
+            self.add_messages(messages, from_agent_idx=idx)
+            if messages and self.scenario.verbose:
+                print_openai_messages(
+                    self._scenario_name(),
+                    [m for m in messages if m["role"] != "system"],
                 )
-            # Generate the next message OR finish the test based on the agent's evaluation
-            result = self._generate_next_message(
-                self.scenario,
-                self.conversation,
-                last_message=current_turn == max_turns - 1,
+            return messages
+    def _scenario_name(self):
+        if self.scenario.verbose == 2:
+            return termcolor.colored(f"[Scenario: {self.scenario.name}] ", "yellow")
+        else:
+            return ""
+    # State access utils
+    def last_message(self) -> ChatCompletionMessageParam:
+        if len(self.messages) == 0:
+            raise ValueError("No messages found")
+        return self.messages[-1]
+    def last_user_message(self) -> ChatCompletionUserMessageParam:
+        user_messages = [m for m in self.messages if m["role"] == "user"]
+        if not user_messages:
+            raise ValueError("No user messages found")
+        return user_messages[-1]
+    def last_tool_call(
+        self, tool_name: str
+    ) -> Optional[ChatCompletionMessageToolCallParam]:
+        for message in reversed(self.messages):
+            if message["role"] == "assistant" and "tool_calls" in message:
+                for tool_call in message["tool_calls"]:
+                    if tool_call["function"]["name"] == tool_name:
+                        return tool_call
+        return None
+    def has_tool_call(self, tool_name: str) -> bool:
+        return self.last_tool_call(tool_name) is not None
+    # Scripting utils
+    async def message(self, message: ChatCompletionMessageParam) -> None:
+        if message["role"] == "user":
+            await self._script_call_agent(ScenarioAgentRole.USER, message)
+        elif message["role"] == "assistant":
+            await self._script_call_agent(ScenarioAgentRole.AGENT, message)
+        else:
+            self.add_message(message)
+    async def user(
+        self, content: Optional[Union[str, ChatCompletionMessageParam]] = None
+    ) -> None:
+        await self._script_call_agent(ScenarioAgentRole.USER, content)
+    async def agent(
+        self, content: Optional[Union[str, ChatCompletionMessageParam]] = None
+    ) -> None:
+        await self._script_call_agent(ScenarioAgentRole.AGENT, content)
+    async def judge(
+        self, content: Optional[Union[str, ChatCompletionMessageParam]] = None
+    ) -> Optional[ScenarioResult]:
+        return await self._script_call_agent(ScenarioAgentRole.JUDGE, content)
+    async def proceed(
+        self,
+        turns: Optional[int] = None,
+        on_turn: Optional[
+            Union[
+                Callable[["ScenarioExecutor"], None],
+                Callable[["ScenarioExecutor"], Awaitable[None]],
+            ]
+        ] = None,
+        on_step: Optional[
+            Union[
+                Callable[["ScenarioExecutor"], None],
+                Callable[["ScenarioExecutor"], Awaitable[None]],
+            ]
+        ] = None,
+    ) -> Optional[ScenarioResult]:
+        initial_turn: Optional[int] = None
+        while True:
+            next_message = await self._step(
+                on_turn=on_turn,
+                go_to_next_turn=(
+                    turns is None
+                    or initial_turn is None
+                    or (self.current_turn + 1 < initial_turn + turns)
+                ),
             )
-            # Check if the result is a ScenarioResult (indicating test completion)
-            if isinstance(result, ScenarioResult):
-                result.total_time = time.time() - start_time
-                result.agent_time = agent_time
-                return result
-            elif self.scenario.verbose:
-                print(self._scenario_name() + termcolor.colored("User:", "green"), result)
+            if initial_turn is None:
+                initial_turn = self.current_turn
-            # Otherwise, it's the next message to send to the agent
-            next_message = result
+            if next_message is None:
+                break
-            # Increment turn counter
-            current_turn += 1
+            if on_step:
+                await await_if_awaitable(on_step(self))
-        # If we reached max turns without conclusion, fail the test
-        return ScenarioResult.failure_result(
-            conversation=self.conversation,
-            reasoning=f"Reached maximum turns ({max_turns}) without conclusion",
-            total_time=time.time() - total_start_time,
-            agent_time=agent_time,
+            if isinstance(next_message, ScenarioResult):
+                return next_message
+    async def succeed(self) -> ScenarioResult:
+        return ScenarioResult(
+            success=True,
+            messages=self.messages,
+            reasoning="Scenario marked as successful with scenario.succeed()",
+            passed_criteria=self.scenario.criteria,
         )
-    def _generate_next_message(
+    async def fail(self) -> ScenarioResult:
+        return ScenarioResult(
+            success=False,
+            messages=self.messages,
+            reasoning="Scenario marked as failed with scenario.fail()",
+            passed_criteria=self.scenario.criteria,
+        )
+    async def _script_call_agent(
         self,
-        scenario: "Scenario",
-        conversation: List[Dict[str, Any]],
-        first_message: bool = False,
-        last_message: bool = False,
-    ) -> Union[str, ScenarioResult]:
-        if self.scenario.debug:
-            print(f"\n{self._scenario_name()}{termcolor.colored('[Debug Mode]', 'yellow')} Press enter to continue or type a message to send")
-            input_message = input(self._scenario_name() + termcolor.colored('User: ', 'green'))
+        role: ScenarioAgentRole,
+        content: Optional[Union[str, ChatCompletionMessageParam]] = None,
+    ) -> Optional[ScenarioResult]:
+        idx, next_agent = self._next_agent_for_role(role)
+        if not next_agent:
+            self._new_turn()
+            idx, next_agent = self._next_agent_for_role(role)
+            if not next_agent:
+                if content:
+                    raise ValueError(
+                        f"Cannot generate a message for role `{role.value}` with content `{content}` because no agent with this role was found"
+                    )
+                raise ValueError(
+                    f"Cannot generate a message for role `{role.value}` because no agent with this role was found"
+                )
-            # Clear the input prompt lines completely
-            for _ in range(3):
-                sys.stdout.write("\033[F")  # Move up to the input line
-                sys.stdout.write("\033[2K")  # Clear the entire input line
-            sys.stdout.flush()  # Make sure the clearing is visible
+        self._pending_agents_on_turn.remove(next_agent)
+        self._pending_roles_on_turn.remove(role)
-            if input_message:
-                return input_message
+        if content:
+            if isinstance(content, str):
+                message = ChatCompletionUserMessageParam(role="user", content=content)
+            else:
+                message = content
-        with show_spinner(text=f"{self._scenario_name()}User:", color="green", enabled=self.scenario.verbose):
-            return self.testing_agent.generate_next_message(
-                scenario, conversation, first_message, last_message
-            )
+            self.add_message(message)
+            if self.scenario.verbose:
+                print_openai_messages(self._scenario_name(), [message])
+            return
-    def _scenario_name(self):
-        if self.scenario.verbose == 2:
-            return termcolor.colored(f"[Scenario: {self.scenario.name}] ", "yellow")
-        else:
-            return ""
+        result = await self._call_agent(idx, role=role)
+        if isinstance(result, ScenarioResult):
+            return result

langwatch-scenario 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

langwatch-scenario 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl