PyPI - langwatch-scenario - Versions diffs - 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl - Mend

langwatch-scenario 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

{langwatch_scenario-0.3.0.dist-info → langwatch_scenario-0.4.0.dist-info}/METADATA +140 -79
langwatch_scenario-0.4.0.dist-info/RECORD +18 -0
scenario/__init__.py +223 -9
scenario/agent_adapter.py +111 -0
scenario/cache.py +132 -8
scenario/config.py +154 -10
scenario/error_messages.py +8 -38
scenario/judge_agent.py +435 -0
scenario/pytest_plugin.py +223 -15
scenario/scenario_executor.py +428 -136
scenario/scenario_state.py +205 -0
scenario/script.py +361 -0
scenario/types.py +193 -20
scenario/user_simulator_agent.py +249 -0
scenario/utils.py +252 -2
langwatch_scenario-0.3.0.dist-info/RECORD +0 -16
scenario/scenario.py +0 -238
scenario/scenario_agent_adapter.py +0 -16
scenario/testing_agent.py +0 -279
{langwatch_scenario-0.3.0.dist-info → langwatch_scenario-0.4.0.dist-info}/WHEEL +0 -0
{langwatch_scenario-0.3.0.dist-info → langwatch_scenario-0.4.0.dist-info}/entry_points.txt +0 -0
{langwatch_scenario-0.3.0.dist-info → langwatch_scenario-0.4.0.dist-info}/top_level.txt +0 -0

scenario/utils.py CHANGED Viewed

@@ -1,3 +1,11 @@
+"""
+Utility functions for scenario execution and message handling.
+This module provides various utility functions used throughout the Scenario framework,
+including message formatting, validation, role reversal, and UI components like spinners
+for better user experience during scenario execution.
+"""
 from contextlib import contextmanager
 import sys
 from typing import (
@@ -12,6 +20,7 @@ from typing import (
     cast,
 )
 from pydantic import BaseModel
+import copy
 import json
@@ -31,6 +40,22 @@ T = TypeVar("T")
 class SerializableAndPydanticEncoder(json.JSONEncoder):
+    """
+    JSON encoder that handles Pydantic models and iterators.
+    This encoder extends the standard JSON encoder to handle Pydantic BaseModel
+    instances and iterator objects, converting them to serializable formats.
+    Used for caching and logging scenarios that contain complex objects.
+    Example:
+        ```python
+        data = {
+            "model": SomeBaseModel(field="value"),
+            "iterator": iter([1, 2, 3])
+        }
+        json.dumps(data, cls=SerializableAndPydanticEncoder)
+        ```
+    """
     def default(self, o):
         if isinstance(o, BaseModel):
             return o.model_dump(exclude_unset=True)
@@ -40,6 +65,21 @@ class SerializableAndPydanticEncoder(json.JSONEncoder):
 class SerializableWithStringFallback(SerializableAndPydanticEncoder):
+    """
+    JSON encoder with string fallback for non-serializable objects.
+    This encoder extends SerializableAndPydanticEncoder by providing a string
+    fallback for any object that cannot be serialized normally. This ensures
+    that logging and caching operations never fail due to serialization issues.
+    Example:
+        ```python
+        # This will work even with complex non-serializable objects
+        data = {"function": lambda x: x, "complex_object": SomeComplexClass()}
+        json.dumps(data, cls=SerializableWithStringFallback)
+        # Result: {"function": "<function <lambda> at 0x...>", "complex_object": "..."}
+        ```
+    """
     def default(self, o):
         try:
             return super().default(o)
@@ -48,6 +88,25 @@ class SerializableWithStringFallback(SerializableAndPydanticEncoder):
 def safe_list_at(list, index, default=None):
+    """
+    Safely get an item from a list by index with a default fallback.
+    Args:
+        list: The list to access
+        index: The index to retrieve
+        default: Value to return if index is out of bounds
+    Returns:
+        The item at the index, or the default value if index is invalid
+    Example:
+        ```python
+        items = ["a", "b", "c"]
+        print(safe_list_at(items, 1))    # "b"
+        print(safe_list_at(items, 10))   # None
+        print(safe_list_at(items, 10, "default"))  # "default"
+        ```
+    """
     try:
         return list[index]
     except:
@@ -55,16 +114,85 @@ def safe_list_at(list, index, default=None):
 def safe_attr_or_key(obj, attr_or_key, default=None):
+    """
+    Safely get an attribute or dictionary key from an object.
+    Tries to get the value as an attribute first, then as a dictionary key,
+    returning the default if neither exists.
+    Args:
+        obj: Object to access (can have attributes or be dict-like)
+        attr_or_key: Name of attribute or key to retrieve
+        default: Value to return if attribute/key doesn't exist
+    Returns:
+        The attribute/key value, or the default if not found
+    Example:
+        ```python
+        class MyClass:
+            attr = "value"
+        obj = MyClass()
+        dict_obj = {"key": "value"}
+        print(safe_attr_or_key(obj, "attr"))     # "value"
+        print(safe_attr_or_key(dict_obj, "key")) # "value"
+        print(safe_attr_or_key(obj, "missing"))  # None
+        ```
+    """
     return getattr(obj, attr_or_key, obj.get(attr_or_key))
 def title_case(string):
+    """
+    Convert snake_case string to Title Case.
+    Args:
+        string: Snake_case string to convert
+    Returns:
+        String converted to Title Case
+    Example:
+        ```python
+        print(title_case("user_simulator_agent"))  # "User Simulator Agent"
+        print(title_case("api_key"))               # "Api Key"
+        ```
+    """
     return " ".join(word.capitalize() for word in string.split("_"))
 def print_openai_messages(
     scenario_name: str, messages: list[ChatCompletionMessageParam]
 ):
+    """
+    Print OpenAI-format messages with colored formatting for readability.
+    This function formats and prints conversation messages with appropriate
+    colors and formatting for different message types (user, assistant, tool calls, etc.).
+    Used for verbose output during scenario execution.
+    Args:
+        scenario_name: Name of the scenario (used as prefix)
+        messages: List of OpenAI-compatible messages to print
+    Example:
+        ```python
+        messages = [
+            {"role": "user", "content": "Hello"},
+            {"role": "assistant", "content": "Hi there!"},
+            {"role": "assistant", "tool_calls": [{"function": {"name": "search"}}]}
+        ]
+        print_openai_messages("Test Scenario", messages)
+        ```
+    Note:
+        - User messages are printed in green
+        - Assistant messages are printed in blue
+        - Tool calls are printed in magenta with formatted JSON
+        - Long JSON content is truncated for readability
+    """
     for msg in messages:
         role = safe_attr_or_key(msg, "role")
         content = safe_attr_or_key(msg, "content")
@@ -99,6 +227,19 @@ def print_openai_messages(
 def _take_maybe_json_first_lines(string, max_lines=5):
+    """
+    Truncate string content and format JSON if possible.
+    Internal utility function that attempts to format content as JSON
+    and truncates it to a reasonable number of lines for display.
+    Args:
+        string: Content to format and truncate
+        max_lines: Maximum number of lines to show
+    Returns:
+        Formatted and potentially truncated string
+    """
     content = str(string)
     try:
         content = json.dumps(json.loads(content), indent=2)
@@ -114,6 +255,19 @@ console = Console()
 class TextFirstSpinner(Spinner):
+    """
+    Custom spinner that displays text before the spinning animation.
+    This class extends Rich's Spinner to show descriptive text followed
+    by the spinning animation, improving the user experience during
+    scenario execution by clearly indicating what operation is happening.
+    Args:
+        name: Name of the spinner animation style
+        text: Descriptive text to show before the spinner
+        color: Color for the descriptive text
+        **kwargs: Additional arguments passed to the base Spinner class
+    """
     def __init__(self, name, text: str, color: str, **kwargs):
         super().__init__(
             name, "", style="bold white", **kwargs
@@ -132,6 +286,32 @@ class TextFirstSpinner(Spinner):
 def show_spinner(
     text: str, color: str = "white", enabled: Optional[Union[bool, int]] = None
 ):
+    """
+    Context manager for displaying a spinner during long-running operations.
+    Shows a spinning indicator with descriptive text while code executes
+    within the context. Automatically cleans up the spinner display when
+    the operation completes.
+    Args:
+        text: Descriptive text to show next to the spinner
+        color: Color for the descriptive text
+        enabled: Whether to show the spinner (respects verbose settings)
+    Example:
+        ```python
+        with show_spinner("Calling agent...", color="blue", enabled=True):
+            response = await agent.call(input_data)
+        # Spinner automatically disappears when block completes
+        print("Agent call completed")
+        ```
+    Note:
+        - Spinner is automatically cleaned up when context exits
+        - Gracefully handles multi-threading scenarios where multiple spinners might conflict
+        - Cursor positioning ensures clean terminal output
+    """
     if not enabled:
         yield
     else:
@@ -150,6 +330,31 @@ def show_spinner(
 def check_valid_return_type(return_value: Any, class_name: str) -> None:
+    """
+    Validate that an agent's return value is in the expected format.
+    This function ensures that agent adapters return values in one of the
+    supported formats (string, OpenAI message, list of messages, or ScenarioResult).
+    It also verifies that the returned data is JSON-serializable for caching.
+    Args:
+        return_value: The value returned by an agent's call method
+        class_name: Name of the agent class (for error messages)
+    Raises:
+        ValueError: If the return value is not in a supported format
+    Example:
+        ```python
+        # Valid return values
+        check_valid_return_type("Hello world", "MyAgent")  # OK
+        check_valid_return_type({"role": "assistant", "content": "Hi"}, "MyAgent")  # OK
+        check_valid_return_type([{"role": "assistant", "content": "Hi"}], "MyAgent")  # OK
+        # Invalid return value
+        check_valid_return_type(42, "MyAgent")  # Raises ValueError
+        ```
+    """
     def _is_valid_openai_message(message: Any) -> bool:
         return (isinstance(message, dict) and "role" in message) or (
             isinstance(message, BaseModel) and hasattr(message, "role")
@@ -181,6 +386,43 @@ def check_valid_return_type(return_value: Any, class_name: str) -> None:
 def convert_agent_return_types_to_openai_messages(
     agent_response: AgentReturnTypes, role: Literal["user", "assistant"]
 ) -> List[ChatCompletionMessageParam]:
+    """
+    Convert various agent return types to standardized OpenAI message format.
+    This function normalizes different return types from agent adapters into
+    a consistent list of OpenAI-compatible messages that can be used throughout
+    the scenario execution pipeline.
+    Args:
+        agent_response: Response from an agent adapter call
+        role: The role to assign to string responses ("user" or "assistant")
+    Returns:
+        List of OpenAI-compatible messages
+    Raises:
+        ValueError: If agent_response is a ScenarioResult (which should be handled separately)
+    Example:
+        ```python
+        # String response
+        messages = convert_agent_return_types_to_openai_messages("Hello", "assistant")
+        # Result: [{"role": "assistant", "content": "Hello"}]
+        # Dict response
+        response = {"role": "assistant", "content": "Hi", "tool_calls": [...]}
+        messages = convert_agent_return_types_to_openai_messages(response, "assistant")
+        # Result: [{"role": "assistant", "content": "Hi", "tool_calls": [...]}]
+        # List response
+        responses = [
+            {"role": "assistant", "content": "Thinking..."},
+            {"role": "assistant", "content": "Here's the answer"}
+        ]
+        messages = convert_agent_return_types_to_openai_messages(responses, "assistant")
+        # Result: Same list, validated and normalized
+        ```
+    """
     if isinstance(agent_response, ScenarioResult):
         raise ValueError(
             "Unexpectedly tried to convert a ScenarioResult to openai messages",
@@ -199,6 +441,7 @@ def convert_agent_return_types_to_openai_messages(
                     exclude_unset=True,
                     exclude_none=True,
                     exclude_defaults=True,
+                    warnings=False,
                 ),
             )
         else:
@@ -236,11 +479,16 @@ def reverse_roles(
         messages: The list of messages to reverse the roles of.
     """
-    for message in messages.copy():
+    reversed_messages = []
+    for message in messages:
+        message = copy.deepcopy(message)
         # Can't reverse tool calls
         if not safe_attr_or_key(message, "content") or safe_attr_or_key(
             message, "tool_calls"
         ):
+            # If no content nor tool calls, we should skip it entirely, as anthropic may generate some invalid ones e.g. pure {"role": "assistant"}
+            if safe_attr_or_key(message, "tool_calls"):
+                reversed_messages.append(message)
             continue
         if type(message) == dict:
@@ -254,7 +502,9 @@ def reverse_roles(
             elif getattr(message, "role", None) == "assistant":
                 message.role = "user"  # type: ignore
-    return messages
+        reversed_messages.append(message)
+    return reversed_messages
 async def await_if_awaitable(value: T) -> T:

langwatch_scenario-0.3.0.dist-info/RECORD DELETED Viewed

@@ -1,16 +0,0 @@
-scenario/__init__.py,sha256=0OavO4hoZMFL6frlplNkR7BSHfGSOhuVtmKmTrOMFEs,844
-scenario/cache.py,sha256=sYu16SAf-BnVYkWSlEDzpyynJGIQyNYsgMXPgCqEnmk,1719
-scenario/config.py,sha256=NiCCmr8flds-VDzvF8ps4SChVTARtcWfEoHhK0UkDMQ,1076
-scenario/error_messages.py,sha256=8_pa3HIaqkw08qOqeiRKDCNykr9jtofpNJoEV03aRWc,4690
-scenario/pytest_plugin.py,sha256=oJtEPVPi5x50Z-UawVyVPNd6buvh_4msSZ-3hLFpw_Y,5770
-scenario/scenario.py,sha256=K4Snu4-pJaoprEFyly7ZQT8qNlAamxt-eXibCJ0EIJU,7332
-scenario/scenario_agent_adapter.py,sha256=Y2dP3z-2jLYCssQ20oHOphwwrRPQNo2HmLD2KBcJRu0,427
-scenario/scenario_executor.py,sha256=geaP3Znd1he66L6ku3l2IAODj68TtAIk8b8Ssy494xA,15681
-scenario/testing_agent.py,sha256=5S2PIl2hi9FBSVjjs9afXhEgiogryjBIyffH5iJBwdo,10676
-scenario/types.py,sha256=-Uz0qg_fY5vAEkrZnM5CMqE5hiP8OtNErpDdHJmHtac,3179
-scenario/utils.py,sha256=bx813RpZO3xyPfD-dTBbeLM9umWm3PGOq9pw48aJoHI,8113
-langwatch_scenario-0.3.0.dist-info/METADATA,sha256=pywrVOVE2eE4Zk5wePzJoEfErNXWvgK-C8G-qfWp7EI,11040
-langwatch_scenario-0.3.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-langwatch_scenario-0.3.0.dist-info/entry_points.txt,sha256=WlEnJ_gku0i18bIa3DSuGqXRX-QDQLe_s0YmRzK45TI,45
-langwatch_scenario-0.3.0.dist-info/top_level.txt,sha256=45Mn28aedJsetnBMB5xSmrJ-yo701QLH89Zlz4r1clE,9
-langwatch_scenario-0.3.0.dist-info/RECORD,,

scenario/scenario.py DELETED Viewed

@@ -1,238 +0,0 @@
-"""
-Scenario module: defines the core Scenario class for agent testing.
-"""
-from typing import (
-    Awaitable,
-    Callable,
-    List,
-    Dict,
-    Any,
-    Optional,
-    Type,
-    TypedDict,
-    Union,
-)
-import asyncio
-import concurrent.futures
-from scenario.config import ScenarioConfig
-from scenario.error_messages import (
-    default_config_error_message,
-    message_invalid_agent_type,
-)
-from scenario.scenario_agent_adapter import ScenarioAgentAdapter
-from scenario.scenario_executor import ScenarioExecutor
-from .types import ScenarioResult, ScriptStep
-from openai.types.chat import ChatCompletionMessageParam
-class AgentResult(TypedDict, total=False):
-    message: str
-    messages: List[ChatCompletionMessageParam]
-    extra: Dict[str, Any]
-class Scenario(ScenarioConfig):
-    """
-    A scenario represents a specific testing case for an agent.
-    It includes:
-    - A description of the scenario
-    - Criteria to determine if the agent behaved correctly
-    - Optional additional parameters
-    """
-    name: str
-    description: str
-    agents: List[Type[ScenarioAgentAdapter]]
-    criteria: List[str]
-    def __init__(
-        self,
-        name: str,
-        description: str,
-        criteria: List[str] = [],
-        agent: Optional[Type[ScenarioAgentAdapter]] = None,
-        testing_agent: Optional[Type[ScenarioAgentAdapter]] = None,
-        agents: List[Type[ScenarioAgentAdapter]] = [],
-        max_turns: Optional[int] = None,
-        verbose: Optional[Union[bool, int]] = None,
-        cache_key: Optional[str] = None,
-        debug: Optional[bool] = None,
-    ):
-        """Validate scenario configuration after initialization."""
-        config = ScenarioConfig(
-            testing_agent=testing_agent,
-            max_turns=max_turns,
-            verbose=verbose,
-            cache_key=cache_key,
-            debug=debug,
-        )
-        kwargs = config.items()
-        default_config: Optional[ScenarioConfig] = getattr(
-            Scenario, "default_config", None
-        )
-        if default_config:
-            kwargs = default_config.merge(config).items()
-        if not name:
-            raise ValueError("Scenario name cannot be empty")
-        kwargs["name"] = name
-        if not description:
-            raise ValueError("Scenario description cannot be empty")
-        kwargs["description"] = description
-        kwargs["criteria"] = criteria
-        if kwargs.get("max_turns", 10) < 1:
-            raise ValueError("max_turns must be a positive integer")
-        if not agents and not agent:
-            raise ValueError(
-                "Missing required argument `agent`. Either `agent` or `agents` argument must be provided for the Scenario"
-            )
-        if not agents and not kwargs.get("testing_agent"):
-            raise Exception(default_config_error_message)
-        agents = agents or [
-            kwargs.get("testing_agent"),
-            agent,  # type: ignore
-        ]
-        # Ensure each agent is a ScenarioAgentAdapter
-        for agent in agents:
-            if (
-                not agent
-                or not isinstance(agent, type)
-                or not issubclass(agent, ScenarioAgentAdapter)
-            ):
-                raise ValueError(message_invalid_agent_type(agent))
-        kwargs["agents"] = agents
-        super().__init__(**kwargs)
-    def script(self, script: List[ScriptStep]):
-        class ScriptedScenario:
-            def __init__(self, scenario: "Scenario"):
-                self._scenario = scenario
-            async def run(
-                self, context: Optional[Dict[str, Any]] = None
-            ) -> ScenarioResult:
-                return await self._scenario._run(context, script)
-        return ScriptedScenario(self)
-    async def run(self, context: Optional[Dict[str, Any]] = None) -> ScenarioResult:
-        """
-        Run the scenario against the agent under test.
-        Args:
-            context: Optional initial context for the agent
-        Returns:
-            ScenarioResult containing the test outcome
-        """
-        return await self._run(context, None)
-    async def _run(
-        self,
-        context: Optional[Dict[str, Any]] = None,
-        script: Optional[List[ScriptStep]] = None,
-    ) -> ScenarioResult:
-        # We'll use a thread pool to run the execution logic, we
-        # require a separate thread because even though asyncio is
-        # being used throughout, any user code on the callback can
-        # be blocking, preventing them from running scenarios in parallel
-        with concurrent.futures.ThreadPoolExecutor() as executor:
-            def run_in_thread():
-                loop = asyncio.new_event_loop()
-                asyncio.set_event_loop(loop)
-                try:
-                    return loop.run_until_complete(
-                        ScenarioExecutor(self, context, script).run()
-                    )
-                finally:
-                    loop.close()
-            # Run the function in the thread pool and await its result
-            # This converts the thread's execution into a Future that the current
-            # event loop can await without blocking
-            loop = asyncio.get_event_loop()
-            result = await loop.run_in_executor(executor, run_in_thread)
-            return result
-    @classmethod
-    def configure(
-        cls,
-        testing_agent: Optional[Type[ScenarioAgentAdapter]] = None,
-        max_turns: Optional[int] = None,
-        verbose: Optional[Union[bool, int]] = None,
-        cache_key: Optional[str] = None,
-        debug: Optional[bool] = None,
-    ) -> None:
-        existing_config = getattr(cls, "default_config", ScenarioConfig())
-        cls.default_config = existing_config.merge(
-            ScenarioConfig(
-                testing_agent=testing_agent,
-                max_turns=max_turns,
-                verbose=verbose,
-                cache_key=cache_key,
-                debug=debug,
-            )
-        )
-    # Scenario Scripting
-    def message(self, message: ChatCompletionMessageParam) -> ScriptStep:
-        return lambda state: state.message(message)
-    def user(
-        self, content: Optional[Union[str, ChatCompletionMessageParam]] = None
-    ) -> ScriptStep:
-        return lambda state: state.user(content)
-    def agent(
-        self, content: Optional[Union[str, ChatCompletionMessageParam]] = None
-    ) -> ScriptStep:
-        return lambda state: state.agent(content)
-    def judge(
-        self, content: Optional[Union[str, ChatCompletionMessageParam]] = None
-    ) -> ScriptStep:
-        return lambda state: state.judge(content)
-    def proceed(
-        self,
-        turns: Optional[int] = None,
-        on_turn: Optional[
-            Union[
-                Callable[[ScenarioExecutor], None],
-                Callable[[ScenarioExecutor], Awaitable[None]],
-            ]
-        ] = None,
-        on_step: Optional[
-            Union[
-                Callable[[ScenarioExecutor], None],
-                Callable[[ScenarioExecutor], Awaitable[None]],
-            ]
-        ] = None,
-    ) -> ScriptStep:
-        return lambda state: state.proceed(turns, on_turn, on_step)
-    def succeed(self) -> ScriptStep:
-        return lambda state: state.succeed()
-    def fail(self) -> ScriptStep:
-        return lambda state: state.fail()

scenario/scenario_agent_adapter.py DELETED Viewed

@@ -1,16 +0,0 @@
-from abc import ABC, abstractmethod
-from typing import ClassVar, Set
-from .types import AgentInput, AgentReturnTypes, ScenarioAgentRole
-class ScenarioAgentAdapter(ABC):
-    roles: ClassVar[Set[ScenarioAgentRole]] = {ScenarioAgentRole.AGENT}
-    def __init__(self, input: AgentInput):
-        super().__init__()
-        pass
-    @abstractmethod
-    async def call(self, input: AgentInput) -> AgentReturnTypes:
-        pass

langwatch-scenario 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

langwatch-scenario 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl