PyPI - langwatch-scenario - Versions diffs - 0.1.3__py3-none-any.whl → 0.3.0__py3-none-any.whl - Mend

langwatch-scenario 0.1.3__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17)

{langwatch_scenario-0.1.3.dist-info → langwatch_scenario-0.3.0.dist-info}/METADATA +95 -34
langwatch_scenario-0.3.0.dist-info/RECORD +16 -0
{langwatch_scenario-0.1.3.dist-info → langwatch_scenario-0.3.0.dist-info}/WHEEL +1 -1
scenario/__init__.py +13 -3
scenario/config.py +18 -7
scenario/error_messages.py +81 -23
scenario/pytest_plugin.py +8 -8
scenario/scenario.py +144 -26
scenario/scenario_agent_adapter.py +16 -0
scenario/scenario_executor.py +405 -143
scenario/testing_agent.py +123 -109
scenario/types.py +96 -0
scenario/utils.py +148 -5
langwatch_scenario-0.1.3.dist-info/RECORD +0 -15
scenario/result.py +0 -81
{langwatch_scenario-0.1.3.dist-info → langwatch_scenario-0.3.0.dist-info}/entry_points.txt +0 -0
{langwatch_scenario-0.1.3.dist-info → langwatch_scenario-0.3.0.dist-info}/top_level.txt +0 -0

scenario/scenario.py CHANGED Viewed

@@ -2,19 +2,33 @@
 Scenario module: defines the core Scenario class for agent testing.
 """
-from typing import Awaitable, List, Dict, Any, Optional, Callable, TypedDict, Union
+from typing import (
+    Awaitable,
+    Callable,
+    List,
+    Dict,
+    Any,
+    Optional,
+    Type,
+    TypedDict,
+    Union,
+)
 import asyncio
 import concurrent.futures
-from functools import partial
 from scenario.config import ScenarioConfig
+from scenario.error_messages import (
+    default_config_error_message,
+    message_invalid_agent_type,
+)
+from scenario.scenario_agent_adapter import ScenarioAgentAdapter
 from scenario.scenario_executor import ScenarioExecutor
-from .result import ScenarioResult
-from .testing_agent import TestingAgent
+from .types import ScenarioResult, ScriptStep
 from openai.types.chat import ChatCompletionMessageParam
 class AgentResult(TypedDict, total=False):
     message: str
     messages: List[ChatCompletionMessageParam]
@@ -27,44 +41,94 @@ class Scenario(ScenarioConfig):
     It includes:
     - A description of the scenario
-    - Success criteria to determine if the agent behaved correctly
-    - Failure criteria to determine if the agent failed
-    - An optional strategy that guides the testing agent
+    - Criteria to determine if the agent behaved correctly
     - Optional additional parameters
     """
+    name: str
     description: str
-    agent: Union[
-        Callable[[str, Optional[Dict[str, Any]]], Dict[str, Any]],
-        Callable[[str, Optional[Dict[str, Any]]], Awaitable[Dict[str, Any]]],
-    ]
-    success_criteria: List[str]
-    failure_criteria: List[str] = []
-    strategy: Optional[str] = None
-    def __init__(self, description: str, **kwargs):
+    agents: List[Type[ScenarioAgentAdapter]]
+    criteria: List[str]
+    def __init__(
+        self,
+        name: str,
+        description: str,
+        criteria: List[str] = [],
+        agent: Optional[Type[ScenarioAgentAdapter]] = None,
+        testing_agent: Optional[Type[ScenarioAgentAdapter]] = None,
+        agents: List[Type[ScenarioAgentAdapter]] = [],
+        max_turns: Optional[int] = None,
+        verbose: Optional[Union[bool, int]] = None,
+        cache_key: Optional[str] = None,
+        debug: Optional[bool] = None,
+    ):
         """Validate scenario configuration after initialization."""
-        default_config = getattr(Scenario, "default_config", None)
+        config = ScenarioConfig(
+            testing_agent=testing_agent,
+            max_turns=max_turns,
+            verbose=verbose,
+            cache_key=cache_key,
+            debug=debug,
+        )
+        kwargs = config.items()
+        default_config: Optional[ScenarioConfig] = getattr(
+            Scenario, "default_config", None
+        )
         if default_config:
-            kwargs = {**default_config.model_dump(), **kwargs}
+            kwargs = default_config.merge(config).items()
+        if not name:
+            raise ValueError("Scenario name cannot be empty")
+        kwargs["name"] = name
         if not description:
             raise ValueError("Scenario description cannot be empty")
         kwargs["description"] = description
-        if not kwargs.get("success_criteria"):
-            raise ValueError("Scenario must have at least one success criterion")
+        kwargs["criteria"] = criteria
-        if kwargs.get("max_turns", 0) < 1:
+        if kwargs.get("max_turns", 10) < 1:
             raise ValueError("max_turns must be a positive integer")
-        # Ensure agent is callable
-        if not callable(kwargs.get("agent")):
-            raise ValueError("Agent must be a callable function")
+        if not agents and not agent:
+            raise ValueError(
+                "Missing required argument `agent`. Either `agent` or `agents` argument must be provided for the Scenario"
+            )
+        if not agents and not kwargs.get("testing_agent"):
+            raise Exception(default_config_error_message)
+        agents = agents or [
+            kwargs.get("testing_agent"),
+            agent,  # type: ignore
+        ]
+        # Ensure each agent is a ScenarioAgentAdapter
+        for agent in agents:
+            if (
+                not agent
+                or not isinstance(agent, type)
+                or not issubclass(agent, ScenarioAgentAdapter)
+            ):
+                raise ValueError(message_invalid_agent_type(agent))
+        kwargs["agents"] = agents
         super().__init__(**kwargs)
+    def script(self, script: List[ScriptStep]):
+        class ScriptedScenario:
+            def __init__(self, scenario: "Scenario"):
+                self._scenario = scenario
+            async def run(
+                self, context: Optional[Dict[str, Any]] = None
+            ) -> ScenarioResult:
+                return await self._scenario._run(context, script)
+        return ScriptedScenario(self)
     async def run(self, context: Optional[Dict[str, Any]] = None) -> ScenarioResult:
         """
@@ -77,17 +141,27 @@ class Scenario(ScenarioConfig):
             ScenarioResult containing the test outcome
         """
+        return await self._run(context, None)
+    async def _run(
+        self,
+        context: Optional[Dict[str, Any]] = None,
+        script: Optional[List[ScriptStep]] = None,
+    ) -> ScenarioResult:
         # We'll use a thread pool to run the execution logic, we
         # require a separate thread because even though asyncio is
         # being used throughout, any user code on the callback can
         # be blocking, preventing them from running scenarios in parallel
         with concurrent.futures.ThreadPoolExecutor() as executor:
             def run_in_thread():
                 loop = asyncio.new_event_loop()
                 asyncio.set_event_loop(loop)
                 try:
-                    return loop.run_until_complete(ScenarioExecutor(self).run(context))
+                    return loop.run_until_complete(
+                        ScenarioExecutor(self, context, script).run()
+                    )
                 finally:
                     loop.close()
@@ -101,7 +175,7 @@ class Scenario(ScenarioConfig):
     @classmethod
     def configure(
         cls,
-        testing_agent: Optional[TestingAgent] = None,
+        testing_agent: Optional[Type[ScenarioAgentAdapter]] = None,
         max_turns: Optional[int] = None,
         verbose: Optional[Union[bool, int]] = None,
         cache_key: Optional[str] = None,
@@ -118,3 +192,47 @@ class Scenario(ScenarioConfig):
                 debug=debug,
             )
         )
+    # Scenario Scripting
+    def message(self, message: ChatCompletionMessageParam) -> ScriptStep:
+        return lambda state: state.message(message)
+    def user(
+        self, content: Optional[Union[str, ChatCompletionMessageParam]] = None
+    ) -> ScriptStep:
+        return lambda state: state.user(content)
+    def agent(
+        self, content: Optional[Union[str, ChatCompletionMessageParam]] = None
+    ) -> ScriptStep:
+        return lambda state: state.agent(content)
+    def judge(
+        self, content: Optional[Union[str, ChatCompletionMessageParam]] = None
+    ) -> ScriptStep:
+        return lambda state: state.judge(content)
+    def proceed(
+        self,
+        turns: Optional[int] = None,
+        on_turn: Optional[
+            Union[
+                Callable[[ScenarioExecutor], None],
+                Callable[[ScenarioExecutor], Awaitable[None]],
+            ]
+        ] = None,
+        on_step: Optional[
+            Union[
+                Callable[[ScenarioExecutor], None],
+                Callable[[ScenarioExecutor], Awaitable[None]],
+            ]
+        ] = None,
+    ) -> ScriptStep:
+        return lambda state: state.proceed(turns, on_turn, on_step)
+    def succeed(self) -> ScriptStep:
+        return lambda state: state.succeed()
+    def fail(self) -> ScriptStep:
+        return lambda state: state.fail()

scenario/scenario_agent_adapter.py ADDED Viewed

@@ -0,0 +1,16 @@
+from abc import ABC, abstractmethod
+from typing import ClassVar, Set
+from .types import AgentInput, AgentReturnTypes, ScenarioAgentRole
+class ScenarioAgentAdapter(ABC):
+    roles: ClassVar[Set[ScenarioAgentRole]] = {ScenarioAgentRole.AGENT}
+    def __init__(self, input: AgentInput):
+        super().__init__()
+        pass
+    @abstractmethod
+    async def call(self, input: AgentInput) -> AgentReturnTypes:
+        pass

langwatch-scenario 0.1.3__py3-none-any.whl → 0.3.0__py3-none-any.whl

langwatch-scenario 0.1.3__py3-none-any.whl → 0.3.0__py3-none-any.whl