langwatch-scenario 0.1.3__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
scenario/scenario.py CHANGED
@@ -2,19 +2,33 @@
2
2
  Scenario module: defines the core Scenario class for agent testing.
3
3
  """
4
4
 
5
- from typing import Awaitable, List, Dict, Any, Optional, Callable, TypedDict, Union
5
+ from typing import (
6
+ Awaitable,
7
+ Callable,
8
+ List,
9
+ Dict,
10
+ Any,
11
+ Optional,
12
+ Type,
13
+ TypedDict,
14
+ Union,
15
+ )
6
16
  import asyncio
7
17
  import concurrent.futures
8
- from functools import partial
9
18
 
10
19
  from scenario.config import ScenarioConfig
20
+ from scenario.error_messages import (
21
+ default_config_error_message,
22
+ message_invalid_agent_type,
23
+ )
24
+ from scenario.scenario_agent_adapter import ScenarioAgentAdapter
11
25
  from scenario.scenario_executor import ScenarioExecutor
12
26
 
13
- from .result import ScenarioResult
14
- from .testing_agent import TestingAgent
27
+ from .types import ScenarioResult, ScriptStep
15
28
 
16
29
  from openai.types.chat import ChatCompletionMessageParam
17
30
 
31
+
18
32
  class AgentResult(TypedDict, total=False):
19
33
  message: str
20
34
  messages: List[ChatCompletionMessageParam]
@@ -27,44 +41,94 @@ class Scenario(ScenarioConfig):
27
41
 
28
42
  It includes:
29
43
  - A description of the scenario
30
- - Success criteria to determine if the agent behaved correctly
31
- - Failure criteria to determine if the agent failed
32
- - An optional strategy that guides the testing agent
44
+ - Criteria to determine if the agent behaved correctly
33
45
  - Optional additional parameters
34
46
  """
35
47
 
48
+ name: str
36
49
  description: str
37
- agent: Union[
38
- Callable[[str, Optional[Dict[str, Any]]], Dict[str, Any]],
39
- Callable[[str, Optional[Dict[str, Any]]], Awaitable[Dict[str, Any]]],
40
- ]
41
- success_criteria: List[str]
42
- failure_criteria: List[str] = []
43
- strategy: Optional[str] = None
44
-
45
- def __init__(self, description: str, **kwargs):
50
+ agents: List[Type[ScenarioAgentAdapter]]
51
+ criteria: List[str]
52
+
53
def __init__(
    self,
    name: str,
    description: str,
    criteria: Optional[List[str]] = None,
    agent: Optional[Type[ScenarioAgentAdapter]] = None,
    testing_agent: Optional[Type[ScenarioAgentAdapter]] = None,
    agents: Optional[List[Type[ScenarioAgentAdapter]]] = None,
    max_turns: Optional[int] = None,
    verbose: Optional[Union[bool, int]] = None,
    cache_key: Optional[str] = None,
    debug: Optional[bool] = None,
):
    """Validate the scenario arguments and initialize the scenario.

    The per-scenario settings are wrapped in a ``ScenarioConfig`` and, when
    ``Scenario.default_config`` is set, combined with it through
    ``default_config.merge(config)``.
    NOTE(review): presumably explicitly passed values win over the
    defaults -- confirm against ``ScenarioConfig.merge``.

    Args:
        name: Non-empty scenario name.
        description: Non-empty scenario description.
        criteria: Criteria stored on the scenario; defaults to an empty list.
        agent: Adapter class for the agent under test. Required unless
            ``agents`` is given.
        testing_agent: Adapter class for the testing agent. When ``agents``
            is not given it must come from here or from the default config.
        agents: Explicit list of adapter classes. When provided it is used
            as-is instead of ``[testing_agent, agent]``.
        max_turns: Optional turn limit; must be >= 1 when set.
        verbose: Forwarded to ``ScenarioConfig``.
        cache_key: Forwarded to ``ScenarioConfig``.
        debug: Forwarded to ``ScenarioConfig``.

    Raises:
        ValueError: If ``name`` or ``description`` is empty, ``max_turns`` is
            below 1, neither ``agent`` nor ``agents`` is given, or any entry
            of the final agent list is not a ``ScenarioAgentAdapter``
            subclass.
        Exception: If ``agents`` is not given and no testing agent is
            configured (message is ``default_config_error_message``).
    """
    # None sentinels replace the former shared mutable `[]` defaults.
    if criteria is None:
        criteria = []

    config = ScenarioConfig(
        testing_agent=testing_agent,
        max_turns=max_turns,
        verbose=verbose,
        cache_key=cache_key,
        debug=debug,
    )

    # `items()` is used as a mutable mapping: keys are assigned below and the
    # result is splatted into the parent initializer.
    kwargs = config.items()
    default_config: Optional[ScenarioConfig] = getattr(
        Scenario, "default_config", None
    )
    if default_config:
        kwargs = default_config.merge(config).items()

    if not name:
        raise ValueError("Scenario name cannot be empty")
    kwargs["name"] = name

    if not description:
        raise ValueError("Scenario description cannot be empty")
    kwargs["description"] = description

    kwargs["criteria"] = criteria

    # An absent or None max_turns means "not configured"; only a concrete
    # value is range-checked, so a None entry can no longer raise TypeError.
    configured_max_turns = kwargs.get("max_turns")
    if configured_max_turns is not None and configured_max_turns < 1:
        raise ValueError("max_turns must be a positive integer")

    if not agents and not agent:
        raise ValueError(
            "Missing required argument `agent`. Either `agent` or `agents` argument must be provided for the Scenario"
        )

    if not agents and not kwargs.get("testing_agent"):
        raise Exception(default_config_error_message)

    agents = agents or [
        kwargs.get("testing_agent"),
        agent,  # type: ignore
    ]

    # Ensure each agent is a ScenarioAgentAdapter subclass (a class, not an
    # instance). A distinct loop name avoids shadowing the `agent` parameter.
    for candidate in agents:
        if (
            not candidate
            or not isinstance(candidate, type)
            or not issubclass(candidate, ScenarioAgentAdapter)
        ):
            raise ValueError(message_invalid_agent_type(candidate))
    kwargs["agents"] = agents

    super().__init__(**kwargs)
67
120
 
121
def script(self, script: List[ScriptStep]):
    """Bind a list of script steps to this scenario.

    Returns a wrapper object whose ``run(context)`` coroutine forwards to
    ``self._run(context, script)``.
    """
    steps = script

    class ScriptedScenario:
        """Pairs a scenario with the script steps captured by ``script``."""

        def __init__(self, scenario: "Scenario"):
            self._scenario = scenario

        async def run(
            self, context: Optional[Dict[str, Any]] = None
        ) -> ScenarioResult:
            outcome = await self._scenario._run(context, steps)
            return outcome

    scripted = ScriptedScenario(self)
    return scripted
68
132
 
69
133
  async def run(self, context: Optional[Dict[str, Any]] = None) -> ScenarioResult:
70
134
  """
@@ -77,17 +141,27 @@ class Scenario(ScenarioConfig):
77
141
  ScenarioResult containing the test outcome
78
142
  """
79
143
 
144
+ return await self._run(context, None)
145
+
146
+ async def _run(
147
+ self,
148
+ context: Optional[Dict[str, Any]] = None,
149
+ script: Optional[List[ScriptStep]] = None,
150
+ ) -> ScenarioResult:
80
151
  # We'll use a thread pool to run the execution logic, we
81
152
  # require a separate thread because even though asyncio is
82
153
  # being used throughout, any user code on the callback can
83
154
  # be blocking, preventing them from running scenarios in parallel
84
155
  with concurrent.futures.ThreadPoolExecutor() as executor:
156
+
85
157
  def run_in_thread():
86
158
  loop = asyncio.new_event_loop()
87
159
  asyncio.set_event_loop(loop)
88
160
 
89
161
  try:
90
- return loop.run_until_complete(ScenarioExecutor(self).run(context))
162
+ return loop.run_until_complete(
163
+ ScenarioExecutor(self, context, script).run()
164
+ )
91
165
  finally:
92
166
  loop.close()
93
167
 
@@ -101,7 +175,7 @@ class Scenario(ScenarioConfig):
101
175
  @classmethod
102
176
  def configure(
103
177
  cls,
104
- testing_agent: Optional[TestingAgent] = None,
178
+ testing_agent: Optional[Type[ScenarioAgentAdapter]] = None,
105
179
  max_turns: Optional[int] = None,
106
180
  verbose: Optional[Union[bool, int]] = None,
107
181
  cache_key: Optional[str] = None,
@@ -118,3 +192,47 @@ class Scenario(ScenarioConfig):
118
192
  debug=debug,
119
193
  )
120
194
  )
195
+
196
+ # Scenario Scripting
197
+
198
def message(self, message: ChatCompletionMessageParam) -> ScriptStep:
    """Build a script step that passes ``message`` to ``state.message``."""

    def step(state):
        return state.message(message)

    return step
200
+
201
def user(
    self, content: Optional[Union[str, ChatCompletionMessageParam]] = None
) -> ScriptStep:
    """Build a script step that calls ``state.user`` with ``content``.

    ``content`` may be omitted, in which case ``None`` is forwarded.
    """

    def step(state):
        return state.user(content)

    return step
205
+
206
def agent(
    self, content: Optional[Union[str, ChatCompletionMessageParam]] = None
) -> ScriptStep:
    """Build a script step that calls ``state.agent`` with ``content``.

    ``content`` may be omitted, in which case ``None`` is forwarded.
    """

    def step(state):
        return state.agent(content)

    return step
210
+
211
def judge(
    self, content: Optional[Union[str, ChatCompletionMessageParam]] = None
) -> ScriptStep:
    """Build a script step that calls ``state.judge`` with ``content``.

    ``content`` may be omitted, in which case ``None`` is forwarded.
    """

    def step(state):
        return state.judge(content)

    return step
215
+
216
def proceed(
    self,
    turns: Optional[int] = None,
    on_turn: Optional[
        Union[
            Callable[[ScenarioExecutor], None],
            Callable[[ScenarioExecutor], Awaitable[None]],
        ]
    ] = None,
    on_step: Optional[
        Union[
            Callable[[ScenarioExecutor], None],
            Callable[[ScenarioExecutor], Awaitable[None]],
        ]
    ] = None,
) -> ScriptStep:
    """Build a script step that calls ``state.proceed``.

    ``turns``, ``on_turn`` and ``on_step`` are forwarded positionally, in
    that order. The callbacks are typed as sync or async callables taking a
    ``ScenarioExecutor``.
    """

    def step(state):
        return state.proceed(turns, on_turn, on_step)

    return step
233
+
234
def succeed(self) -> ScriptStep:
    """Build a script step that calls ``state.succeed()``."""

    def step(state):
        return state.succeed()

    return step
236
+
237
def fail(self) -> ScriptStep:
    """Build a script step that calls ``state.fail()``."""

    def step(state):
        return state.fail()

    return step
@@ -0,0 +1,16 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import ClassVar, Set
3
+
4
+ from .types import AgentInput, AgentReturnTypes, ScenarioAgentRole
5
+
6
+
7
class ScenarioAgentAdapter(ABC):
    """Abstract base class for agent adapters.

    Subclasses must implement the ``call`` coroutine. ``roles`` is a
    class-level set that defaults to ``{ScenarioAgentRole.AGENT}``.
    """

    roles: ClassVar[Set[ScenarioAgentRole]] = {ScenarioAgentRole.AGENT}

    def __init__(self, input: AgentInput):
        """Accept ``input``; the base class does not store or use it."""
        super().__init__()

    @abstractmethod
    async def call(self, input: AgentInput) -> AgentReturnTypes:
        """Handle ``input`` and return the agent's response.

        Must be overridden; the base implementation does nothing.
        """