langwatch-scenario 0.1.3__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {langwatch_scenario-0.1.3.dist-info → langwatch_scenario-0.3.0.dist-info}/METADATA +95 -34
- langwatch_scenario-0.3.0.dist-info/RECORD +16 -0
- {langwatch_scenario-0.1.3.dist-info → langwatch_scenario-0.3.0.dist-info}/WHEEL +1 -1
- scenario/__init__.py +13 -3
- scenario/config.py +18 -7
- scenario/error_messages.py +81 -23
- scenario/pytest_plugin.py +8 -8
- scenario/scenario.py +144 -26
- scenario/scenario_agent_adapter.py +16 -0
- scenario/scenario_executor.py +405 -143
- scenario/testing_agent.py +123 -109
- scenario/types.py +96 -0
- scenario/utils.py +148 -5
- langwatch_scenario-0.1.3.dist-info/RECORD +0 -15
- scenario/result.py +0 -81
- {langwatch_scenario-0.1.3.dist-info → langwatch_scenario-0.3.0.dist-info}/entry_points.txt +0 -0
- {langwatch_scenario-0.1.3.dist-info → langwatch_scenario-0.3.0.dist-info}/top_level.txt +0 -0
scenario/scenario.py
CHANGED
@@ -2,19 +2,33 @@
|
|
2
2
|
Scenario module: defines the core Scenario class for agent testing.
|
3
3
|
"""
|
4
4
|
|
5
|
-
from typing import
|
5
|
+
from typing import (
|
6
|
+
Awaitable,
|
7
|
+
Callable,
|
8
|
+
List,
|
9
|
+
Dict,
|
10
|
+
Any,
|
11
|
+
Optional,
|
12
|
+
Type,
|
13
|
+
TypedDict,
|
14
|
+
Union,
|
15
|
+
)
|
6
16
|
import asyncio
|
7
17
|
import concurrent.futures
|
8
|
-
from functools import partial
|
9
18
|
|
10
19
|
from scenario.config import ScenarioConfig
|
20
|
+
from scenario.error_messages import (
|
21
|
+
default_config_error_message,
|
22
|
+
message_invalid_agent_type,
|
23
|
+
)
|
24
|
+
from scenario.scenario_agent_adapter import ScenarioAgentAdapter
|
11
25
|
from scenario.scenario_executor import ScenarioExecutor
|
12
26
|
|
13
|
-
from .
|
14
|
-
from .testing_agent import TestingAgent
|
27
|
+
from .types import ScenarioResult, ScriptStep
|
15
28
|
|
16
29
|
from openai.types.chat import ChatCompletionMessageParam
|
17
30
|
|
31
|
+
|
18
32
|
class AgentResult(TypedDict, total=False):
|
19
33
|
message: str
|
20
34
|
messages: List[ChatCompletionMessageParam]
|
@@ -27,44 +41,94 @@ class Scenario(ScenarioConfig):
|
|
27
41
|
|
28
42
|
It includes:
|
29
43
|
- A description of the scenario
|
30
|
-
-
|
31
|
-
- Failure criteria to determine if the agent failed
|
32
|
-
- An optional strategy that guides the testing agent
|
44
|
+
- Criteria to determine if the agent behaved correctly
|
33
45
|
- Optional additional parameters
|
34
46
|
"""
|
35
47
|
|
48
|
+
name: str
|
36
49
|
description: str
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
50
|
+
agents: List[Type[ScenarioAgentAdapter]]
|
51
|
+
criteria: List[str]
|
52
|
+
|
53
|
+
def __init__(
    self,
    name: str,
    description: str,
    criteria: Optional[List[str]] = None,
    agent: Optional[Type[ScenarioAgentAdapter]] = None,
    testing_agent: Optional[Type[ScenarioAgentAdapter]] = None,
    agents: Optional[List[Type[ScenarioAgentAdapter]]] = None,
    max_turns: Optional[int] = None,
    verbose: Optional[Union[bool, int]] = None,
    cache_key: Optional[str] = None,
    debug: Optional[bool] = None,
):
    """Build the scenario settings, validate them, and initialise the model.

    Args:
        name: Non-empty scenario name.
        description: Non-empty scenario description.
        criteria: Criteria stored on the scenario. Defaults to an empty
            list; a fresh list is created per instance.
        agent: Agent class under test. Used only when ``agents`` is empty.
        testing_agent: Testing agent class. When omitted, the value from
            the class-level ``default_config`` (if any) is used.
        agents: Explicit list of agent classes. When non-empty it is used
            as-is and ``agent`` / ``testing_agent`` are not added to it.
        max_turns: Forwarded to ``ScenarioConfig``.
        verbose: Forwarded to ``ScenarioConfig``.
        cache_key: Forwarded to ``ScenarioConfig``.
        debug: Forwarded to ``ScenarioConfig``.

    Raises:
        ValueError: If ``name`` or ``description`` is empty, the resolved
            ``max_turns`` is below 1, neither ``agent`` nor ``agents`` is
            given, or any resolved agent is not a ``ScenarioAgentAdapter``
            subclass.
        Exception: If ``agents`` is empty and no testing agent could be
            resolved (raised with ``default_config_error_message``).
    """
    config = ScenarioConfig(
        testing_agent=testing_agent,
        max_turns=max_turns,
        verbose=verbose,
        cache_key=cache_key,
        debug=debug,
    )

    kwargs = config.items()
    default_config: Optional[ScenarioConfig] = getattr(
        Scenario, "default_config", None
    )
    if default_config:
        # NOTE(review): precedence between the two configs is decided by
        # ScenarioConfig.merge, which is not visible here.
        kwargs = default_config.merge(config).items()

    if not name:
        raise ValueError("Scenario name cannot be empty")
    kwargs["name"] = name

    if not description:
        raise ValueError("Scenario description cannot be empty")
    kwargs["description"] = description

    # Copy so that neither a shared default nor the caller's own list is
    # aliased by the scenario instance.
    kwargs["criteria"] = list(criteria) if criteria else []

    # A missing or unset max_turns is left to ScenarioConfig's own default;
    # only an explicit value below 1 is rejected.
    resolved_max_turns = kwargs.get("max_turns")
    if resolved_max_turns is not None and resolved_max_turns < 1:
        raise ValueError("max_turns must be a positive integer")

    if not agents and not agent:
        raise ValueError(
            "Missing required argument `agent`. Either `agent` or `agents` argument must be provided for the Scenario"
        )

    if not agents and not kwargs.get("testing_agent"):
        raise Exception(default_config_error_message)

    # Without an explicit list, the testing agent goes first, followed by
    # the agent under test.
    resolved_agents = agents or [
        kwargs.get("testing_agent"),
        agent,  # type: ignore
    ]

    # Ensure each agent is a ScenarioAgentAdapter subclass (a class, not an
    # instance).
    for candidate in resolved_agents:
        if (
            not candidate
            or not isinstance(candidate, type)
            or not issubclass(candidate, ScenarioAgentAdapter)
        ):
            raise ValueError(message_invalid_agent_type(candidate))
    kwargs["agents"] = resolved_agents

    super().__init__(**kwargs)
|
67
120
|
|
121
|
+
def script(self, script: List[ScriptStep]):
    """Bind a list of script steps to this scenario.

    Returns an object with an async ``run(context)`` method that forwards
    to ``self._run(context, script)``.
    """
    steps = script

    class ScriptedScenario:
        # Thin wrapper pairing a scenario with the steps captured above.
        def __init__(self, scenario: "Scenario"):
            self._scenario = scenario

        async def run(
            self, context: Optional[Dict[str, Any]] = None
        ) -> ScenarioResult:
            outcome = await self._scenario._run(context, steps)
            return outcome

    scripted = ScriptedScenario(self)
    return scripted
|
68
132
|
|
69
133
|
async def run(self, context: Optional[Dict[str, Any]] = None) -> ScenarioResult:
|
70
134
|
"""
|
@@ -77,17 +141,27 @@ class Scenario(ScenarioConfig):
|
|
77
141
|
ScenarioResult containing the test outcome
|
78
142
|
"""
|
79
143
|
|
144
|
+
return await self._run(context, None)
|
145
|
+
|
146
|
+
async def _run(
|
147
|
+
self,
|
148
|
+
context: Optional[Dict[str, Any]] = None,
|
149
|
+
script: Optional[List[ScriptStep]] = None,
|
150
|
+
) -> ScenarioResult:
|
80
151
|
# We'll use a thread pool to run the execution logic, we
|
81
152
|
# require a separate thread because even though asyncio is
|
82
153
|
# being used throughout, any user code on the callback can
|
83
154
|
# be blocking, preventing them from running scenarios in parallel
|
84
155
|
with concurrent.futures.ThreadPoolExecutor() as executor:
|
156
|
+
|
85
157
|
def run_in_thread():
|
86
158
|
loop = asyncio.new_event_loop()
|
87
159
|
asyncio.set_event_loop(loop)
|
88
160
|
|
89
161
|
try:
|
90
|
-
return loop.run_until_complete(
|
162
|
+
return loop.run_until_complete(
|
163
|
+
ScenarioExecutor(self, context, script).run()
|
164
|
+
)
|
91
165
|
finally:
|
92
166
|
loop.close()
|
93
167
|
|
@@ -101,7 +175,7 @@ class Scenario(ScenarioConfig):
|
|
101
175
|
@classmethod
|
102
176
|
def configure(
|
103
177
|
cls,
|
104
|
-
testing_agent: Optional[
|
178
|
+
testing_agent: Optional[Type[ScenarioAgentAdapter]] = None,
|
105
179
|
max_turns: Optional[int] = None,
|
106
180
|
verbose: Optional[Union[bool, int]] = None,
|
107
181
|
cache_key: Optional[str] = None,
|
@@ -118,3 +192,47 @@ class Scenario(ScenarioConfig):
|
|
118
192
|
debug=debug,
|
119
193
|
)
|
120
194
|
)
|
195
|
+
|
196
|
+
# Scenario Scripting
|
197
|
+
|
198
|
+
def message(self, message: ChatCompletionMessageParam) -> ScriptStep:
    """Create a script step that calls ``state.message(message)``."""

    def step(state):
        return state.message(message)

    return step
|
200
|
+
|
201
|
+
def user(
    self, content: Optional[Union[str, ChatCompletionMessageParam]] = None
) -> ScriptStep:
    """Create a script step that calls ``state.user(content)``.

    ``content`` may be a string, a chat message, or ``None``; it is passed
    through unchanged.
    """

    def step(state):
        return state.user(content)

    return step
|
205
|
+
|
206
|
+
def agent(
    self, content: Optional[Union[str, ChatCompletionMessageParam]] = None
) -> ScriptStep:
    """Create a script step that calls ``state.agent(content)``.

    ``content`` may be a string, a chat message, or ``None``; it is passed
    through unchanged.
    """

    def step(state):
        return state.agent(content)

    return step
|
210
|
+
|
211
|
+
def judge(
    self, content: Optional[Union[str, ChatCompletionMessageParam]] = None
) -> ScriptStep:
    """Create a script step that calls ``state.judge(content)``.

    ``content`` may be a string, a chat message, or ``None``; it is passed
    through unchanged.
    """

    def step(state):
        return state.judge(content)

    return step
|
215
|
+
|
216
|
+
def proceed(
    self,
    turns: Optional[int] = None,
    on_turn: Optional[
        Union[
            Callable[[ScenarioExecutor], None],
            Callable[[ScenarioExecutor], Awaitable[None]],
        ]
    ] = None,
    on_step: Optional[
        Union[
            Callable[[ScenarioExecutor], None],
            Callable[[ScenarioExecutor], Awaitable[None]],
        ]
    ] = None,
) -> ScriptStep:
    """Create a script step that calls ``state.proceed(turns, on_turn, on_step)``.

    Args:
        turns: Optional turn count, forwarded unchanged.
        on_turn: Optional sync or async callback taking the executor.
        on_step: Optional sync or async callback taking the executor.
    """

    def step(state):
        return state.proceed(turns, on_turn, on_step)

    return step
|
233
|
+
|
234
|
+
def succeed(self) -> ScriptStep:
    """Create a script step that calls ``state.succeed()``."""

    def step(state):
        return state.succeed()

    return step
|
236
|
+
|
237
|
+
def fail(self) -> ScriptStep:
    """Create a script step that calls ``state.fail()``."""

    def step(state):
        return state.fail()

    return step
|
@@ -0,0 +1,16 @@
|
|
1
|
+
from abc import ABC, abstractmethod
|
2
|
+
from typing import ClassVar, Set
|
3
|
+
|
4
|
+
from .types import AgentInput, AgentReturnTypes, ScenarioAgentRole
|
5
|
+
|
6
|
+
|
7
|
+
class ScenarioAgentAdapter(ABC):
    """Abstract base class for agents taking part in a scenario.

    Subclasses must implement the async ``call`` method. The class-level
    ``roles`` set declares the roles the adapter plays and defaults to
    ``ScenarioAgentRole.AGENT`` only.
    """

    roles: ClassVar[Set[ScenarioAgentRole]] = {ScenarioAgentRole.AGENT}

    def __init__(self, input: AgentInput):
        # The base class accepts ``input`` but does not store it.
        super().__init__()

    @abstractmethod
    async def call(self, input: AgentInput) -> AgentReturnTypes:
        """Handle one agent input and return the agent's output."""
|