langwatch-scenario 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to their public registry, and is provided for informational purposes only.
- {langwatch_scenario-0.3.0.dist-info → langwatch_scenario-0.4.0.dist-info}/METADATA +140 -79
- langwatch_scenario-0.4.0.dist-info/RECORD +18 -0
- scenario/__init__.py +223 -9
- scenario/agent_adapter.py +111 -0
- scenario/cache.py +132 -8
- scenario/config.py +154 -10
- scenario/error_messages.py +8 -38
- scenario/judge_agent.py +435 -0
- scenario/pytest_plugin.py +223 -15
- scenario/scenario_executor.py +428 -136
- scenario/scenario_state.py +205 -0
- scenario/script.py +361 -0
- scenario/types.py +193 -20
- scenario/user_simulator_agent.py +249 -0
- scenario/utils.py +252 -2
- langwatch_scenario-0.3.0.dist-info/RECORD +0 -16
- scenario/scenario.py +0 -238
- scenario/scenario_agent_adapter.py +0 -16
- scenario/testing_agent.py +0 -279
- {langwatch_scenario-0.3.0.dist-info → langwatch_scenario-0.4.0.dist-info}/WHEEL +0 -0
- {langwatch_scenario-0.3.0.dist-info → langwatch_scenario-0.4.0.dist-info}/entry_points.txt +0 -0
- {langwatch_scenario-0.3.0.dist-info → langwatch_scenario-0.4.0.dist-info}/top_level.txt +0 -0
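Taken together, the file list shows an API rewrite: the 0.3.0 class-based entry points (scenario.py, testing_agent.py, scenario_agent_adapter.py) are removed and replaced by a scenario.run() flow with dedicated agent_adapter.py, judge_agent.py, and user_simulator_agent.py modules, a script DSL (script.py), and an inspectable scenario_state.py. Below is a minimal sketch of the new shape, assembled from the docstring examples in the diffs that follow; `my_agent`, the criteria string, and the `result.success` field are assumptions for illustration, not part of this diff:

```python
import scenario

async def test_weather_flow(my_agent: scenario.AgentAdapter):
    # my_agent is a hypothetical adapter for the agent under test,
    # built on the AgentAdapter base class added in scenario/agent_adapter.py.
    result = await scenario.run(
        name="weather tool test",
        description="User asks about the weather and expects a helpful answer",
        agents=[
            my_agent,
            scenario.UserSimulatorAgent(),
            scenario.JudgeAgent(criteria=["Agent provides helpful response"]),
        ],
        script=[
            scenario.user("What's the weather like?"),
            scenario.agent(),
            scenario.judge(),
        ],
    )
    assert result.success  # assumes the returned ScenarioResult exposes `success`
```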
scenario/scenario_state.py
ADDED
@@ -0,0 +1,205 @@
+"""
+Scenario state management module.
+
+This module provides the ScenarioState class which tracks the current state
+of a scenario execution, including conversation history, turn tracking, and
+utility methods for inspecting the conversation.
+"""
+
+from typing import List, Dict, Any, Optional, TYPE_CHECKING
+from openai.types.chat import (
+    ChatCompletionMessageParam,
+    ChatCompletionMessageToolCallParam,
+    ChatCompletionUserMessageParam,
+)
+from pydantic import BaseModel
+
+from scenario.config import ScenarioConfig
+
+if TYPE_CHECKING:
+    from .scenario_executor import ScenarioExecutor
+
+
+class ScenarioState(BaseModel):
+    """
+    Represents the current state of a scenario execution.
+
+    This class provides access to the conversation history, turn information,
+    and utility methods for inspecting messages and tool calls. It's passed to
+    script step functions and available through AgentInput.scenario_state.
+
+    Attributes:
+        description: The scenario description that guides the simulation
+        messages: Complete conversation history as OpenAI-compatible messages
+        thread_id: Unique identifier for this conversation thread
+        current_turn: Current turn number in the conversation
+        config: Configuration settings for this scenario execution
+
+    Example:
+        ```python
+        def check_agent_behavior(state: ScenarioState) -> None:
+            # Check if the agent called a specific tool
+            if state.has_tool_call("get_weather"):
+                print("Agent successfully called weather tool")
+
+            # Get the last user message
+            last_user = state.last_user_message()
+            print(f"User said: {last_user['content']}")
+
+            # Check conversation length
+            if len(state.messages) > 10:
+                print("Conversation is getting long")
+
+        # Use in scenario script
+        result = await scenario.run(
+            name="tool usage test",
+            description="Test that agent uses the correct tools",
+            agents=[
+                my_agent,
+                scenario.UserSimulatorAgent(),
+                scenario.JudgeAgent(criteria=["Agent provides helpful response"])
+            ],
+            script=[
+                scenario.user("What's the weather like?"),
+                scenario.agent(),
+                check_agent_behavior,  # Custom inspection function
+                scenario.succeed()
+            ]
+        )
+        ```
+    """
+    description: str
+    messages: List[ChatCompletionMessageParam]
+    thread_id: str
+    current_turn: int
+    config: ScenarioConfig
+
+    _executor: "ScenarioExecutor"
+
+    def add_message(self, message: ChatCompletionMessageParam):
+        """
+        Add a message to the conversation history.
+
+        This method delegates to the scenario executor to properly handle
+        message broadcasting and state updates.
+
+        Args:
+            message: OpenAI-compatible message to add to the conversation
+
+        Example:
+            ```python
+            def inject_system_message(state: ScenarioState) -> None:
+                state.add_message({
+                    "role": "system",
+                    "content": "The user is now in a hurry"
+                })
+            ```
+        """
+        self._executor.add_message(message)
+
+    def last_message(self) -> ChatCompletionMessageParam:
+        """
+        Get the most recent message in the conversation.
+
+        Returns:
+            The last message in the conversation history
+
+        Raises:
+            ValueError: If no messages exist in the conversation
+
+        Example:
+            ```python
+            def check_last_response(state: ScenarioState) -> None:
+                last = state.last_message()
+                if last["role"] == "assistant":
+                    content = last.get("content", "")
+                    assert "helpful" in content.lower()
+            ```
+        """
+        if len(self.messages) == 0:
+            raise ValueError("No messages found")
+        return self.messages[-1]
+
+    def last_user_message(self) -> ChatCompletionUserMessageParam:
+        """
+        Get the most recent user message in the conversation.
+
+        Returns:
+            The last user message in the conversation history
+
+        Raises:
+            ValueError: If no user messages exist in the conversation
+
+        Example:
+            ```python
+            def analyze_user_intent(state: ScenarioState) -> None:
+                user_msg = state.last_user_message()
+                content = user_msg["content"]
+
+                if isinstance(content, str):
+                    if "urgent" in content.lower():
+                        print("User expressed urgency")
+            ```
+        """
+        user_messages = [m for m in self.messages if m["role"] == "user"]
+        if not user_messages:
+            raise ValueError("No user messages found")
+        return user_messages[-1]
+
+    def last_tool_call(
+        self, tool_name: str
+    ) -> Optional[ChatCompletionMessageToolCallParam]:
+        """
+        Find the most recent call to a specific tool in the conversation.
+
+        Searches through the conversation history in reverse order to find
+        the last time the specified tool was called by an assistant.
+
+        Args:
+            tool_name: Name of the tool to search for
+
+        Returns:
+            The tool call object if found, None otherwise
+
+        Example:
+            ```python
+            def verify_weather_call(state: ScenarioState) -> None:
+                weather_call = state.last_tool_call("get_current_weather")
+                if weather_call:
+                    args = json.loads(weather_call["function"]["arguments"])
+                    assert "location" in args
+                    print(f"Weather requested for: {args['location']}")
+            ```
+        """
+        for message in reversed(self.messages):
+            if message["role"] == "assistant" and "tool_calls" in message:
+                for tool_call in message["tool_calls"]:
+                    if tool_call["function"]["name"] == tool_name:
+                        return tool_call
+        return None
+
+    def has_tool_call(self, tool_name: str) -> bool:
+        """
+        Check if a specific tool has been called in the conversation.
+
+        This is a convenience method that returns True if the specified
+        tool has been called at any point in the conversation.
+
+        Args:
+            tool_name: Name of the tool to check for
+
+        Returns:
+            True if the tool has been called, False otherwise
+
+        Example:
+            ```python
+            def ensure_tool_usage(state: ScenarioState) -> None:
+                # Verify the agent used required tools
+                assert state.has_tool_call("search_database")
+                assert state.has_tool_call("format_results")
+
+                # Check it didn't use forbidden tools
+                assert not state.has_tool_call("delete_data")
+            ```
+        """
+        return self.last_tool_call(tool_name) is not None
scenario/script.py
ADDED
@@ -0,0 +1,361 @@
+"""
+Scenario script DSL (Domain Specific Language) module.
+
+This module provides a collection of functions that form a declarative language
+for controlling scenario execution flow. These functions can be used to create
+scripts that precisely control how conversations unfold, when evaluations occur,
+and when scenarios should succeed or fail.
+"""
+
+from typing import Awaitable, Callable, Optional, Union, TYPE_CHECKING
+
+from .types import ScriptStep
+
+from openai.types.chat import ChatCompletionMessageParam
+
+if TYPE_CHECKING:
+    from scenario.scenario_state import ScenarioState
+
+
+def message(message: ChatCompletionMessageParam) -> ScriptStep:
+    """
+    Add a specific message to the conversation.
+
+    This function allows you to inject any OpenAI-compatible message directly
+    into the conversation at a specific point in the script. Useful for
+    simulating tool responses, system messages, or specific conversational states.
+
+    Args:
+        message: OpenAI-compatible message to add to the conversation
+
+    Returns:
+        ScriptStep function that can be used in scenario scripts
+
+    Example:
+        ```python
+        result = await scenario.run(
+            name="tool response test",
+            description="Testing tool call responses",
+            agents=[
+                my_agent,
+                scenario.UserSimulatorAgent(),
+                scenario.JudgeAgent(criteria=["Agent uses weather tool correctly"])
+            ],
+            script=[
+                scenario.user("What's the weather?"),
+                scenario.agent(),  # Agent calls weather tool
+                scenario.message({
+                    "role": "tool",
+                    "tool_call_id": "call_123",
+                    "content": json.dumps({"temperature": "75°F", "condition": "sunny"})
+                }),
+                scenario.agent(),  # Agent processes tool response
+                scenario.succeed()
+            ]
+        )
+        ```
+    """
+    return lambda state: state._executor.message(message)
+
+
+def user(
+    content: Optional[Union[str, ChatCompletionMessageParam]] = None,
+) -> ScriptStep:
+    """
+    Generate or specify a user message in the conversation.
+
+    If content is provided, it will be used as the user message. If no content
+    is provided, the user simulator agent will automatically generate an
+    appropriate message based on the scenario context.
+
+    Args:
+        content: Optional user message content. Can be a string or full message dict.
+            If None, the user simulator will generate content automatically.
+
+    Returns:
+        ScriptStep function that can be used in scenario scripts
+
+    Example:
+        ```python
+        result = await scenario.run(
+            name="user interaction test",
+            description="Testing specific user inputs",
+            agents=[
+                my_agent,
+                scenario.UserSimulatorAgent(),
+                scenario.JudgeAgent(criteria=["Agent responds helpfully to user"])
+            ],
+            script=[
+                # Specific user message
+                scenario.user("I need help with Python"),
+                scenario.agent(),
+
+                # Auto-generated user message based on scenario context
+                scenario.user(),
+                scenario.agent(),
+
+                # Structured user message with multimodal content
+                scenario.user({
+                    "role": "user",
+                    "content": [
+                        {"type": "text", "text": "What's in this image?"},
+                        {"type": "image_url", "image_url": {"url": "data:image/..."}}
+                    ]
+                }),
+                scenario.succeed()
+            ]
+        )
+        ```
+    """
+    return lambda state: state._executor.user(content)
+
+
+def agent(
+    content: Optional[Union[str, ChatCompletionMessageParam]] = None,
+) -> ScriptStep:
+    """
+    Generate or specify an agent response in the conversation.
+
+    If content is provided, it will be used as the agent response. If no content
+    is provided, the agent under test will be called to generate its response
+    based on the current conversation state.
+
+    Args:
+        content: Optional agent response content. Can be a string or full message dict.
+            If None, the agent under test will generate content automatically.
+
+    Returns:
+        ScriptStep function that can be used in scenario scripts
+
+    Example:
+        ```python
+        result = await scenario.run(
+            name="agent response test",
+            description="Testing agent responses",
+            agents=[
+                my_agent,
+                scenario.UserSimulatorAgent(),
+                scenario.JudgeAgent(criteria=["Agent provides appropriate responses"])
+            ],
+            script=[
+                scenario.user("Hello"),
+
+                # Let agent generate its own response
+                scenario.agent(),
+
+                # Or specify exact agent response for testing edge cases
+                scenario.agent("I'm sorry, I'm currently unavailable"),
+                scenario.user(),  # See how user simulator reacts
+
+                # Structured agent response with tool calls
+                scenario.agent({
+                    "role": "assistant",
+                    "content": "Let me search for that information",
+                    "tool_calls": [{"id": "call_123", "type": "function", ...}]
+                }),
+                scenario.succeed()
+            ]
+        )
+        ```
+    """
+    return lambda state: state._executor.agent(content)
+
+
+def judge(
+    content: Optional[Union[str, ChatCompletionMessageParam]] = None,
+) -> ScriptStep:
+    """
+    Invoke the judge agent to evaluate the current conversation state.
+
+    This function forces the judge agent to make a decision about whether
+    the scenario should continue or end with a success/failure verdict.
+    The judge will evaluate based on its configured criteria.
+
+    Args:
+        content: Optional message content for the judge. Usually None to let
+            the judge evaluate based on its criteria.
+
+    Returns:
+        ScriptStep function that can be used in scenario scripts
+
+    Example:
+        ```python
+        result = await scenario.run(
+            name="judge evaluation test",
+            description="Testing judge at specific points",
+            agents=[
+                my_agent,
+                scenario.UserSimulatorAgent(),
+                scenario.JudgeAgent(criteria=["Agent provides coding help effectively"])
+            ],
+            script=[
+                scenario.user("Can you help me code?"),
+                scenario.agent(),
+
+                # Force judge evaluation after first exchange
+                scenario.judge(),  # May continue or end scenario
+
+                # If scenario continues...
+                scenario.user(),
+                scenario.agent(),
+                scenario.judge(),  # Final evaluation
+            ]
+        )
+        ```
+    """
+    return lambda state: state._executor.judge(content)
+
+
+def proceed(
+    turns: Optional[int] = None,
+    on_turn: Optional[
+        Union[
+            Callable[["ScenarioState"], None],
+            Callable[["ScenarioState"], Awaitable[None]],
+        ]
+    ] = None,
+    on_step: Optional[
+        Union[
+            Callable[["ScenarioState"], None],
+            Callable[["ScenarioState"], Awaitable[None]],
+        ]
+    ] = None,
+) -> ScriptStep:
+    """
+    Let the scenario proceed automatically for a specified number of turns.
+
+    This function allows the scenario to run automatically with the normal
+    agent interaction flow (user -> agent -> judge evaluation). You can
+    optionally provide callbacks to execute custom logic at each turn or step.
+
+    Args:
+        turns: Number of turns to proceed automatically. If None, proceeds until
+            the judge agent decides to end the scenario or max_turns is reached.
+        on_turn: Optional callback function called at the end of each turn
+        on_step: Optional callback function called after each agent interaction
+
+    Returns:
+        ScriptStep function that can be used in scenario scripts
+
+    Example:
+        ```python
+        def log_progress(state: ScenarioState) -> None:
+            print(f"Turn {state.current_turn}: {len(state.messages)} messages")
+
+        def check_tool_usage(state: ScenarioState) -> None:
+            if state.has_tool_call("dangerous_action"):
+                raise AssertionError("Agent used forbidden tool!")
+
+        result = await scenario.run(
+            name="automatic proceeding test",
+            description="Let scenario run with monitoring",
+            agents=[
+                my_agent,
+                scenario.UserSimulatorAgent(),
+                scenario.JudgeAgent(criteria=["Agent behaves safely and helpfully"])
+            ],
+            script=[
+                scenario.user("Let's start"),
+                scenario.agent(),
+
+                # Let it proceed for 3 turns with monitoring
+                scenario.proceed(
+                    turns=3,
+                    on_turn=log_progress,
+                    on_step=check_tool_usage
+                ),
+
+                # Then do final evaluation
+                scenario.judge()
+            ]
+        )
+        ```
+    """
+    return lambda state: state._executor.proceed(turns, on_turn, on_step)
+
+
+def succeed(reasoning: Optional[str] = None) -> ScriptStep:
+    """
+    Immediately end the scenario with a success result.
+
+    This function terminates the scenario execution and marks it as successful,
+    bypassing any further agent interactions or judge evaluations.
+
+    Args:
+        reasoning: Optional explanation for why the scenario succeeded
+
+    Returns:
+        ScriptStep function that can be used in scenario scripts
+
+    Example:
+        ```python
+        def custom_success_check(state: ScenarioState) -> None:
+            last_msg = state.last_message()
+            if "solution" in last_msg.get("content", "").lower():
+                # Custom success condition met
+                return scenario.succeed("Agent provided a solution")(state)
+
+        result = await scenario.run(
+            name="custom success test",
+            description="Test custom success conditions",
+            agents=[
+                my_agent,
+                scenario.UserSimulatorAgent(),
+                scenario.JudgeAgent(criteria=["Agent provides a solution"])
+            ],
+            script=[
+                scenario.user("I need a solution"),
+                scenario.agent(),
+                custom_success_check,
+
+                # Or explicit success
+                scenario.succeed("Agent completed the task successfully")
+            ]
+        )
+        ```
+    """
+    return lambda state: state._executor.succeed(reasoning)
+
+
+def fail(reasoning: Optional[str] = None) -> ScriptStep:
+    """
+    Immediately end the scenario with a failure result.
+
+    This function terminates the scenario execution and marks it as failed,
+    bypassing any further agent interactions or judge evaluations.
+
+    Args:
+        reasoning: Optional explanation for why the scenario failed
+
+    Returns:
+        ScriptStep function that can be used in scenario scripts
+
+    Example:
+        ```python
+        def safety_check(state: ScenarioState) -> None:
+            last_msg = state.last_message()
+            content = last_msg.get("content", "")
+
+            if "harmful" in content.lower():
+                return scenario.fail("Agent produced harmful content")(state)
+
+        result = await scenario.run(
+            name="safety check test",
+            description="Test safety boundaries",
+            agents=[
+                my_agent,
+                scenario.UserSimulatorAgent(),
+                scenario.JudgeAgent(criteria=["Agent maintains safety guidelines"])
+            ],
+            script=[
+                scenario.user("Tell me something dangerous"),
+                scenario.agent(),
+                safety_check,
+
+                # Or explicit failure
+                scenario.fail("Agent failed to meet safety requirements")
+            ]
+        )
+        ```
+    """
+    return lambda state: state._executor.fail(reasoning)