langwatch-scenario 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,10 +1,13 @@
1
1
  """
2
- ScenarioExecutor module: holds the scenario execution logic and state, orchestrating the conversation between the testing agent and the agent under test.
2
+ Scenario execution engine for agent testing.
3
+
4
+ This module contains the core ScenarioExecutor class that orchestrates the execution
5
+ of scenario tests, managing the interaction between user simulators, agents under test,
6
+ and judge agents to determine test success or failure.
3
7
  """
4
8
 
5
9
  import sys
6
10
  from typing import (
7
- TYPE_CHECKING,
8
11
  Awaitable,
9
12
  Callable,
10
13
  Dict,
@@ -17,7 +20,10 @@ from typing import (
17
20
  )
18
21
  import time
19
22
  import termcolor
23
+ import asyncio
24
+ import concurrent.futures
20
25
 
26
+ from scenario.config import ScenarioConfig
21
27
  from scenario.utils import (
22
28
  await_if_awaitable,
23
29
  check_valid_return_type,
@@ -28,83 +34,346 @@ from scenario.utils import (
28
34
  from openai.types.chat import (
29
35
  ChatCompletionMessageParam,
30
36
  ChatCompletionUserMessageParam,
31
- ChatCompletionMessageToolCallParam,
32
37
  )
33
38
 
34
- from .types import AgentInput, ScenarioAgentRole, ScenarioResult, ScriptStep
39
+ from .types import AgentInput, AgentRole, ScenarioResult, ScriptStep
35
40
  from .error_messages import agent_response_not_awaitable
36
41
  from .cache import context_scenario
37
- from .scenario_agent_adapter import ScenarioAgentAdapter
42
+ from .agent_adapter import AgentAdapter
43
+ from .script import proceed
38
44
  from pksuid import PKSUID
39
-
40
- if TYPE_CHECKING:
41
- from scenario.scenario import Scenario
45
+ from .scenario_state import ScenarioState
42
46
 
43
47
 
44
48
  class ScenarioExecutor:
45
- scenario: "Scenario"
46
- messages: List[ChatCompletionMessageParam]
47
- thread_id: str
48
- current_turn: int
49
-
50
- _context: Optional[Dict[str, Any]]
51
- _script: List[ScriptStep]
52
- _agents: List[ScenarioAgentAdapter]
49
+ """
50
+ Core orchestrator for scenario-based agent testing.
51
+
52
+ The ScenarioExecutor manages the complete lifecycle of a scenario test, including:
53
+ - Orchestrating conversations between user simulators, agents, and judges
54
+ - Managing turn-based execution flow
55
+ - Handling script-based scenario control
56
+ - Collecting and reporting test results
57
+ - Supporting debug mode for interactive testing
58
+
59
+ This class serves as both a builder (for configuration) and an executor (for running tests).
60
+ Most users will interact with it through the high-level `scenario.run()` function rather
61
+ than instantiating it directly.
62
+
63
+ Attributes:
64
+ name: Human-readable name for the scenario
65
+ description: Detailed description of what the scenario tests
66
+ agents: List of agent adapters participating in the scenario
67
+ script: Optional list of script steps to control scenario flow
68
+ config: Configuration settings for execution behavior
69
+
70
+ Example:
71
+ ```python
72
+ # Direct instantiation (less common)
73
+ executor = ScenarioExecutor(
74
+ name="weather query test",
75
+ description="User asks about weather, agent should provide helpful response",
76
+ agents=[
77
+ weather_agent,
78
+ scenario.UserSimulatorAgent(),
79
+ scenario.JudgeAgent(criteria=["Agent provides helpful weather info"])
80
+ ],
81
+ max_turns=10,
82
+ verbose=True
83
+ )
84
+ result = await executor._run()
85
+
86
+ # Preferred high-level API
87
+ result = await scenario.run(
88
+ name="weather query test",
89
+ description="User asks about weather, agent should provide helpful response",
90
+ agents=[
91
+ weather_agent,
92
+ scenario.UserSimulatorAgent(),
93
+ scenario.JudgeAgent(criteria=["Agent provides helpful weather info"])
94
+ ]
95
+ )
96
+ ```
97
+
98
+ Note:
99
+ - Scenarios run in isolated thread pools to support parallel execution
100
+ - All agent interactions are cached when cache_key is configured
101
+ - Debug mode allows step-by-step execution with user intervention
102
+ - Results include detailed timing information and conversation history
103
+ """
104
+ name: str
105
+ description: str
106
+ agents: List[AgentAdapter]
107
+ script: List[ScriptStep]
108
+
109
+ config: ScenarioConfig
110
+
111
+ _state: ScenarioState
53
112
  _total_start_time: float
54
113
  _pending_messages: Dict[int, List[ChatCompletionMessageParam]]
55
114
 
56
- _pending_roles_on_turn: List[ScenarioAgentRole] = []
57
- _pending_agents_on_turn: Set[ScenarioAgentAdapter] = set()
115
+ _pending_roles_on_turn: List[AgentRole] = []
116
+ _pending_agents_on_turn: Set[AgentAdapter] = set()
58
117
  _agent_times: Dict[int, float] = {}
59
118
 
60
119
  def __init__(
61
120
  self,
62
- scenario: "Scenario",
63
- context: Optional[Dict[str, Any]] = None,
121
+ name: str,
122
+ description: str,
123
+ agents: List[AgentAdapter] = [],
64
124
  script: Optional[List[ScriptStep]] = None,
125
+ # Config
126
+ max_turns: Optional[int] = None,
127
+ verbose: Optional[Union[bool, int]] = None,
128
+ cache_key: Optional[str] = None,
129
+ debug: Optional[bool] = None,
65
130
  ):
66
- super().__init__()
131
+ """
132
+ Initialize a scenario executor.
133
+
134
+ Args:
135
+ name: Human-readable name for the scenario (used in reports and logs)
136
+ description: Detailed description of what the scenario tests.
137
+ This guides the user simulator's behavior and provides context.
138
+ agents: List of agent adapters participating in the scenario.
139
+ Typically includes: agent under test, user simulator, and judge.
140
+ script: Optional list of script steps to control scenario flow.
141
+ If not provided, defaults to automatic proceeding.
142
+ max_turns: Maximum number of conversation turns before timeout.
143
+ Overrides global configuration for this scenario.
144
+ verbose: Whether to show detailed output during execution.
145
+ Can be True/False or integer level (2 for extra details).
146
+ cache_key: Cache key for deterministic behavior across runs.
147
+ Overrides global configuration for this scenario.
148
+ debug: Whether to enable debug mode with step-by-step execution.
149
+ Overrides global configuration for this scenario.
150
+
151
+ Example:
152
+ ```python
153
+ executor = ScenarioExecutor(
154
+ name="customer service test",
155
+ description="Customer has a billing question and needs help",
156
+ agents=[
157
+ customer_service_agent,
158
+ scenario.UserSimulatorAgent(),
159
+ scenario.JudgeAgent(criteria=[
160
+ "Agent is polite and professional",
161
+ "Agent addresses the billing question",
162
+ "Agent provides clear next steps"
163
+ ])
164
+ ],
165
+ max_turns=15,
166
+ verbose=True,
167
+ debug=False
168
+ )
169
+ ```
170
+ """
171
+ self.name = name
172
+ self.description = description
173
+ self.agents = agents
174
+ self.script = script or [proceed()]
175
+
176
+ config = ScenarioConfig(
177
+ max_turns=max_turns,
178
+ verbose=verbose,
179
+ cache_key=cache_key,
180
+ debug=debug,
181
+ )
182
+ self.config = (ScenarioConfig.default_config or ScenarioConfig()).merge(config)
67
183
 
68
- self.scenario = scenario.model_copy()
69
- self._context = context
70
- self._script = script or [scenario.proceed()]
71
- self.current_turn = 0
72
184
  self.reset()
73
185
 
186
+ @classmethod
187
+ async def run(
188
+ cls,
189
+ name: str,
190
+ description: str,
191
+ agents: List[AgentAdapter] = [],
192
+ max_turns: Optional[int] = None,
193
+ verbose: Optional[Union[bool, int]] = None,
194
+ cache_key: Optional[str] = None,
195
+ debug: Optional[bool] = None,
196
+ script: Optional[List[ScriptStep]] = None,
197
+ ) -> ScenarioResult:
198
+ """
199
+ High-level interface for running a scenario test.
200
+
201
+ This is the main entry point for executing scenario tests. It creates a
202
+ ScenarioExecutor instance and runs it in an isolated thread pool to support
203
+ parallel execution and prevent blocking.
204
+
205
+ Args:
206
+ name: Human-readable name for the scenario
207
+ description: Detailed description of what the scenario tests
208
+ agents: List of agent adapters (agent under test, user simulator, judge)
209
+ max_turns: Maximum conversation turns before timeout (default: 10)
210
+ verbose: Show detailed output during execution
211
+ cache_key: Cache key for deterministic behavior
212
+ debug: Enable debug mode for step-by-step execution
213
+ script: Optional script steps to control scenario flow
214
+
215
+ Returns:
216
+ ScenarioResult containing the test outcome, conversation history,
217
+ success/failure status, and detailed reasoning
218
+
219
+ Example:
220
+ ```python
221
+ import scenario
222
+
223
+ # Simple scenario with automatic flow
224
+ result = await scenario.run(
225
+ name="help request",
226
+ description="User asks for help with a technical problem",
227
+ agents=[
228
+ my_agent,
229
+ scenario.UserSimulatorAgent(),
230
+ scenario.JudgeAgent(criteria=["Agent provides helpful response"])
231
+ ]
232
+ )
233
+
234
+ # Scripted scenario with custom evaluations
235
+ result = await scenario.run(
236
+ name="custom interaction",
237
+ description="Test specific conversation flow",
238
+ agents=[
239
+ my_agent,
240
+ scenario.UserSimulatorAgent(),
241
+ scenario.JudgeAgent(criteria=["Agent provides helpful response"])
242
+ ],
243
+ script=[
244
+ scenario.user("Hello"),
245
+ scenario.agent(),
246
+ custom_eval,
247
+ scenario.succeed()
248
+ ]
249
+ )
250
+
251
+ # Results analysis
252
+ print(f"Test {'PASSED' if result.success else 'FAILED'}")
253
+ print(f"Reasoning: {result.reasoning}")
254
+ print(f"Conversation had {len(result.messages)} messages")
255
+ ```
256
+
257
+ Note:
258
+ - Runs in isolated thread pool to support parallel execution
259
+ - Blocks until scenario completes or times out
260
+ - All agent calls are automatically cached when cache_key is set
261
+ - Exception handling ensures clean resource cleanup
262
+ """
263
+ scenario = cls(
264
+ name=name,
265
+ description=description,
266
+ agents=agents,
267
+ max_turns=max_turns,
268
+ verbose=verbose,
269
+ cache_key=cache_key,
270
+ debug=debug,
271
+ script=script,
272
+ )
273
+
274
+ # We'll use a thread pool to run the execution logic, we
275
+ # require a separate thread because even though asyncio is
276
+ # being used throughout, any user code on the callback can
277
+ # be blocking, preventing them from running scenarios in parallel
278
+ with concurrent.futures.ThreadPoolExecutor() as executor:
279
+
280
+ def run_in_thread():
281
+ loop = asyncio.new_event_loop()
282
+ asyncio.set_event_loop(loop)
283
+
284
+ try:
285
+ return loop.run_until_complete(scenario._run())
286
+ finally:
287
+ loop.close()
288
+
289
+ # Run the function in the thread pool and await its result
290
+ # This converts the thread's execution into a Future that the current
291
+ # event loop can await without blocking
292
+ loop = asyncio.get_event_loop()
293
+ result = await loop.run_in_executor(executor, run_in_thread)
294
+ return result
295
+
74
296
  def reset(self):
75
- self.messages = []
76
- self._agents = []
297
+ """
298
+ Reset the scenario executor to initial state.
299
+
300
+ This method reinitializes all internal state for a fresh scenario run,
301
+ including conversation history, turn counters, and agent timing information.
302
+ Called automatically during initialization and can be used to rerun scenarios.
303
+
304
+ Example:
305
+ ```python
306
+ executor = ScenarioExecutor(...)
307
+
308
+ # Run first test
309
+ result1 = await executor._run()
310
+
311
+ # Reset and run again
312
+ executor.reset()
313
+ result2 = await executor._run()
314
+ ```
315
+ """
316
+ self._state = ScenarioState(
317
+ description=self.description,
318
+ messages=[],
319
+ thread_id=str(PKSUID("thread")),
320
+ current_turn=0,
321
+ config=self.config,
322
+ _executor=self,
323
+ )
324
+ # Pydantic doesn't actually set the _executor field from the constructor, as it's private, so we need to do it manually
325
+ self._state._executor = self
326
+
77
327
  self._pending_messages = {}
78
- self.thread_id = str(PKSUID("thread"))
79
328
  self._total_start_time = time.time()
80
329
  self._agent_times = {}
81
330
 
82
- for AgentClass in self.scenario.agents:
83
- self._agents.append(
84
- AgentClass(
85
- input=AgentInput(
86
- thread_id=self.thread_id,
87
- messages=[],
88
- new_messages=[],
89
- context=self._context or {},
90
- requested_role=list(AgentClass.roles)[0],
91
- scenario_state=self,
92
- )
93
- )
94
- )
95
-
96
331
  self._new_turn()
97
- self.current_turn = 0
332
+ self._state.current_turn = 0
98
333
 
99
- context_scenario.set(self.scenario)
334
+ context_scenario.set(self)
100
335
 
101
336
  def add_message(
102
337
  self, message: ChatCompletionMessageParam, from_agent_idx: Optional[int] = None
103
338
  ):
104
- self.messages.append(message)
339
+ """
340
+ Add a message to the conversation and broadcast to other agents.
341
+
342
+ This method adds a message to the conversation history and makes it available
343
+ to other agents in their next call. It's used internally by the executor
344
+ and can be called from script steps to inject custom messages.
345
+
346
+ Args:
347
+ message: OpenAI-compatible message to add to the conversation
348
+ from_agent_idx: Index of the agent that generated this message.
349
+ Used to avoid broadcasting the message back to its creator.
350
+
351
+ Example:
352
+ ```python
353
+ def inject_system_message(state: ScenarioState) -> None:
354
+ state._executor.add_message({
355
+ "role": "system",
356
+ "content": "The user is now in a hurry"
357
+ })
358
+
359
+ # Use in script
360
+ result = await scenario.run(
361
+ name="system message test",
362
+ agents=[agent, user_sim, judge],
363
+ script=[
364
+ scenario.user("Hello"),
365
+ scenario.agent(),
366
+ inject_system_message,
367
+ scenario.user(), # Will see the system message
368
+ scenario.succeed()
369
+ ]
370
+ )
371
+ ```
372
+ """
373
+ self._state.messages.append(message)
105
374
 
106
375
  # Broadcast the message to other agents
107
- for idx, _ in enumerate(self._agents):
376
+ for idx, _ in enumerate(self.agents):
108
377
  if idx == from_agent_idx:
109
378
  continue
110
379
  if idx not in self._pending_messages:
@@ -116,19 +385,57 @@ class ScenarioExecutor:
116
385
  messages: List[ChatCompletionMessageParam],
117
386
  from_agent_idx: Optional[int] = None,
118
387
  ):
388
+ """
389
+ Add multiple messages to the conversation.
390
+
391
+ Convenience method for adding multiple messages at once. Each message
392
+ is added individually using add_message().
393
+
394
+ Args:
395
+ messages: List of OpenAI-compatible messages to add
396
+ from_agent_idx: Index of the agent that generated these messages
397
+
398
+ Example:
399
+ ```python
400
+ # Agent returns multiple messages for a complex interaction
401
+ messages = [
402
+ {"role": "assistant", "content": "Let me search for that..."},
403
+ {"role": "assistant", "content": "Here's what I found: ..."}
404
+ ]
405
+ executor.add_messages(messages, from_agent_idx=0)
406
+ ```
407
+ """
119
408
  for message in messages:
120
409
  self.add_message(message, from_agent_idx)
121
410
 
122
411
  def _new_turn(self):
123
- self._pending_agents_on_turn = set(self._agents)
412
+ self._pending_agents_on_turn = set(self.agents)
124
413
  self._pending_roles_on_turn = [
125
- ScenarioAgentRole.USER,
126
- ScenarioAgentRole.AGENT,
127
- ScenarioAgentRole.JUDGE,
414
+ AgentRole.USER,
415
+ AgentRole.AGENT,
416
+ AgentRole.JUDGE,
128
417
  ]
129
- self.current_turn += 1
418
+ self._state.current_turn += 1
130
419
 
131
420
  async def step(self) -> Union[List[ChatCompletionMessageParam], ScenarioResult]:
421
+ """
422
+ Execute a single step in the scenario.
423
+
424
+ A step consists of calling the next agent in the current turn's sequence
425
+ and processing their response. This method is used internally by the
426
+ scenario execution flow.
427
+
428
+ Returns:
429
+ Either a list of messages (if the scenario continues) or a
430
+ ScenarioResult (if the scenario should end)
431
+
432
+ Raises:
433
+ ValueError: If no result is returned from the internal step method
434
+
435
+ Note:
436
+ This is primarily an internal method. Most users should use the
437
+ high-level run() method or script DSL functions instead.
438
+ """
132
439
  result = await self._step()
133
440
  if result is None:
134
441
  raise ValueError("No result from step")
@@ -139,8 +446,8 @@ class ScenarioExecutor:
139
446
  go_to_next_turn=True,
140
447
  on_turn: Optional[
141
448
  Union[
142
- Callable[["ScenarioExecutor"], None],
143
- Callable[["ScenarioExecutor"], Awaitable[None]],
449
+ Callable[["ScenarioState"], None],
450
+ Callable[["ScenarioState"], Awaitable[None]],
144
451
  ]
145
452
  ] = None,
146
453
  ) -> Union[List[ChatCompletionMessageParam], ScenarioResult, None]:
@@ -151,9 +458,9 @@ class ScenarioExecutor:
151
458
  self._new_turn()
152
459
 
153
460
  if on_turn:
154
- await await_if_awaitable(on_turn(self))
461
+ await await_if_awaitable(on_turn(self._state))
155
462
 
156
- if self.current_turn >= (self.scenario.max_turns or 10):
463
+ if self._state.current_turn >= (self.config.max_turns or 10):
157
464
  return self._reached_max_turns()
158
465
 
159
466
  current_role = self._pending_roles_on_turn[0]
@@ -166,10 +473,10 @@ class ScenarioExecutor:
166
473
  return await self._call_agent(idx, role=current_role)
167
474
 
168
475
  def _next_agent_for_role(
169
- self, role: ScenarioAgentRole
170
- ) -> Tuple[int, Optional[ScenarioAgentAdapter]]:
171
- for idx, agent in enumerate(self._agents):
172
- if role in agent.roles and agent in self._pending_agents_on_turn:
476
+ self, role: AgentRole
477
+ ) -> Tuple[int, Optional[AgentAdapter]]:
478
+ for idx, agent in enumerate(self.agents):
479
+ if role == agent.role and agent in self._pending_agents_on_turn:
173
480
  return idx, agent
174
481
  return -1, None
175
482
 
@@ -177,8 +484,8 @@ class ScenarioExecutor:
177
484
  # If we reached max turns without conclusion, fail the test
178
485
  agent_roles_agents_idx = [
179
486
  idx
180
- for idx, agent in enumerate(self._agents)
181
- if ScenarioAgentRole.AGENT in agent.roles
487
+ for idx, agent in enumerate(self.agents)
488
+ if agent.role == AgentRole.AGENT
182
489
  ]
183
490
  agent_times = [
184
491
  self._agent_times[idx]
@@ -189,14 +496,14 @@ class ScenarioExecutor:
189
496
 
190
497
  return ScenarioResult(
191
498
  success=False,
192
- messages=self.messages,
499
+ messages=self._state.messages,
193
500
  reasoning=error_message
194
- or f"Reached maximum turns ({self.scenario.max_turns or 10}) without conclusion",
501
+ or f"Reached maximum turns ({self.config.max_turns or 10}) without conclusion",
195
502
  total_time=time.time() - self._total_start_time,
196
503
  agent_time=agent_time,
197
504
  )
198
505
 
199
- async def run(self) -> ScenarioResult:
506
+ async def _run(self) -> ScenarioResult:
200
507
  """
201
508
  Run a scenario against the agent under test.
202
509
 
@@ -207,13 +514,13 @@ class ScenarioExecutor:
207
514
  ScenarioResult containing the test outcome
208
515
  """
209
516
 
210
- if self.scenario.verbose:
517
+ if self.config.verbose:
211
518
  print("") # new line
212
519
 
213
520
  self.reset()
214
521
 
215
- for script_step in self._script:
216
- callable = script_step(self)
522
+ for script_step in self.script:
523
+ callable = script_step(self._state)
217
524
  if isinstance(callable, Awaitable):
218
525
  result = await callable
219
526
  else:
@@ -232,11 +539,11 @@ class ScenarioExecutor:
232
539
  )
233
540
 
234
541
  async def _call_agent(
235
- self, idx: int, role: ScenarioAgentRole
542
+ self, idx: int, role: AgentRole, request_judgment: bool = False
236
543
  ) -> Union[List[ChatCompletionMessageParam], ScenarioResult]:
237
- agent = self._agents[idx]
544
+ agent = self.agents[idx]
238
545
 
239
- if role == ScenarioAgentRole.USER and self.scenario.debug:
546
+ if role == AgentRole.USER and self.config.debug:
240
547
  print(
241
548
  f"\n{self._scenario_name()}{termcolor.colored('[Debug Mode]', 'yellow')} Press enter to continue or type a message to send"
242
549
  )
@@ -258,28 +565,26 @@ class ScenarioExecutor:
258
565
  with show_spinner(
259
566
  text=(
260
567
  "Judging..."
261
- if role == ScenarioAgentRole.JUDGE
262
- else f"{role.value if isinstance(role, ScenarioAgentRole) else role}:"
568
+ if role == AgentRole.JUDGE
569
+ else f"{role.value if isinstance(role, AgentRole) else role}:"
263
570
  ),
264
571
  color=(
265
572
  "blue"
266
- if role == ScenarioAgentRole.AGENT
267
- else "green" if role == ScenarioAgentRole.USER else "yellow"
573
+ if role == AgentRole.AGENT
574
+ else "green" if role == AgentRole.USER else "yellow"
268
575
  ),
269
- enabled=self.scenario.verbose,
576
+ enabled=self.config.verbose,
270
577
  ):
271
578
  start_time = time.time()
272
579
 
273
580
  agent_response = agent.call(
274
581
  AgentInput(
275
582
  # TODO: test thread_id
276
- thread_id=self.thread_id,
277
- messages=self.messages,
583
+ thread_id=self._state.thread_id,
584
+ messages=self._state.messages,
278
585
  new_messages=self._pending_messages.get(idx, []),
279
- # TODO: test context
280
- context=self._context or {},
281
- requested_role=role,
282
- scenario_state=self,
586
+ judgment_request=request_judgment,
587
+ scenario_state=self._state,
283
588
  )
284
589
  )
285
590
  if not isinstance(agent_response, Awaitable):
@@ -303,12 +608,12 @@ class ScenarioExecutor:
303
608
  else:
304
609
  messages = convert_agent_return_types_to_openai_messages(
305
610
  agent_response,
306
- role="user" if role == ScenarioAgentRole.USER else "assistant",
611
+ role="user" if role == AgentRole.USER else "assistant",
307
612
  )
308
613
 
309
614
  self.add_messages(messages, from_agent_idx=idx)
310
615
 
311
- if messages and self.scenario.verbose:
616
+ if messages and self.config.verbose:
312
617
  print_openai_messages(
313
618
  self._scenario_name(),
314
619
  [m for m in messages if m["role"] != "system"],
@@ -317,75 +622,51 @@ class ScenarioExecutor:
317
622
  return messages
318
623
 
319
624
  def _scenario_name(self):
320
- if self.scenario.verbose == 2:
321
- return termcolor.colored(f"[Scenario: {self.scenario.name}] ", "yellow")
625
+ if self.config.verbose == 2:
626
+ return termcolor.colored(f"[Scenario: {self.name}] ", "yellow")
322
627
  else:
323
628
  return ""
324
629
 
325
- # State access utils
326
-
327
- def last_message(self) -> ChatCompletionMessageParam:
328
- if len(self.messages) == 0:
329
- raise ValueError("No messages found")
330
- return self.messages[-1]
331
-
332
- def last_user_message(self) -> ChatCompletionUserMessageParam:
333
- user_messages = [m for m in self.messages if m["role"] == "user"]
334
- if not user_messages:
335
- raise ValueError("No user messages found")
336
- return user_messages[-1]
337
-
338
- def last_tool_call(
339
- self, tool_name: str
340
- ) -> Optional[ChatCompletionMessageToolCallParam]:
341
- for message in reversed(self.messages):
342
- if message["role"] == "assistant" and "tool_calls" in message:
343
- for tool_call in message["tool_calls"]:
344
- if tool_call["function"]["name"] == tool_name:
345
- return tool_call
346
- return None
347
-
348
- def has_tool_call(self, tool_name: str) -> bool:
349
- return self.last_tool_call(tool_name) is not None
350
-
351
630
  # Scripting utils
352
631
 
353
632
  async def message(self, message: ChatCompletionMessageParam) -> None:
354
633
  if message["role"] == "user":
355
- await self._script_call_agent(ScenarioAgentRole.USER, message)
634
+ await self._script_call_agent(AgentRole.USER, message)
356
635
  elif message["role"] == "assistant":
357
- await self._script_call_agent(ScenarioAgentRole.AGENT, message)
636
+ await self._script_call_agent(AgentRole.AGENT, message)
358
637
  else:
359
638
  self.add_message(message)
360
639
 
361
640
  async def user(
362
641
  self, content: Optional[Union[str, ChatCompletionMessageParam]] = None
363
642
  ) -> None:
364
- await self._script_call_agent(ScenarioAgentRole.USER, content)
643
+ await self._script_call_agent(AgentRole.USER, content)
365
644
 
366
645
  async def agent(
367
646
  self, content: Optional[Union[str, ChatCompletionMessageParam]] = None
368
647
  ) -> None:
369
- await self._script_call_agent(ScenarioAgentRole.AGENT, content)
648
+ await self._script_call_agent(AgentRole.AGENT, content)
370
649
 
371
650
  async def judge(
372
651
  self, content: Optional[Union[str, ChatCompletionMessageParam]] = None
373
652
  ) -> Optional[ScenarioResult]:
374
- return await self._script_call_agent(ScenarioAgentRole.JUDGE, content)
653
+ return await self._script_call_agent(
654
+ AgentRole.JUDGE, content, request_judgment=True
655
+ )
375
656
 
376
657
  async def proceed(
377
658
  self,
378
659
  turns: Optional[int] = None,
379
660
  on_turn: Optional[
380
661
  Union[
381
- Callable[["ScenarioExecutor"], None],
382
- Callable[["ScenarioExecutor"], Awaitable[None]],
662
+ Callable[["ScenarioState"], None],
663
+ Callable[["ScenarioState"], Awaitable[None]],
383
664
  ]
384
665
  ] = None,
385
666
  on_step: Optional[
386
667
  Union[
387
- Callable[["ScenarioExecutor"], None],
388
- Callable[["ScenarioExecutor"], Awaitable[None]],
668
+ Callable[["ScenarioState"], None],
669
+ Callable[["ScenarioState"], Awaitable[None]],
389
670
  ]
390
671
  ] = None,
391
672
  ) -> Optional[ScenarioResult]:
@@ -396,42 +677,42 @@ class ScenarioExecutor:
396
677
  go_to_next_turn=(
397
678
  turns is None
398
679
  or initial_turn is None
399
- or (self.current_turn + 1 < initial_turn + turns)
680
+ or (self._state.current_turn + 1 < initial_turn + turns)
400
681
  ),
401
682
  )
402
683
 
403
684
  if initial_turn is None:
404
- initial_turn = self.current_turn
685
+ initial_turn = self._state.current_turn
405
686
 
406
687
  if next_message is None:
407
688
  break
408
689
 
409
690
  if on_step:
410
- await await_if_awaitable(on_step(self))
691
+ await await_if_awaitable(on_step(self._state))
411
692
 
412
693
  if isinstance(next_message, ScenarioResult):
413
694
  return next_message
414
695
 
415
- async def succeed(self) -> ScenarioResult:
696
+ async def succeed(self, reasoning: Optional[str] = None) -> ScenarioResult:
416
697
  return ScenarioResult(
417
698
  success=True,
418
- messages=self.messages,
419
- reasoning="Scenario marked as successful with scenario.succeed()",
420
- passed_criteria=self.scenario.criteria,
699
+ messages=self._state.messages,
700
+ reasoning=reasoning
701
+ or "Scenario marked as successful with scenario.succeed()",
421
702
  )
422
703
 
423
- async def fail(self) -> ScenarioResult:
704
+ async def fail(self, reasoning: Optional[str] = None) -> ScenarioResult:
424
705
  return ScenarioResult(
425
706
  success=False,
426
- messages=self.messages,
427
- reasoning="Scenario marked as failed with scenario.fail()",
428
- passed_criteria=self.scenario.criteria,
707
+ messages=self._state.messages,
708
+ reasoning=reasoning or "Scenario marked as failed with scenario.fail()",
429
709
  )
430
710
 
431
711
  async def _script_call_agent(
432
712
  self,
433
- role: ScenarioAgentRole,
713
+ role: AgentRole,
434
714
  content: Optional[Union[str, ChatCompletionMessageParam]] = None,
715
+ request_judgment: bool = False,
435
716
  ) -> Optional[ScenarioResult]:
436
717
  idx, next_agent = self._next_agent_for_role(role)
437
718
  if not next_agent:
@@ -439,12 +720,21 @@ class ScenarioExecutor:
439
720
  idx, next_agent = self._next_agent_for_role(role)
440
721
 
441
722
  if not next_agent:
723
+ role_class = (
724
+ "a scenario.UserSimulatorAgent()"
725
+ if role == AgentRole.USER
726
+ else (
727
+ "a scenario.JudgeAgent()"
728
+ if role == AgentRole.JUDGE
729
+ else "your agent"
730
+ )
731
+ )
442
732
  if content:
443
733
  raise ValueError(
444
- f"Cannot generate a message for role `{role.value}` with content `{content}` because no agent with this role was found"
734
+ f"Cannot generate a message for role `{role.value}` with content `{content}` because no agent with this role was found, please add {role_class} to the scenario `agents` list"
445
735
  )
446
736
  raise ValueError(
447
- f"Cannot generate a message for role `{role.value}` because no agent with this role was found"
737
+ f"Cannot generate a message for role `{role.value}` because no agent with this role was found, please add {role_class} to the scenario `agents` list"
448
738
  )
449
739
 
450
740
  self._pending_agents_on_turn.remove(next_agent)
@@ -457,10 +747,12 @@ class ScenarioExecutor:
457
747
  message = content
458
748
 
459
749
  self.add_message(message)
460
- if self.scenario.verbose:
750
+ if self.config.verbose:
461
751
  print_openai_messages(self._scenario_name(), [message])
462
752
  return
463
753
 
464
- result = await self._call_agent(idx, role=role)
754
+ result = await self._call_agent(
755
+ idx, role=role, request_judgment=request_judgment
756
+ )
465
757
  if isinstance(result, ScenarioResult):
466
758
  return result