langwatch-scenario 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: langwatch-scenario
3
- Version: 0.3.0
3
+ Version: 0.4.0
4
4
  Summary: The end-to-end agent testing library
5
5
  Author-email: LangWatch Team <support@langwatch.ai>
6
6
  License: MIT
@@ -32,6 +32,9 @@ Requires-Dist: isort; extra == "dev"
32
32
  Requires-Dist: pytest-cov; extra == "dev"
33
33
  Requires-Dist: pre-commit; extra == "dev"
34
34
  Requires-Dist: commitizen; extra == "dev"
35
+ Requires-Dist: pyright; extra == "dev"
36
+ Requires-Dist: pydantic-ai; extra == "dev"
37
+ Requires-Dist: function-schema; extra == "dev"
35
38
 
36
39
  ![scenario](https://github.com/langwatch/scenario/raw/main/assets/scenario-wide.webp)
37
40
 
@@ -39,11 +42,17 @@ Requires-Dist: commitizen; extra == "dev"
39
42
  <!-- Discord, PyPI, Docs, etc links -->
40
43
  </div>
41
44
 
42
- # Scenario: Use an Agent to test your Agent
45
+ # Scenario
43
46
 
44
47
  Scenario is an Agent Testing Framework that validates AI agents through Simulation Testing.
45
48
 
46
- You define the scenarios, and the testing agent will simulate a real user as it follows them, it will keep chatting back and forth with _your_ agent to play out the simulation, until it reaches the desired goal or detects an unexpected behavior based on the criteria you defined.
49
+ You define the conversation scenario and let it play out: it will keep chatting back and forth with _your_ agent until it reaches the desired goal or detects unexpected behavior, based on the criteria you defined.
50
+
51
+ - Test your agent's end-to-end conversations with specified scenarios to capture both happy paths and edge cases
52
+ - Full flexibility over how much you want to guide the conversation, from fully scripted scenarios to completely automated simulations
53
+ - Run evaluations at any point in the conversation, designed for multi-turn testing
54
+ - Works in combination with any testing and LLM evaluation framework, completely agnostic
55
+ - Works with any LLM and Agent Framework, with easy integration
47
56
 
48
57
  [📺 Video Tutorial](https://www.youtube.com/watch?v=f8NLpkY0Av4)
49
58
 
@@ -52,6 +61,49 @@ You define the scenarios, and the testing agent will simulate a real user as it
52
61
  - [Scenario TypeScript](https://github.com/langwatch/scenario-ts/)
53
62
  - [Scenario Go](https://github.com/langwatch/scenario-go/)
54
63
 
64
+ ## Example
65
+
66
+ ```python
67
+ @pytest.mark.agent_test
68
+ @pytest.mark.asyncio
69
+ async def test_weather_agent():
70
+ # Integrate with your agent
71
+ class WeatherAgent(scenario.AgentAdapter):
72
+ async def call(self, input: scenario.AgentInput) -> scenario.AgentReturnTypes:
73
+ return weather_agent(input.messages)
74
+
75
+ # Define any custom assertions
76
+ def check_for_weather_tool_call(state: scenario.ScenarioState):
77
+ assert state.has_tool_call("get_current_weather")
78
+
79
+ # Run the scenario
80
+ result = await scenario.run(
81
+ name="checking the weather",
82
+ description="""
83
+ The user is planning a boat trip from Barcelona to Rome,
84
+ and is wondering what the weather will be like.
85
+ """,
86
+ agents=[
87
+ WeatherAgent(),
88
+ scenario.UserSimulatorAgent(model="openai/gpt-4.1-mini"),
89
+ ],
90
+ script=[
91
+ scenario.user(),
92
+ scenario.agent(),
93
+ check_for_weather_tool_call, # check for tool call after the first agent response
94
+ scenario.succeed(),
95
+ ],
96
+ )
97
+
98
+ # Assert the simulation was successful
99
+ assert result.success
100
+ ```
101
+
102
+ > [!NOTE]
103
+ > This is a very basic example. Keep reading to see how to run a simulation completely script-free, using a Judge Agent to evaluate in real time.
104
+
105
+ Check out more examples in the [examples folder](./examples/).
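For reference, the `weather_agent` called inside the adapter above is not defined in this snippet. A minimal sketch of what it could look like, following the same litellm pattern used in the Getting Started example below, is shown here; the system prompt and tool schema are illustrative assumptions, and the version in the examples folder may differ:

```python
import litellm
import scenario


@scenario.cache()
def weather_agent(messages) -> scenario.AgentReturnTypes:
    # A real agent would also execute the tool and answer with the result;
    # this sketch stops at the tool call, which is all the scripted example above checks for.
    response = litellm.completion(
        model="openai/gpt-4.1-mini",
        messages=[
            {
                "role": "system",
                "content": "You are a helpful assistant that uses the get_current_weather tool to answer weather questions.",
            },
            *messages,
        ],
        tools=[
            {
                "type": "function",
                "function": {
                    "name": "get_current_weather",
                    "description": "Get the current weather for a given city",
                    "parameters": {
                        "type": "object",
                        "properties": {"city": {"type": "string"}},
                        "required": ["city"],
                    },
                },
            }
        ],
    )

    return response.choices[0].message  # type: ignore
```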
106
+
55
107
  ## Getting Started
56
108
 
57
109
  Install pytest and scenario:
@@ -60,51 +112,45 @@ Install pytest and scenario:
60
112
  pip install pytest langwatch-scenario
61
113
  ```
62
114
 
63
- Now create your first scenario and save it as `tests/test_vegetarian_recipe_agent.py`:
115
+ Now create your first scenario and save it as `tests/test_vegetarian_recipe_agent.py`, copying the full working example below:
64
116
 
65
117
  ```python
66
118
  import pytest
119
+ import scenario
120
+ import litellm
67
121
 
68
- from scenario import Scenario, TestingAgent, ScenarioAgentAdapter, AgentInput, AgentReturnTypes, scenario_cache
69
-
70
- Scenario.configure(testing_agent=TestingAgent(model="openai/gpt-4o-mini"))
71
-
72
-
73
- # Create an adapter to call your agent
74
- class VegetarianRecipeAgentAdapter(ScenarioAgentAdapter):
75
- def __init__(self, input: AgentInput):
76
- self.agent = VegetarianRecipeAgent()
77
-
78
- async def call(self, input: AgentInput) -> AgentReturnTypes:
79
- return self.agent.run(input.last_new_user_message_str())
122
+ scenario.configure(default_model="openai/gpt-4.1-mini")
80
123
 
81
124
 
82
125
  @pytest.mark.agent_test
83
126
  @pytest.mark.asyncio
84
127
  async def test_vegetarian_recipe_agent():
85
- # Define the simulated scenario
86
- scenario = Scenario(
128
+ class Agent(scenario.AgentAdapter):
129
+ async def call(self, input: scenario.AgentInput) -> scenario.AgentReturnTypes:
130
+ return vegetarian_recipe_agent(input.messages)
131
+
132
+ # Run a simulation scenario
133
+ result = await scenario.run(
87
134
  name="dinner idea",
88
135
  description="""
89
136
  It's Saturday evening, the user is very hungry and tired,
90
137
  but has no money to order out, so they are looking for a recipe.
91
-
92
- The user never mentions they want a vegetarian recipe.
93
138
  """,
94
- agent=vegetarian_recipe_agent,
95
- # List the evaluation criteria for the scenario to be considered successful
96
- criteria=[
97
- "Agent should not ask more than two follow-up questions",
98
- "Agent should generate a recipe",
99
- "Recipe should include a list of ingredients",
100
- "Recipe should include step-by-step cooking instructions",
101
- "Recipe should be vegetarian and not include any sort of meat",
139
+ agents=[
140
+ Agent(),
141
+ scenario.UserSimulatorAgent(),
142
+ scenario.JudgeAgent(
143
+ criteria=[
144
+ "Agent should not ask more than two follow-up questions",
145
+ "Agent should generate a recipe",
146
+ "Recipe should include a list of ingredients",
147
+ "Recipe should include step-by-step cooking instructions",
148
+ "Recipe should be vegetarian and not include any sort of meat",
149
+ ]
150
+ ),
102
151
  ],
103
152
  )
104
153
 
105
- # Run the scenario and get results
106
- result = await scenario.run()
107
-
108
154
  # Assert for pytest to know whether the test passed
109
155
  assert result.success
110
156
 
@@ -113,33 +159,24 @@ async def test_vegetarian_recipe_agent():
113
159
  import litellm
114
160
 
115
161
 
116
- class VegetarianRecipeAgent:
117
- def __init__(self):
118
- self.history = []
119
-
120
- @scenario_cache()
121
- def run(self, message: str):
122
- self.history.append({"role": "user", "content": message})
123
-
124
- response = litellm.completion(
125
- model="openai/gpt-4o-mini",
126
- messages=[
127
- {
128
- "role": "system",
129
- "content": """
130
- You are a vegetarian recipe agent.
131
- Given the user request, ask AT MOST ONE follow-up question,
132
- then provide a complete recipe. Keep your responses concise and focused.
133
- """,
134
- },
135
- *self.history,
136
- ],
137
- )
138
- message = response.choices[0].message # type: ignore
139
- self.history.append(message)
140
-
141
- return [message]
162
+ @scenario.cache()
163
+ def vegetarian_recipe_agent(messages) -> scenario.AgentReturnTypes:
164
+ response = litellm.completion(
165
+ model="openai/gpt-4.1-mini",
166
+ messages=[
167
+ {
168
+ "role": "system",
169
+ "content": """
170
+ You are a vegetarian recipe agent.
171
+ Given the user request, ask AT MOST ONE follow-up question,
172
+ then provide a complete recipe. Keep your responses concise and focused.
173
+ """,
174
+ },
175
+ *messages,
176
+ ],
177
+ )
142
178
 
179
+ return response.choices[0].message # type: ignore
143
180
  ```
144
181
 
145
182
  Create a `.env` file and put your OpenAI API key in it:
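A minimal `.env` for the examples above might look like the following sketch; it assumes the OpenAI-backed models used throughout this README, and the placeholder should be replaced with your real key:

```bash
OPENAI_API_KEY=your-openai-api-key
```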
@@ -158,42 +195,57 @@ This is how it will look like:
158
195
 
159
196
  [![asciicast](https://asciinema.org/a/nvO5GWGzqKTTCd8gtNSezQw11.svg)](https://asciinema.org/a/nvO5GWGzqKTTCd8gtNSezQw11)
160
197
 
161
- You can find a fully working example in [examples/test_vegetarian_recipe_agent.py](examples/test_vegetarian_recipe_agent.py).
198
+ You can find the same code example in [examples/test_vegetarian_recipe_agent.py](examples/test_vegetarian_recipe_agent.py).
162
199
 
163
- ## Customize strategy and max_turns
200
+ ## Script-free Simulation
164
201
 
165
- You can customize how should the testing agent go about testing by defining a `strategy` field. You can also limit the maximum number of turns the scenario will take by setting the `max_turns` field (defaults to 10).
202
+ Provide a User Simulator Agent and a description of the Scenario, and the simulated user will automatically generate messages to the agent until the scenario is successful or the maximum number of turns is reached.
166
203
 
167
- For example, in this Lovable Clone scenario test:
204
+ You can then use a Judge Agent to evaluate the scenario in real time against certain criteria; at every turn, the Judge Agent will decide whether to let the simulation proceed or end it with a verdict.
205
+
206
+ You can combine it with a script, for example to control the beginning of the conversation, or simply let it run script-free; this is very useful for testing open-ended cases like a vibe coding assistant:
168
207
 
169
208
  ```python
170
- scenario = Scenario(
209
+ result = await scenario.run(
171
210
  name="dog walking startup landing page",
172
211
  description="""
173
212
  the user wants to create a new landing page for their dog walking startup
174
213
 
175
214
  send the first message to generate the landing page, then a single follow up request to extend it, then give your final verdict
176
215
  """,
177
- agent=lovable_agent,
178
- criteria=[
179
- "agent reads the files before go and making changes",
180
- "agent modified the index.css file, not only the Index.tsx file",
181
- "agent created a comprehensive landing page",
182
- "agent extended the landing page with a new section",
183
- "agent should NOT say it can't read the file",
184
- "agent should NOT produce incomplete code or be too lazy to finish",
216
+ agents=[
217
+ LovableAgentAdapter(template_path=template_path),
218
+ scenario.UserSimulatorAgent(),
219
+ scenario.JudgeAgent(
220
+ criteria=[
221
+ "agent reads the files before go and making changes",
222
+ "agent modified the index.css file, not only the Index.tsx file",
223
+ "agent created a comprehensive landing page",
224
+ "agent extended the landing page with a new section",
225
+ "agent should NOT say it can't read the file",
226
+ "agent should NOT produce incomplete code or be too lazy to finish",
227
+ ],
228
+ ),
185
229
  ],
186
- max_turns=5,
230
+ max_turns=5, # optional
187
231
  )
188
-
189
- result = await scenario.run()
190
232
  ```
191
233
 
192
- You can find a fully working Lovable Clone example in [examples/test_lovable_clone.py](examples/test_lovable_clone.py).
234
+ Check out the fully working Lovable Clone example in [examples/test_lovable_clone.py](examples/test_lovable_clone.py).
235
+
236
+ ## Full Control of the Conversation
237
+
238
+ You can specify a script for guiding the scenario by passing a list of steps to the `script` field. Those steps are simply arbitrary functions that take the current state of the scenario as an argument, so you can do things like:
193
239
 
194
- ## Specify a script for guiding the scenario
240
+ - Control what the user says, or let it be generated automatically
241
+ - Control what the agent says, or let it be generated automatically
242
+ - Add custom assertions, for example making sure a tool was called
243
+ - Add a custom evaluation, from an external library
244
+ - Let the simulation proceed for a certain number of turns, and evaluate at each new turn
245
+ - Trigger the judge agent to decide on a verdict
246
+ - Add arbitrary messages like mock tool calls in the middle of the conversation
195
247
 
196
- You can specify a script for guiding the scenario by passing a list of steps to the `script` field.
248
+ All of this is possible using the same simple structure:
197
249
 
198
250
  ```python
199
251
  @pytest.mark.agent_test
@@ -202,7 +254,7 @@ async def test_ai_assistant_agent():
202
254
  scenario = Scenario(
203
255
  name="false assumptions",
204
256
  description="""
205
- The agent makes false assumption about being an ATM bank, and user corrects it
257
+ The agent makes a false assumption that the user is talking about an ATM bank, and the user corrects it, clarifying that they actually mean river banks
206
258
  """,
207
259
  agent=AiAssistantAgentAdapter,
208
260
  criteria=[
@@ -219,13 +271,22 @@ async def test_ai_assistant_agent():
219
271
  [
220
272
  # Define existing history of messages
221
273
  scenario.user("how do I safely approach a bank?"),
274
+
222
275
  # Or let it be generated automatically
223
276
  scenario.agent(),
277
+
224
278
  # Add custom assertions, for example making sure a tool was called
225
279
  check_if_tool_was_called,
280
+
281
+ # Another user message
226
282
  scenario.user(),
227
- # Let the simulation proceed for 2 more turns
228
- scenario.proceed(turns=2),
283
+
284
+ # Let the simulation proceed for 2 more turns, printing at every turn
285
+ scenario.proceed(
286
+ turns=2,
287
+ on_turn=lambda state: print(f"Turn {state.current_turn}: {state.messages}"),
288
+ ),
289
+
229
290
  # Time to make a judgment call
230
291
  scenario.judge(),
231
292
  ]
@@ -0,0 +1,18 @@
1
+ scenario/__init__.py,sha256=oMh5le4c4sIN2K1Ylv2xnkyKHpcOzBeqvW58fTWAFlU,7794
2
+ scenario/agent_adapter.py,sha256=pd3BdNUWna8h_9hykn1FvcyareMzUofQKKvXaAfQluY,4338
3
+ scenario/cache.py,sha256=iPpMmjKruLnnxCeLnRiQjiH89LhcVIfQQXKH5etU_m4,6217
4
+ scenario/config.py,sha256=AeDbKE-_Rrxkan64tDDDynaSNyijoIKHxWaRMqGd4oY,6121
5
+ scenario/error_messages.py,sha256=6lEx3jBGMbPx0kG0eX5zoZE-ENVM3O_ZkIbVMlnidYs,3892
6
+ scenario/judge_agent.py,sha256=7fKK_oevXzWKXDioBjHzgGSDpS0aby3oRcrc6oaip68,16973
7
+ scenario/pytest_plugin.py,sha256=s2M2mll9JSCSWB5SKDQIWT5DOCvzZOo_8JCCfJzyy8k,12849
8
+ scenario/scenario_executor.py,sha256=oz7Odv41HNLcNd_7sKUW-AKKdY-on_PyVLaxpvKjrGE,27211
9
+ scenario/scenario_state.py,sha256=I_fWoY_LvNuKCBL-b62z5bQOAI25dx55FuZNWwtIeVs,7075
10
+ scenario/script.py,sha256=7wsHZxdSgFaYLflkV6sysDxefkkag79mySR7yp7N3ug,12278
11
+ scenario/types.py,sha256=CsexCupg2WUi4dToYF5RqFdNIHx1JhaRaRRBs78YVd0,9498
12
+ scenario/user_simulator_agent.py,sha256=o8sZLMWOcTf7BKgPO_a5rPnC6GgdZQe3HujqwjPzjV8,9346
13
+ scenario/utils.py,sha256=ryJYcMoSAjVzA_f5V6Mcga5GkipYbCzaYNNpBjAQI_g,16992
14
+ langwatch_scenario-0.4.0.dist-info/METADATA,sha256=d9tNTNioHH5_1q8oIvIABaTgC6J9XmEJR4Tjim3sFks,13827
15
+ langwatch_scenario-0.4.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
16
+ langwatch_scenario-0.4.0.dist-info/entry_points.txt,sha256=WlEnJ_gku0i18bIa3DSuGqXRX-QDQLe_s0YmRzK45TI,45
17
+ langwatch_scenario-0.4.0.dist-info/top_level.txt,sha256=45Mn28aedJsetnBMB5xSmrJ-yo701QLH89Zlz4r1clE,9
18
+ langwatch_scenario-0.4.0.dist-info/RECORD,,
scenario/__init__.py CHANGED
@@ -1,32 +1,246 @@
1
1
  """
2
- Scenario: A testing library for conversational agents.
2
+ Scenario: Agent Testing Framework through Simulation Testing
3
+
4
+ Scenario is a comprehensive testing framework for AI agents that uses simulation testing
5
+ to validate agent behavior through realistic conversations. It enables testing of both
6
+ happy paths and edge cases by simulating user interactions and evaluating agent responses
7
+ against configurable success criteria.
8
+
9
+ Key Features:
10
+ - End-to-end conversation testing with specified scenarios
11
+ - Flexible control from fully scripted to completely automated simulations
12
+ - Multi-turn evaluation designed for complex conversational agents
13
+ - Works with any testing framework (pytest, unittest, etc.)
14
+ - Framework-agnostic integration with any LLM or agent architecture
15
+ - Built-in caching for deterministic and faster test execution
16
+
17
+ Basic Usage:
18
+ ```python
19
+ import scenario
20
+
21
+ # Configure global settings
22
+ scenario.configure(default_model="openai/gpt-4.1-mini")
23
+
24
+ # Create your agent adapter
25
+ class MyAgent(scenario.AgentAdapter):
26
+ async def call(self, input: scenario.AgentInput) -> scenario.AgentReturnTypes:
27
+ return my_agent_function(input.last_new_user_message_str())
28
+
29
+ # Run a scenario test
30
+ result = await scenario.run(
31
+ name="customer service test",
32
+ description="Customer asks about billing, agent should help politely",
33
+ agents=[
34
+ MyAgent(),
35
+ scenario.UserSimulatorAgent(),
36
+ scenario.JudgeAgent(criteria=[
37
+ "Agent is polite and professional",
38
+ "Agent addresses the billing question",
39
+ "Agent provides clear next steps"
40
+ ])
41
+ ]
42
+ )
43
+
44
+ assert result.success
45
+ ```
46
+
47
+ Advanced Usage:
48
+ ```python
49
+ # Script-controlled scenario with custom evaluations
50
+ def check_tool_usage(state: scenario.ScenarioState) -> None:
51
+ assert state.has_tool_call("get_customer_info")
52
+
53
+ result = await scenario.run(
54
+ name="scripted interaction",
55
+ description="Test specific conversation flow",
56
+ agents=[
57
+ MyAgent(),
58
+ scenario.UserSimulatorAgent(),
59
+ scenario.JudgeAgent(criteria=["Agent provides helpful response"])
60
+ ],
61
+ script=[
62
+ scenario.user("I have a billing question"),
63
+ scenario.agent(),
64
+ check_tool_usage, # Custom assertion
65
+ scenario.proceed(turns=2), # Let it continue automatically
66
+ scenario.succeed("All requirements met")
67
+ ]
68
+ )
69
+ ```
70
+
71
+ Integration with Testing Frameworks:
72
+ ```python
73
+ import pytest
74
+
75
+ @pytest.mark.agent_test
76
+ @pytest.mark.asyncio
77
+ async def test_weather_agent():
78
+ result = await scenario.run(
79
+ name="weather query",
80
+ description="User asks about weather in a specific city",
81
+ agents=[
82
+ WeatherAgent(),
83
+ scenario.UserSimulatorAgent(),
84
+ scenario.JudgeAgent(criteria=["Provides accurate weather information"])
85
+ ]
86
+ )
87
+ assert result.success
88
+ ```
89
+
90
+ For more examples and detailed documentation, visit: https://github.com/langwatch/scenario
3
91
  """
4
92
 
5
93
  # First import non-dependent modules
6
- from .types import ScenarioResult, AgentInput, ScenarioAgentRole, AgentReturnTypes
94
+ from .types import ScenarioResult, AgentInput, AgentRole, AgentReturnTypes
7
95
  from .config import ScenarioConfig
8
96
 
9
97
  # Then import modules with dependencies
10
- from .scenario_agent_adapter import ScenarioAgentAdapter
11
- from .testing_agent import TestingAgent
12
- from .scenario import Scenario
98
+ from .scenario_executor import ScenarioExecutor
99
+ from .scenario_state import ScenarioState
100
+ from .agent_adapter import AgentAdapter
101
+ from .judge_agent import JudgeAgent
102
+ from .user_simulator_agent import UserSimulatorAgent
13
103
  from .cache import scenario_cache
104
+ from .script import message, user, agent, judge, proceed, succeed, fail
14
105
 
15
106
  # Import pytest plugin components
16
107
  from .pytest_plugin import pytest_configure, scenario_reporter
17
108
 
109
+ run = ScenarioExecutor.run
110
+ """
111
+ High-level interface for running scenario tests.
112
+
113
+ This is the main entry point for executing scenario-based agent tests. It creates
114
+ and runs a complete scenario simulation including user interactions, agent responses,
115
+ and success evaluation.
116
+
117
+ Args:
118
+ name: Human-readable name for the scenario
119
+ description: Detailed description that guides the simulation behavior
120
+ agents: List of agent adapters (agent under test, user simulator, judge)
121
+ max_turns: Maximum conversation turns before timeout (default: 10)
122
+ verbose: Show detailed output during execution
123
+ cache_key: Cache key for deterministic behavior across runs
124
+ debug: Enable debug mode for step-by-step execution
125
+ script: Optional script steps to control scenario flow
126
+
127
+ Returns:
128
+ ScenarioResult containing test outcome, conversation history, and detailed analysis
129
+
130
+ Example:
131
+ ```python
132
+ result = await scenario.run(
133
+ name="help request",
134
+ description="User needs help with a technical problem",
135
+ agents=[
136
+ MyAgentAdapter(),
137
+ scenario.UserSimulatorAgent(),
138
+ scenario.JudgeAgent(criteria=["Provides helpful response"])
139
+ ]
140
+ )
141
+
142
+ print(f"Test {'PASSED' if result.success else 'FAILED'}")
143
+ print(f"Reasoning: {result.reasoning}")
144
+ ```
145
+ """
146
+
147
+ configure = ScenarioConfig.configure
148
+ """
149
+ Set global configuration settings for all scenario executions.
150
+
151
+ This function allows you to configure default behavior that will be applied
152
+ to all scenarios unless explicitly overridden in individual scenario runs.
153
+
154
+ Args:
155
+ default_model: Default LLM model identifier for user simulator and judge agents
156
+ max_turns: Maximum number of conversation turns before timeout (default: 10)
157
+ verbose: Enable verbose output during scenario execution
158
+ cache_key: Cache key for deterministic scenario behavior across runs
159
+ debug: Enable debug mode for step-by-step execution with user intervention
160
+
161
+ Example:
162
+ ```python
163
+ # Set up global defaults
164
+ scenario.configure(
165
+ default_model="openai/gpt-4.1-mini",
166
+ max_turns=15,
167
+ verbose=True,
168
+ cache_key="my-test-suite-v1"
169
+ )
170
+
171
+ # All subsequent scenarios will use these defaults
172
+ result = await scenario.run(...)
173
+ ```
174
+ """
175
+
176
+ default_config = ScenarioConfig.default_config
177
+ """
178
+ Access to the current global configuration settings.
179
+
180
+ This provides read-only access to the default configuration that has been
181
+ set via scenario.configure(). Useful for debugging or conditional logic
182
+ based on current settings.
183
+
184
+ Example:
185
+ ```python
186
+ if scenario.default_config and scenario.default_config.debug:
187
+ print("Debug mode is enabled")
188
+ ```
189
+ """
190
+
191
+ cache = scenario_cache
192
+ """
193
+ Decorator for caching function calls during scenario execution.
194
+
195
+ This decorator enables deterministic testing by caching LLM calls and other
196
+ non-deterministic operations based on scenario configuration and function arguments.
197
+ Results are cached when a cache_key is configured, making tests repeatable and faster.
198
+
199
+ Args:
200
+ ignore: List of argument names to exclude from cache key computation
201
+
202
+ Example:
203
+ ```python
204
+ class MyAgent:
205
+ @scenario.cache(ignore=["self"])
206
+ def invoke(self, message: str) -> str:
207
+ # This LLM call will be cached when cache_key is set
208
+ return llm_client.complete(model="gpt-4", prompt=message)
209
+
210
+ # Enable caching for deterministic tests
211
+ scenario.configure(cache_key="test-suite-v1")
212
+ ```
213
+ """
214
+
18
215
  __all__ = [
216
+ # Functions
217
+ "run",
218
+ "configure",
219
+ "default_config",
220
+ "cache",
221
+
222
+ # Script
223
+ "message",
224
+ "proceed",
225
+ "succeed",
226
+ "fail",
227
+ "judge",
228
+ "agent",
229
+ "user",
230
+
19
231
  # Types
20
232
  "ScenarioResult",
21
233
  "AgentInput",
22
- "ScenarioAgentRole",
234
+ "AgentRole",
23
235
  "ScenarioConfig",
24
236
  "AgentReturnTypes",
25
237
 
26
238
  # Classes
27
- "Scenario",
28
- "ScenarioAgentAdapter",
29
- "TestingAgent",
239
+ "ScenarioExecutor",
240
+ "ScenarioState",
241
+ "AgentAdapter",
242
+ "UserSimulatorAgent",
243
+ "JudgeAgent",
30
244
 
31
245
  # Plugins
32
246
  "pytest_configure",