langwatch-scenario 0.3.0__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,385 @@
1
+ Metadata-Version: 2.4
2
+ Name: langwatch-scenario
3
+ Version: 0.6.0
4
+ Summary: The end-to-end agent testing library
5
+ Author-email: LangWatch Team <support@langwatch.ai>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/langwatch/scenario
8
+ Project-URL: Bug Tracker, https://github.com/langwatch/scenario/issues
9
+ Classifier: Development Status :: 4 - Beta
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: License :: OSI Approved :: MIT License
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.8
14
+ Classifier: Programming Language :: Python :: 3.9
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Requires-Python: >=3.9
18
+ Description-Content-Type: text/markdown
19
+ Requires-Dist: pytest>=8.1.1
20
+ Requires-Dist: litellm>=1.49.0
21
+ Requires-Dist: python-dotenv>=1.0.1
22
+ Requires-Dist: termcolor>=2.4.0
23
+ Requires-Dist: pydantic>=2.7.0
24
+ Requires-Dist: joblib>=1.4.2
25
+ Requires-Dist: wrapt>=1.17.2
26
+ Requires-Dist: pytest-asyncio>=0.26.0
27
+ Requires-Dist: rich<15.0.0,>=13.3.3
28
+ Requires-Dist: pksuid>=1.1.2
29
+ Requires-Dist: pdoc3>=0.11.6
30
+ Requires-Dist: ag-ui-protocol>=0.1.0
31
+ Requires-Dist: httpx>=0.27.0
32
+ Requires-Dist: rx>=3.2.0
33
+ Requires-Dist: respx>=0.22.0
34
+ Provides-Extra: dev
35
+ Requires-Dist: black; extra == "dev"
36
+ Requires-Dist: isort; extra == "dev"
37
+ Requires-Dist: pytest-cov; extra == "dev"
38
+ Requires-Dist: pre-commit; extra == "dev"
39
+ Requires-Dist: commitizen; extra == "dev"
40
+ Requires-Dist: pyright; extra == "dev"
41
+ Requires-Dist: pydantic-ai; extra == "dev"
42
+ Requires-Dist: function-schema; extra == "dev"
43
+
44
+ ![scenario](https://github.com/langwatch/scenario/raw/main/assets/scenario-wide.webp)
45
+
46
+ <div align="center">
47
+ <!-- Discord, PyPI, Docs, etc links -->
48
+ </div>
49
+
50
+ # Scenario
51
+
52
+ Scenario is an Agent Testing Framework based on simulations. It can:
53
+
54
+ - Test real agent behavior by simulating users in different scenarios and edge cases
55
+ - Evaluate and judge at any point of the conversation, with powerful multi-turn control
56
+ - Combine it with any LLM eval framework or custom evals, agnostic by design
57
+ - Integrate your Agent by implementing just one `call()` method
58
+ - Available in Python, TypeScript and Go
59
+
60
+ [📺 Video Tutorial](https://www.youtube.com/watch?v=f8NLpkY0Av4)
61
+
62
+ ### In other languages
63
+
64
+ - [Scenario TypeScript](https://github.com/langwatch/scenario-ts/)
65
+ - [Scenario Go](https://github.com/langwatch/scenario-go/)
66
+
67
+ ## Example
68
+
69
+ This is what a simple simulation with a tool check looks like in Scenario:
70
+
71
+ ```python
72
+ # Define any custom assertions
73
+ def check_for_weather_tool_call(state: scenario.ScenarioState):
74
+     assert state.has_tool_call("get_current_weather")
75
+
76
+ result = await scenario.run(
77
+     name="checking the weather",
78
+
79
+     # Define the prompt to guide the simulation
80
+     description="""
81
+         The user is planning a boat trip from Barcelona to Rome,
82
+         and is wondering what the weather will be like.
83
+     """,
84
+
85
+     # Define the agents that will play this simulation
86
+     agents=[
87
+         WeatherAgent(),
88
+         scenario.UserSimulatorAgent(model="openai/gpt-4.1-mini"),
89
+     ],
90
+
91
+     # (Optional) Control the simulation
92
+     script=[
93
+         scenario.user(),  # let the user simulator generate a user message
94
+         scenario.agent(),  # agent responds
95
+         check_for_weather_tool_call,  # check for tool call after the first agent response
96
+         scenario.succeed(),  # simulation ends successfully
97
+     ],
98
+ )
99
+
100
+ assert result.success
101
+ ```
102
+
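+ The `WeatherAgent` used above is simply your own agent wrapped in a `scenario.AgentAdapter`. As a rough sketch of what such an adapter might look like (the `get_current_weather` tool definition and the LiteLLM-based implementation are illustrative assumptions, not part of the library):
+
+ ```python
+ import litellm
+ import scenario
+
+
+ class WeatherAgent(scenario.AgentAdapter):
+     async def call(self, input: scenario.AgentInput) -> scenario.AgentReturnTypes:
+         # Illustrative only: expose a weather tool so the model can emit the
+         # `get_current_weather` tool call that the custom assertion checks for
+         # (a real agent would also execute the tool and answer the user)
+         response = litellm.completion(
+             model="openai/gpt-4.1-mini",
+             messages=input.messages,
+             tools=[
+                 {
+                     "type": "function",
+                     "function": {
+                         "name": "get_current_weather",
+                         "description": "Get the current weather for a given city",
+                         "parameters": {
+                             "type": "object",
+                             "properties": {"city": {"type": "string"}},
+                             "required": ["city"],
+                         },
+                     },
+                 }
+             ],
+         )
+         return response.choices[0].message  # type: ignore
+ ```
+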
103
+ > [!NOTE]
104
+ > Check out full examples in the [examples folder](./examples/).
105
+
106
+ ## Getting Started
107
+
108
+ Install pytest and scenario:
109
+
110
+ ```bash
111
+ pip install pytest langwatch-scenario
112
+ ```
113
+
114
+ Now create your first scenario and save it as `tests/test_vegetarian_recipe_agent.py`. Copy the full working example below:
115
+
116
+ ```python
117
+ import pytest
118
+ import scenario
119
+ import litellm
120
+
121
+ scenario.configure(default_model="openai/gpt-4.1-mini")
122
+
123
+
124
+ @pytest.mark.agent_test
125
+ @pytest.mark.asyncio
126
+ async def test_vegetarian_recipe_agent():
127
+     class Agent(scenario.AgentAdapter):
128
+         async def call(self, input: scenario.AgentInput) -> scenario.AgentReturnTypes:
129
+             return vegetarian_recipe_agent(input.messages)
130
+
131
+     # Run a simulation scenario
132
+     result = await scenario.run(
133
+         name="dinner idea",
134
+         description="""
135
+             It's Saturday evening, the user is very hungry and tired,
136
+             but has no money to order out, so they are looking for a recipe.
137
+         """,
138
+         agents=[
139
+             Agent(),
140
+             scenario.UserSimulatorAgent(),
141
+             scenario.JudgeAgent(
142
+                 criteria=[
143
+                     "Agent should not ask more than two follow-up questions",
144
+                     "Agent should generate a recipe",
145
+                     "Recipe should include a list of ingredients",
146
+                     "Recipe should include step-by-step cooking instructions",
147
+                     "Recipe should be vegetarian and not include any sort of meat",
148
+                 ]
149
+             ),
150
+         ],
151
+     )
152
+
153
+     # Assert for pytest to know whether the test passed
154
+     assert result.success
155
+
156
+
157
+ # Example agent implementation
158
+ import litellm
159
+
160
+
161
+ @scenario.cache()
162
+ def vegetarian_recipe_agent(messages) -> scenario.AgentReturnTypes:
163
+     response = litellm.completion(
164
+         model="openai/gpt-4.1-mini",
165
+         messages=[
166
+             {
167
+                 "role": "system",
168
+                 "content": """
169
+                     You are a vegetarian recipe agent.
170
+                     Given the user request, ask AT MOST ONE follow-up question,
171
+                     then provide a complete recipe. Keep your responses concise and focused.
172
+                 """,
173
+             },
174
+             *messages,
175
+         ],
176
+     )
177
+
178
+     return response.choices[0].message  # type: ignore
179
+ ```
180
+
181
+ Create a `.env` file and put your OpenAI API key in it:
182
+
183
+ ```bash
184
+ OPENAI_API_KEY=<your-api-key>
185
+ ```
186
+
187
+ Now run it with pytest:
188
+
189
+ ```bash
190
+ pytest -s tests/test_vegetarian_recipe_agent.py
191
+ ```
192
+
193
+ This is what it will look like:
194
+
195
+ [![asciicast](./assets/ascii-cinema.svg)](https://asciinema.org/a/nvO5GWGzqKTTCd8gtNSezQw11)
196
+
197
+ You can find the same code example in [examples/test_vegetarian_recipe_agent.py](examples/test_vegetarian_recipe_agent.py).
198
+
199
+ ## Simulation on Autopilot
200
+
201
+ If you provide a User Simulator Agent and a description of the Scenario without a script, the simulated user will automatically generate messages to the agent until the scenario is successful or the maximum number of turns is reached.
202
+
203
+ You can then use a Judge Agent to evaluate the scenario in real time against certain criteria. At every turn, the Judge Agent will decide whether it should let the simulation proceed or end it with a verdict.
204
+
205
+ For example, here is a scenario that tests a vibe coding assistant:
206
+
207
+ ```python
208
+ result = await scenario.run(
209
+     name="dog walking startup landing page",
210
+     description="""
211
+         the user wants to create a new landing page for their dog walking startup
212
+
213
+         send the first message to generate the landing page, then a single follow up request to extend it, then give your final verdict
214
+     """,
215
+     agents=[
216
+         LovableAgentAdapter(template_path=template_path),
217
+         scenario.UserSimulatorAgent(),
218
+         scenario.JudgeAgent(
219
+             criteria=[
220
+                 "agent reads the files before going and making changes",
221
+                 "agent modified the index.css file, not only the Index.tsx file",
222
+                 "agent created a comprehensive landing page",
223
+                 "agent extended the landing page with a new section",
224
+                 "agent should NOT say it can't read the file",
225
+                 "agent should NOT produce incomplete code or be too lazy to finish",
226
+             ],
227
+         ),
228
+     ],
229
+     max_turns=5,  # optional
230
+ )
231
+ ```
232
+
233
+ Check out the fully working Lovable Clone example in [examples/test_lovable_clone.py](examples/test_lovable_clone.py).
234
+
235
+ You can also combine it with a partial script, for example by controlling only the beginning of the conversation and letting the rest proceed on autopilot; see the next section.
236
+
237
+ ## Full Control of the Conversation
238
+
239
+ You can specify a script to guide the scenario by passing a list of steps to the `script` field. Those steps are simply arbitrary functions that take the current state of the scenario as an argument, so you can do things like:
240
+
241
+ - Control what the user says, or let it be generated automatically
242
+ - Control what the agent says, or let it be generated automatically
243
+ - Add custom assertions, for example making sure a tool was called
244
+ - Add a custom evaluation, from an external library
245
+ - Let the simulation proceed for a certain number of turns, and evaluate at each new turn
246
+ - Trigger the judge agent to decide on a verdict
247
+ - Add arbitrary messages like mock tool calls in the middle of the conversation
248
+
249
+ Everything is possible, using the same simple structure:
250
+
251
+ ```python
252
+ @pytest.mark.agent_test
253
+ @pytest.mark.asyncio
254
+ async def test_early_assumption_bias():
255
+     result = await scenario.run(
256
+         name="early assumption bias",
257
+         description="""
258
+             The agent makes a false assumption that the user is talking about an ATM bank, and the user corrects it that they actually mean river banks
259
+         """,
260
+         agents=[
261
+             Agent(),
262
+             scenario.UserSimulatorAgent(),
263
+             scenario.JudgeAgent(
264
+                 criteria=[
265
+                     "user should get good recommendations on river crossing",
266
+                     "agent should NOT keep following up about ATM recommendation after user has corrected them that they are actually just hiking",
267
+                 ],
268
+             ),
269
+         ],
270
+         max_turns=10,
271
+         script=[
272
+             # Define hardcoded messages
273
+             scenario.agent("Hello, how can I help you today?"),
274
+             scenario.user("how do I safely approach a bank?"),
275
+
276
+             # Or let it be generated automatically
277
+             scenario.agent(),
278
+
279
+             # Add custom assertions, for example making sure a tool was called
280
+             check_if_tool_was_called,
281
+
282
+             # Generate a user follow-up message
283
+             scenario.user(),
284
+
285
+             # Let the simulation proceed for 2 more turns, print at every turn
286
+             scenario.proceed(
287
+                 turns=2,
288
+                 on_turn=lambda state: print(f"Turn {state.current_turn}: {state.messages}"),
289
+             ),
290
+
291
+             # Time to make a judgment call
292
+             scenario.judge(),
293
+         ],
294
+     )
295
+
296
+     assert result.success
297
+ ```
298
+
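+ The `check_if_tool_was_called` step referenced in the script above is just a regular function over the scenario state, exactly like the custom assertion in the first example. A minimal sketch (reusing the hypothetical `get_current_weather` tool name):
+
+ ```python
+ def check_if_tool_was_called(state: scenario.ScenarioState) -> None:
+     # A plain assertion over the conversation so far; raising here
+     # fails the scenario at this exact point of the script
+     assert state.has_tool_call("get_current_weather")
+ ```
+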
299
+ ## Debug mode
300
+
301
+ You can enable debug mode by setting the `debug` field to `True` in `scenario.configure` or in the specific scenario you are running, or by passing the `--debug` flag to pytest.
302
+
303
+ Debug mode allows you to see the messages in slow motion step by step, and intervene with your own inputs to debug your agent from the middle of the conversation.
304
+
305
+ ```python
306
+ scenario.configure(default_model="openai/gpt-4.1-mini", debug=True)
307
+ ```
308
+
309
+ or
310
+
311
+ ```bash
312
+ pytest -s tests/test_vegetarian_recipe_agent.py --debug
313
+ ```
314
+
315
+ ## Cache
316
+
317
+ Each time the scenario runs, the testing agent might choose a different input to start with. This is good for making sure it covers the variance of real users, but the non-deterministic nature of it can also make tests less repeatable, more costly, and harder to debug. To solve this, you can set the `cache_key` field in `scenario.configure` or in the specific scenario you are running; this will make the testing agent give the same input given the same scenario:
318
+
319
+ ```python
320
+ scenario.configure(default_model="openai/gpt-4.1-mini", cache_key="42")
321
+ ```
322
+
323
+ To bust the cache, you can simply pass a different `cache_key`, disable it, or delete the cache files located at `~/.scenario/cache`.
324
+
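+ For example, wiping the local cache directory forces all simulated inputs to be regenerated on the next run:
+
+ ```bash
+ rm -rf ~/.scenario/cache
+ ```
+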
325
+ To go a step further and fully cache the test end-to-end, you can also wrap the LLM calls or any other non-deterministic functions on your application side with the `@scenario.cache` decorator:
326
+
327
+ ```python
328
+ # Inside your actual agent implementation
329
+ class MyAgent:
330
+     @scenario.cache()
331
+     def invoke(self, message, context):
332
+         return client.chat.completions.create(
333
+             # ...
334
+         )
335
+ ```
336
+
337
+ This will cache any function call you decorate when running the tests, making them repeatable; the cache key is a hash of the function arguments, the scenario being executed, and the `cache_key` you provided. You can exclude arguments that should not be part of the cache key by naming them in the `ignore` argument.
338
+
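+ For instance, the `invoke` method above receives `self`, which you would typically not want hashed into the cache key. A sketch of excluding it by name (the `ignore` argument is described above; the rest of the method is illustrative):
+
+ ```python
+ class MyAgent:
+     # Exclude `self` from the cache key so cache hits do not
+     # depend on the specific agent instance
+     @scenario.cache(ignore=["self"])
+     def invoke(self, message, context):
+         return client.chat.completions.create(
+             # ...
+         )
+ ```
+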
339
+ ## Disable Output
340
+
341
+ You can remove the `-s` flag from pytest to hide the output during the test; it will then only show up if the test fails. Alternatively, you can set `verbose=False` in `scenario.configure` or in the specific scenario you are running.
342
+
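+ For example, to keep the same default model but silence the scenario output globally:
+
+ ```python
+ scenario.configure(default_model="openai/gpt-4.1-mini", verbose=False)
+ ```
+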
343
+ ## Running in parallel
344
+
345
+ As the number of your scenarios grows, you might want to run them in parallel to speed up your whole test suite. We suggest using the [pytest-asyncio-concurrent](https://pypi.org/project/pytest-asyncio-concurrent/) plugin to do so.
346
+
347
+ Simply install the plugin from the link above, then replace the `@pytest.mark.asyncio` annotation in the tests with `@pytest.mark.asyncio_concurrent`, adding a group name to mark the group of scenarios that should run in parallel together, e.g.:
348
+
349
+ ```python
350
+ @pytest.mark.agent_test
351
+ @pytest.mark.asyncio_concurrent(group="vegetarian_recipe_agent")
352
+ async def test_vegetarian_recipe_agent():
353
+     # ...
354
+
355
+ @pytest.mark.agent_test
356
+ @pytest.mark.asyncio_concurrent(group="vegetarian_recipe_agent")
357
+ async def test_user_is_very_hungry():
358
+     # ...
359
+ ```
360
+
361
+ Those two scenarios should now run in parallel.
362
+
363
+ ## Events System
364
+
365
+ Scenario automatically publishes events during execution for monitoring and observability. You can enable event reporting by setting environment variables:
366
+
367
+ ```bash
368
+ # Enable automatic event reporting
369
+ export LANGWATCH_ENDPOINT="https://api.langwatch.ai"
370
+ export LANGWATCH_API_KEY="your-api-key"
371
+ ```
372
+
373
+ With these variables set, Scenario will automatically:
374
+
375
+ - Publish events when scenarios start, finish, and when messages are added
376
+ - Handle retries and errors automatically
377
+ - Process events asynchronously without blocking your tests
378
+
379
+ The events include timing information, conversation history, and success/failure metrics for analysis.
380
+
381
+ For advanced customization, see the event classes in the codebase for detailed documentation.
382
+
383
+ ## License
384
+
385
+ MIT License
@@ -0,0 +1,27 @@
1
+ scenario/__init__.py,sha256=UJ5l-sG4TMG0wR8Ba-dxdDW36m3apTvawP-lNvk7Jm0,4293
2
+ scenario/_error_messages.py,sha256=6lEx3jBGMbPx0kG0eX5zoZE-ENVM3O_ZkIbVMlnidYs,3892
3
+ scenario/agent_adapter.py,sha256=PoY2KQqYuqzIIb3-nhIU-MPXwHJc1vmwdweMy7ut-hk,4255
4
+ scenario/cache.py,sha256=J6s6Sia_Ce6TrnsInlhfxm6SF8tygo3sH-_cQCRX1WA,6213
5
+ scenario/config.py,sha256=xhUuXH-sThwPTmJNSuajKxX-WC_tcFwJ1jZc119DswA,6093
6
+ scenario/judge_agent.py,sha256=9CCO699qoWqXvWdQ73Yc3dqPOwaJdJ-zqxVaLaKi_cA,16161
7
+ scenario/pytest_plugin.py,sha256=f2ETBpATz80k7K87M6046ZIFiQpHEvDN7dxakd3y2wk,11321
8
+ scenario/scenario_executor.py,sha256=nkSIuIlwPHfr6pueSBbARrgiqPtW0SxajV3PFypAnJ4,34508
9
+ scenario/scenario_state.py,sha256=dQDjazem-dn1c5mw6TwngEu6Tv_cHwEzemepsPBy2f0,7039
10
+ scenario/script.py,sha256=A0N5pP0l4FFn1xdKc78U_wkwWhEWH3EFeU_LRDtNyEI,12241
11
+ scenario/types.py,sha256=BhXcTEMGyGg_1QysN-GXVjm8DP2VH3UEzj_qvoglp2k,9466
12
+ scenario/user_simulator_agent.py,sha256=fhwi8W44s343BGrjJXSJw960wcK7MgwTg-epxR1bqHo,9088
13
+ scenario/_utils/__init__.py,sha256=wNX9hU8vzYlyLDwjkt7JUW3IPo2DhME6UIt_zvLM3B0,1000
14
+ scenario/_utils/ids.py,sha256=K1iPuJgPh3gX9HCrDZGqK5lDgdwZXfOBF1YXVOWNHRg,1843
15
+ scenario/_utils/message_conversion.py,sha256=AM9DLyWpy97CrAH8RmId9Mv2rmLquQhFoUpRyp-jVeY,3622
16
+ scenario/_utils/utils.py,sha256=msQgUWaLh3U9jIIHmxkEbOaklga63AF0KJzsaKa_mZc,14008
17
+ scenario/events/__init__.py,sha256=_autF1cMZYpNXE-kJNvvRb-H_hYqy4gOSSp2fT3Wi9k,1533
18
+ scenario/events/event_bus.py,sha256=MThIMIaI2nj2CoegZazTNxeHbtl4_M7bW3vEAHz6R8g,7102
19
+ scenario/events/event_reporter.py,sha256=cMh_5jA5hG3Q9IsoAgPJhxnIVs_M1Q0e2lgLTEK4oPc,3100
20
+ scenario/events/events.py,sha256=jPXylwiADb0Bdk7u1YkAaU_jLebH7NW8J7SZI9JDTxw,6750
21
+ scenario/events/messages.py,sha256=1QAkwDExdF6AHgXdEFhHwmCv3Mxu3j0AXIptMekc_bg,3299
22
+ scenario/events/utils.py,sha256=yrTUTByeb0eAAQniQH7EyKs-usgGti8f17IemUyBZBw,3357
23
+ langwatch_scenario-0.6.0.dist-info/METADATA,sha256=IvD9on4tP57ldmizFzfGQBtiCT6Z7yoz0trlCSPSW9M,14227
24
+ langwatch_scenario-0.6.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
25
+ langwatch_scenario-0.6.0.dist-info/entry_points.txt,sha256=WlEnJ_gku0i18bIa3DSuGqXRX-QDQLe_s0YmRzK45TI,45
26
+ langwatch_scenario-0.6.0.dist-info/top_level.txt,sha256=45Mn28aedJsetnBMB5xSmrJ-yo701QLH89Zlz4r1clE,9
27
+ langwatch_scenario-0.6.0.dist-info/RECORD,,
scenario/__init__.py CHANGED
@@ -1,36 +1,147 @@
1
1
  """
2
- Scenario: A testing library for conversational agents.
2
+ Scenario: Agent Testing Framework through Simulation Testing
3
+
4
+ Scenario is a comprehensive testing framework for AI agents that uses simulation testing
5
+ to validate agent behavior through realistic conversations. It enables testing of both
6
+ happy paths and edge cases by simulating user interactions and evaluating agent responses
7
+ against configurable success criteria.
8
+
9
+ Key Features:
10
+
11
+ - End-to-end conversation testing with specified scenarios
12
+
13
+ - Flexible control from fully scripted to completely automated simulations
14
+
15
+ - Multi-turn evaluation designed for complex conversational agents
16
+
17
+ - Works with any testing framework (pytest, unittest, etc.)
18
+
19
+ - Framework-agnostic integration with any LLM or agent architecture
20
+
21
+ - Built-in caching for deterministic and faster test execution
22
+
23
+ Basic Usage:
24
+
25
+ import scenario
26
+
27
+ # Configure global settings
28
+ scenario.configure(default_model="openai/gpt-4.1-mini")
29
+
30
+ # Create your agent adapter
31
+ class MyAgent(scenario.AgentAdapter):
32
+ async def call(self, input: scenario.AgentInput) -> scenario.AgentReturnTypes:
33
+ return my_agent_function(input.last_new_user_message_str())
34
+
35
+ # Run a scenario test
36
+ result = await scenario.run(
37
+ name="customer service test",
38
+ description="Customer asks about billing, agent should help politely",
39
+ agents=[
40
+ MyAgent(),
41
+ scenario.UserSimulatorAgent(),
42
+ scenario.JudgeAgent(criteria=[
43
+ "Agent is polite and professional",
44
+ "Agent addresses the billing question",
45
+ "Agent provides clear next steps"
46
+ ])
47
+ ]
48
+ )
49
+
50
+ assert result.success
51
+
52
+ Advanced Usage:
53
+
54
+ # Script-controlled scenario with custom evaluations
55
+ def check_tool_usage(state: scenario.ScenarioState) -> None:
56
+ assert state.has_tool_call("get_customer_info")
57
+
58
+ result = await scenario.run(
59
+ name="scripted interaction",
60
+ description="Test specific conversation flow",
61
+ agents=[
62
+ MyAgent(),
63
+ scenario.UserSimulatorAgent(),
64
+ scenario.JudgeAgent(criteria=["Agent provides helpful response"])
65
+ ],
66
+ script=[
67
+ scenario.user("I have a billing question"),
68
+ scenario.agent(),
69
+ check_tool_usage, # Custom assertion
70
+ scenario.proceed(turns=2), # Let it continue automatically
71
+ scenario.succeed("All requirements met")
72
+ ]
73
+ )
74
+
75
+ Integration with Testing Frameworks:
76
+
77
+ import pytest
78
+
79
+ @pytest.mark.agent_test
80
+ @pytest.mark.asyncio
81
+ async def test_weather_agent():
82
+ result = await scenario.run(
83
+ name="weather query",
84
+ description="User asks about weather in a specific city",
85
+ agents=[
86
+ WeatherAgent(),
87
+ scenario.UserSimulatorAgent(),
88
+ scenario.JudgeAgent(criteria=["Provides accurate weather information"])
89
+ ]
90
+ )
91
+ assert result.success
92
+
93
+ For more examples and detailed documentation, visit: https://github.com/langwatch/scenario
3
94
  """
4
95
 
5
96
  # First import non-dependent modules
6
- from .types import ScenarioResult, AgentInput, ScenarioAgentRole, AgentReturnTypes
97
+ from .types import ScenarioResult, AgentInput, AgentRole, AgentReturnTypes
7
98
  from .config import ScenarioConfig
8
99
 
9
100
  # Then import modules with dependencies
10
- from .scenario_agent_adapter import ScenarioAgentAdapter
11
- from .testing_agent import TestingAgent
12
- from .scenario import Scenario
101
+ from .scenario_executor import ScenarioExecutor
102
+ from .scenario_state import ScenarioState
103
+ from .agent_adapter import AgentAdapter
104
+ from .judge_agent import JudgeAgent
105
+ from .user_simulator_agent import UserSimulatorAgent
13
106
  from .cache import scenario_cache
107
+ from .script import message, user, agent, judge, proceed, succeed, fail
14
108
 
15
109
  # Import pytest plugin components
16
- from .pytest_plugin import pytest_configure, scenario_reporter
110
+ # from .pytest_plugin import pytest_configure, scenario_reporter
111
+
112
+ run = ScenarioExecutor.run
113
+
114
+ configure = ScenarioConfig.configure
115
+
116
+ default_config = ScenarioConfig.default_config
117
+
118
+ cache = scenario_cache
17
119
 
18
120
  __all__ = [
121
+ # Functions
122
+ "run",
123
+ "configure",
124
+ "default_config",
125
+ "cache",
126
+ # Script
127
+ "message",
128
+ "proceed",
129
+ "succeed",
130
+ "fail",
131
+ "judge",
132
+ "agent",
133
+ "user",
19
134
  # Types
20
135
  "ScenarioResult",
21
136
  "AgentInput",
22
- "ScenarioAgentRole",
137
+ "AgentRole",
23
138
  "ScenarioConfig",
24
139
  "AgentReturnTypes",
25
-
26
140
  # Classes
27
- "Scenario",
28
- "ScenarioAgentAdapter",
29
- "TestingAgent",
30
-
31
- # Plugins
32
- "pytest_configure",
33
- "scenario_reporter",
34
- "scenario_cache",
141
+ "ScenarioExecutor",
142
+ "ScenarioState",
143
+ "AgentAdapter",
144
+ "UserSimulatorAgent",
145
+ "JudgeAgent",
35
146
  ]
36
- __version__ = "0.1.0"
147
+ __version__ = "0.1.0"
@@ -3,48 +3,18 @@ from typing import Any
3
3
  import termcolor
4
4
 
5
5
 
6
- default_config_error_message = f"""
7
-
8
- {termcolor.colored("->", "cyan")} Please set a default config with at least a testing_agent model for running your scenarios at the top of your test file, for example:
9
-
10
- from scenario import Scenario, TestingAgent
11
-
12
- Scenario.configure(testing_agent=TestingAgent(model="openai/gpt-4o-mini"))
13
- {termcolor.colored("^" * 74, "green")}
14
-
15
- @pytest.mark.agent_test
16
- def test_vegetarian_recipe_agent():
17
- scenario = Scenario(
18
- # ...
19
- )
20
- result = scenario.run()
21
-
22
- assert result.success
23
-
24
-
25
- {termcolor.colored("->", "cyan")} Alternatively, you can set the config specifically for this scenario:
26
-
27
- from scenario import Scenario, TestingAgent
28
-
29
- @pytest.mark.agent_test
30
- def test_vegetarian_recipe_agent():
31
- scenario = Scenario(
32
- # ...
33
- testing_agent=TestingAgent(model="openai/gpt-4o-mini")
34
- {termcolor.colored("^" * 54, "green")}
35
- )
36
- result = scenario.run()
37
-
38
- assert result.success
39
- """
6
+ def agent_not_configured_error_message(class_name: str):
7
+ return f"""
40
8
 
9
+ {termcolor.colored("->", "cyan")} {class_name} was initialized without a model, please set the model when defining the testing agent, for example:
41
10
 
42
- testing_agent_not_configured_error_message = f"""
11
+ {class_name}(model="openai/gpt-4.1-mini")
12
+ {termcolor.colored("^" * (29 + len(class_name)), "green")}
43
13
 
44
- {termcolor.colored("->", "cyan")} Testing agent was initialized without a model, please set the model when defining the testing agent, for example:
14
+ {termcolor.colored("->", "cyan")} Alternatively, you can set the default model globally, for example:
45
15
 
46
- TestingAgent.with_config(model="openai/gpt-4.1-mini")
47
- {termcolor.colored("^" * 53, "green")}
16
+ scenario.configure(default_model="openai/gpt-4.1-mini")
17
+ {termcolor.colored("^" * 55, "green")}
48
18
  """
49
19
 
50
20
 
@@ -0,0 +1,32 @@
1
+ """
2
+ Utility functions for scenario execution and message handling.
3
+
4
+ This module provides various utility functions used throughout the Scenario framework,
5
+ including message formatting, validation, role reversal, and UI components like spinners
6
+ for better user experience during scenario execution.
7
+ """
8
+
9
+ from .message_conversion import convert_agent_return_types_to_openai_messages
10
+ from .ids import get_or_create_batch_run_id, generate_scenario_run_id
11
+ from .utils import (
12
+ SerializableAndPydanticEncoder,
13
+ SerializableWithStringFallback,
14
+ print_openai_messages,
15
+ show_spinner,
16
+ check_valid_return_type,
17
+ reverse_roles,
18
+ await_if_awaitable,
19
+ )
20
+
21
+ __all__ = [
22
+ "convert_agent_return_types_to_openai_messages",
23
+ "get_or_create_batch_run_id",
24
+ "generate_scenario_run_id",
25
+ "SerializableAndPydanticEncoder",
26
+ "SerializableWithStringFallback",
27
+ "print_openai_messages",
28
+ "show_spinner",
29
+ "check_valid_return_type",
30
+ "reverse_roles",
31
+ "await_if_awaitable",
32
+ ]