langwatch-scenario 0.2.0__tar.gz → 0.3.0__tar.gz

This diff covers publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.
Files changed (30)
  1. {langwatch_scenario-0.2.0 → langwatch_scenario-0.3.0}/PKG-INFO +60 -12
  2. {langwatch_scenario-0.2.0 → langwatch_scenario-0.3.0}/README.md +56 -10
  3. {langwatch_scenario-0.2.0 → langwatch_scenario-0.3.0}/langwatch_scenario.egg-info/PKG-INFO +60 -12
  4. {langwatch_scenario-0.2.0 → langwatch_scenario-0.3.0}/langwatch_scenario.egg-info/SOURCES.txt +6 -2
  5. {langwatch_scenario-0.2.0 → langwatch_scenario-0.3.0}/langwatch_scenario.egg-info/requires.txt +3 -1
  6. {langwatch_scenario-0.2.0 → langwatch_scenario-0.3.0}/pyproject.toml +18 -9
  7. {langwatch_scenario-0.2.0 → langwatch_scenario-0.3.0}/scenario/__init__.py +13 -3
  8. {langwatch_scenario-0.2.0 → langwatch_scenario-0.3.0}/scenario/config.py +18 -7
  9. langwatch_scenario-0.3.0/scenario/error_messages.py +134 -0
  10. {langwatch_scenario-0.2.0 → langwatch_scenario-0.3.0}/scenario/pytest_plugin.py +1 -1
  11. langwatch_scenario-0.3.0/scenario/scenario.py +238 -0
  12. langwatch_scenario-0.3.0/scenario/scenario_agent_adapter.py +16 -0
  13. langwatch_scenario-0.3.0/scenario/scenario_executor.py +466 -0
  14. {langwatch_scenario-0.2.0 → langwatch_scenario-0.3.0}/scenario/testing_agent.py +75 -58
  15. langwatch_scenario-0.3.0/scenario/types.py +96 -0
  16. langwatch_scenario-0.3.0/scenario/utils.py +264 -0
  17. {langwatch_scenario-0.2.0 → langwatch_scenario-0.3.0}/setup.py +1 -1
  18. langwatch_scenario-0.3.0/tests/test_scenario.py +434 -0
  19. langwatch_scenario-0.3.0/tests/test_scenario_agent.py +39 -0
  20. langwatch_scenario-0.3.0/tests/test_scenario_executor.py +162 -0
  21. langwatch_scenario-0.2.0/scenario/error_messages.py +0 -76
  22. langwatch_scenario-0.2.0/scenario/result.py +0 -74
  23. langwatch_scenario-0.2.0/scenario/scenario.py +0 -123
  24. langwatch_scenario-0.2.0/scenario/scenario_executor.py +0 -204
  25. langwatch_scenario-0.2.0/scenario/utils.py +0 -121
  26. {langwatch_scenario-0.2.0 → langwatch_scenario-0.3.0}/langwatch_scenario.egg-info/dependency_links.txt +0 -0
  27. {langwatch_scenario-0.2.0 → langwatch_scenario-0.3.0}/langwatch_scenario.egg-info/entry_points.txt +0 -0
  28. {langwatch_scenario-0.2.0 → langwatch_scenario-0.3.0}/langwatch_scenario.egg-info/top_level.txt +0 -0
  29. {langwatch_scenario-0.2.0 → langwatch_scenario-0.3.0}/scenario/cache.py +0 -0
  30. {langwatch_scenario-0.2.0 → langwatch_scenario-0.3.0}/setup.cfg +0 -0

{langwatch_scenario-0.2.0 → langwatch_scenario-0.3.0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: langwatch-scenario
-Version: 0.2.0
+Version: 0.3.0
 Summary: The end-to-end agent testing library
 Author-email: LangWatch Team <support@langwatch.ai>
 License: MIT
@@ -25,11 +25,13 @@ Requires-Dist: joblib>=1.4.2
 Requires-Dist: wrapt>=1.17.2
 Requires-Dist: pytest-asyncio>=0.26.0
 Requires-Dist: rich<15.0.0,>=13.3.3
+Requires-Dist: pksuid>=1.1.2
 Provides-Extra: dev
 Requires-Dist: black; extra == "dev"
 Requires-Dist: isort; extra == "dev"
-Requires-Dist: mypy; extra == "dev"
 Requires-Dist: pytest-cov; extra == "dev"
+Requires-Dist: pre-commit; extra == "dev"
+Requires-Dist: commitizen; extra == "dev"
 
 ![scenario](https://github.com/langwatch/scenario/raw/main/assets/scenario-wide.webp)
 
@@ -39,9 +41,9 @@ Requires-Dist: pytest-cov; extra == "dev"
 
 # Scenario: Use an Agent to test your Agent
 
-Scenario is a library for testing agents end-to-end as a human would, but without having to manually do it. The automated testing agent covers every single scenario for you.
+Scenario is an Agent Testing Framework for testing AI agents through Simulation Testing.
 
-You define the scenarios, and the testing agent will simulate your users as it follows them, it will keep chatting and evaluating your agent until it reaches the desired goal or detects an unexpected behavior.
+You define the scenarios, and the testing agent will simulate a real user as it follows them, it will keep chatting back and forth with _your_ agent to play out the simulation, until it reaches the desired goal or detects an unexpected behavior based on the criteria you defined.
 
 [📺 Video Tutorial](https://www.youtube.com/watch?v=f8NLpkY0Av4)
 
@@ -63,20 +65,23 @@ Now create your first scenario and save it as `tests/test_vegetarian_recipe_agen
 ```python
 import pytest
 
-from scenario import Scenario, TestingAgent, scenario_cache
+from scenario import Scenario, TestingAgent, ScenarioAgentAdapter, AgentInput, AgentReturnTypes, scenario_cache
 
 Scenario.configure(testing_agent=TestingAgent(model="openai/gpt-4o-mini"))
 
 
+# Create an adapter to call your agent
+class VegetarianRecipeAgentAdapter(ScenarioAgentAdapter):
+    def __init__(self, input: AgentInput):
+        self.agent = VegetarianRecipeAgent()
+
+    async def call(self, input: AgentInput) -> AgentReturnTypes:
+        return self.agent.run(input.last_new_user_message_str())
+
+
 @pytest.mark.agent_test
 @pytest.mark.asyncio
 async def test_vegetarian_recipe_agent():
-    agent = VegetarianRecipeAgent()
-
-    def vegetarian_recipe_agent(message, context):
-        # Call your agent here
-        return agent.run(message)
-
     # Define the simulated scenario
     scenario = Scenario(
         name="dinner idea",
@@ -133,7 +138,7 @@ class VegetarianRecipeAgent:
         message = response.choices[0].message  # type: ignore
         self.history.append(message)
 
-        return {"messages": [message]}
+        return [message]
 
 ```
 
@@ -186,6 +191,49 @@ result = await scenario.run()
 
 You can find a fully working Lovable Clone example in [examples/test_lovable_clone.py](examples/test_lovable_clone.py).
 
+## Specify a script for guiding the scenario
+
+You can specify a script for guiding the scenario by passing a list of steps to the `script` field.
+
+```python
+@pytest.mark.agent_test
+@pytest.mark.asyncio
+async def test_ai_assistant_agent():
+    scenario = Scenario(
+        name="false assumptions",
+        description="""
+            The agent makes false assumption about being an ATM bank, and user corrects it
+        """,
+        agent=AiAssistantAgentAdapter,
+        criteria=[
+            "user should get good recommendations on river crossing",
+            "agent should NOT follow up about ATM recommendation after user has corrected them they are just hiking",
+        ],
+        max_turns=5,
+    )
+
+    def check_if_tool_was_called(state: ScenarioExecutor) -> None:
+        assert state.has_tool_call("web_search")
+
+    result = await scenario.script(
+        [
+            # Define existing history of messages
+            scenario.user("how do I safely approach a bank?"),
+            # Or let it be generate automatically
+            scenario.agent(),
+            # Add custom assertions, for example making sure a tool was called
+            check_if_tool_was_called,
+            scenario.user(),
+            # Let the simulation proceed for 2 more turns
+            scenario.proceed(turns=2),
+            # Time to make a judgment call
+            scenario.judge(),
+        ]
+    ).run()
+
+    assert result.success
+```
+
 ## Debug mode
 
 You can enable debug mode by setting the `debug` field to `True` in the `Scenario.configure` method or in the specific scenario you are running, or by passing the `--debug` flag to pytest.

{langwatch_scenario-0.2.0 → langwatch_scenario-0.3.0}/README.md

@@ -6,9 +6,9 @@
 
 # Scenario: Use an Agent to test your Agent
 
-Scenario is a library for testing agents end-to-end as a human would, but without having to manually do it. The automated testing agent covers every single scenario for you.
+Scenario is an Agent Testing Framework for testing AI agents through Simulation Testing.
 
-You define the scenarios, and the testing agent will simulate your users as it follows them, it will keep chatting and evaluating your agent until it reaches the desired goal or detects an unexpected behavior.
+You define the scenarios, and the testing agent will simulate a real user as it follows them, it will keep chatting back and forth with _your_ agent to play out the simulation, until it reaches the desired goal or detects an unexpected behavior based on the criteria you defined.
 
 [📺 Video Tutorial](https://www.youtube.com/watch?v=f8NLpkY0Av4)
 
@@ -30,20 +30,23 @@ Now create your first scenario and save it as `tests/test_vegetarian_recipe_agen
 ```python
 import pytest
 
-from scenario import Scenario, TestingAgent, scenario_cache
+from scenario import Scenario, TestingAgent, ScenarioAgentAdapter, AgentInput, AgentReturnTypes, scenario_cache
 
 Scenario.configure(testing_agent=TestingAgent(model="openai/gpt-4o-mini"))
 
 
+# Create an adapter to call your agent
+class VegetarianRecipeAgentAdapter(ScenarioAgentAdapter):
+    def __init__(self, input: AgentInput):
+        self.agent = VegetarianRecipeAgent()
+
+    async def call(self, input: AgentInput) -> AgentReturnTypes:
+        return self.agent.run(input.last_new_user_message_str())
+
+
 @pytest.mark.agent_test
 @pytest.mark.asyncio
 async def test_vegetarian_recipe_agent():
-    agent = VegetarianRecipeAgent()
-
-    def vegetarian_recipe_agent(message, context):
-        # Call your agent here
-        return agent.run(message)
-
     # Define the simulated scenario
     scenario = Scenario(
         name="dinner idea",
@@ -100,7 +103,7 @@ class VegetarianRecipeAgent:
         message = response.choices[0].message  # type: ignore
         self.history.append(message)
 
-        return {"messages": [message]}
+        return [message]
 
 ```
 
@@ -153,6 +156,49 @@ result = await scenario.run()
 
 You can find a fully working Lovable Clone example in [examples/test_lovable_clone.py](examples/test_lovable_clone.py).
 
+## Specify a script for guiding the scenario
+
+You can specify a script for guiding the scenario by passing a list of steps to the `script` field.
+
+```python
+@pytest.mark.agent_test
+@pytest.mark.asyncio
+async def test_ai_assistant_agent():
+    scenario = Scenario(
+        name="false assumptions",
+        description="""
+            The agent makes false assumption about being an ATM bank, and user corrects it
+        """,
+        agent=AiAssistantAgentAdapter,
+        criteria=[
+            "user should get good recommendations on river crossing",
+            "agent should NOT follow up about ATM recommendation after user has corrected them they are just hiking",
+        ],
+        max_turns=5,
+    )
+
+    def check_if_tool_was_called(state: ScenarioExecutor) -> None:
+        assert state.has_tool_call("web_search")
+
+    result = await scenario.script(
+        [
+            # Define existing history of messages
+            scenario.user("how do I safely approach a bank?"),
+            # Or let it be generate automatically
+            scenario.agent(),
+            # Add custom assertions, for example making sure a tool was called
+            check_if_tool_was_called,
+            scenario.user(),
+            # Let the simulation proceed for 2 more turns
+            scenario.proceed(turns=2),
+            # Time to make a judgment call
+            scenario.judge(),
+        ]
+    ).run()
+
+    assert result.success
+```
+
 ## Debug mode
 
 You can enable debug mode by setting the `debug` field to `True` in the `Scenario.configure` method or in the specific scenario you are running, or by passing the `--debug` flag to pytest.

{langwatch_scenario-0.2.0 → langwatch_scenario-0.3.0}/langwatch_scenario.egg-info/PKG-INFO

(The hunks for this generated metadata file are identical to the PKG-INFO diff shown above.)

{langwatch_scenario-0.2.0 → langwatch_scenario-0.3.0}/langwatch_scenario.egg-info/SOURCES.txt

@@ -12,8 +12,12 @@ scenario/cache.py
 scenario/config.py
 scenario/error_messages.py
 scenario/pytest_plugin.py
-scenario/result.py
 scenario/scenario.py
+scenario/scenario_agent_adapter.py
 scenario/scenario_executor.py
 scenario/testing_agent.py
-scenario/utils.py
+scenario/types.py
+scenario/utils.py
+tests/test_scenario.py
+tests/test_scenario_agent.py
+tests/test_scenario_executor.py

{langwatch_scenario-0.2.0 → langwatch_scenario-0.3.0}/langwatch_scenario.egg-info/requires.txt

@@ -7,9 +7,11 @@ joblib>=1.4.2
 wrapt>=1.17.2
 pytest-asyncio>=0.26.0
 rich<15.0.0,>=13.3.3
+pksuid>=1.1.2
 
 [dev]
 black
 isort
-mypy
 pytest-cov
+pre-commit
+commitizen

{langwatch_scenario-0.2.0 → langwatch_scenario-0.3.0}/pyproject.toml

@@ -4,13 +4,11 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "langwatch-scenario"
-version = "0.2.0"
+version = "0.3.0"
 description = "The end-to-end agent testing library"
 readme = "README.md"
-authors = [
-    {name = "LangWatch Team", email = "support@langwatch.ai"}
-]
-license = {text = "MIT"}
+authors = [{ name = "LangWatch Team", email = "support@langwatch.ai" }]
+license = { text = "MIT" }
 requires-python = ">=3.9"
 classifiers = [
     "Development Status :: 4 - Beta",
@@ -32,14 +30,16 @@ dependencies = [
     "wrapt>=1.17.2",
     "pytest-asyncio>=0.26.0",
     "rich>=13.3.3,<15.0.0",
+    "pksuid>=1.1.2",
 ]
 
 [project.optional-dependencies]
 dev = [
     "black",
     "isort",
-    "mypy",
     "pytest-cov",
+    "pre-commit",
+    "commitizen",
 ]
 
 [project.urls]
@@ -47,12 +47,21 @@ dev = [
 "Bug Tracker" = "https://github.com/langwatch/scenario/issues"
 
 [tool.pytest.ini_options]
-markers = [
-    "agent_test: marks tests as agent scenario tests",
-]
+markers = ["agent_test: marks tests as agent scenario tests"]
 
 [dependency-groups]
 dev = [
+    "commitizen>=4.8.3",
+    "pre-commit>=4.2.0",
     "pydantic-ai>=0.0.52",
+    "pyright>=1.1.401",
     "pytest-asyncio-concurrent>=0.4.1",
 ]
+
+[tool.commitizen]
+name = "cz_conventional_commits"
+version = "0.2.0"
+tag_format = "v$version"
+version_files = ["pyproject.toml:version"]
+bump_message = "bump: version $current_version → $new_version"
+major_version_zero = true

{langwatch_scenario-0.2.0 → langwatch_scenario-0.3.0}/scenario/__init__.py

@@ -3,10 +3,11 @@ Scenario: A testing library for conversational agents.
 """
 
 # First import non-dependent modules
-from .result import ScenarioResult
+from .types import ScenarioResult, AgentInput, ScenarioAgentRole, AgentReturnTypes
 from .config import ScenarioConfig
 
 # Then import modules with dependencies
+from .scenario_agent_adapter import ScenarioAgentAdapter
 from .testing_agent import TestingAgent
 from .scenario import Scenario
 from .cache import scenario_cache
@@ -15,10 +16,19 @@ from .cache import scenario_cache
 from .pytest_plugin import pytest_configure, scenario_reporter
 
 __all__ = [
-    "Scenario",
-    "TestingAgent",
+    # Types
     "ScenarioResult",
+    "AgentInput",
+    "ScenarioAgentRole",
     "ScenarioConfig",
+    "AgentReturnTypes",
+
+    # Classes
+    "Scenario",
+    "ScenarioAgentAdapter",
+    "TestingAgent",
+
+    # Plugins
     "pytest_configure",
    "scenario_reporter",
    "scenario_cache",

{langwatch_scenario-0.2.0 → langwatch_scenario-0.3.0}/scenario/config.py

@@ -2,10 +2,16 @@
 Configuration module for Scenario.
 """
 
-from typing import Optional, Union
+from typing import TYPE_CHECKING, Any, Optional, Type, Union
 from pydantic import BaseModel
 
-from scenario.testing_agent import TestingAgent
+if TYPE_CHECKING:
+    from scenario.scenario_agent_adapter import ScenarioAgentAdapter
+
+    ScenarioAgentType = ScenarioAgentAdapter
+else:
+    ScenarioAgentType = Any
+
 
 class ScenarioConfig(BaseModel):
     """
@@ -15,14 +21,19 @@ class ScenarioConfig(BaseModel):
     such as the LLM provider and model to use for the testing agent.
     """
 
-    testing_agent: Optional[TestingAgent] = None
+    testing_agent: Optional[Type[ScenarioAgentType]] = None
    max_turns: Optional[int] = 10
    verbose: Optional[Union[bool, int]] = True
    cache_key: Optional[str] = None
    debug: Optional[bool] = False
 
    def merge(self, other: "ScenarioConfig") -> "ScenarioConfig":
-        return ScenarioConfig(**{
-            **self.model_dump(),
-            **other.model_dump(exclude_none=True),
-        })
+        return ScenarioConfig(
+            **{
+                **self.items(),
+                **other.items(),
+            }
+        )
+
+    def items(self):
+        return {k: getattr(self, k) for k in self.model_dump(exclude_none=True).keys()}
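
The rewritten `merge` now goes through the new `items()` helper, which reads the live attribute values (so a testing-agent class survives instead of being serialized by `model_dump`) and skips fields that are `None`. A small sketch of the expected behavior, assuming the `ScenarioConfig` definition above; the field values are made up for illustration:

```python
# Hypothetical usage: the per-scenario config overrides the global default,
# but fields left as None on the overriding side do not clobber existing values.
global_config = ScenarioConfig(max_turns=10, cache_key="my-test-suite")
scenario_config = ScenarioConfig(max_turns=5)  # cache_key stays None here

merged = global_config.merge(scenario_config)

assert merged.max_turns == 5                # the scenario-level value wins
assert merged.cache_key == "my-test-suite"  # the None field did not override the global value
```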

langwatch_scenario-0.3.0/scenario/error_messages.py

@@ -0,0 +1,134 @@
+from textwrap import indent
+from typing import Any
+import termcolor
+
+
+default_config_error_message = f"""
+
+{termcolor.colored("->", "cyan")} Please set a default config with at least a testing_agent model for running your scenarios at the top of your test file, for example:
+
+    from scenario import Scenario, TestingAgent
+
+    Scenario.configure(testing_agent=TestingAgent(model="openai/gpt-4o-mini"))
+    {termcolor.colored("^" * 74, "green")}
+
+    @pytest.mark.agent_test
+    def test_vegetarian_recipe_agent():
+        scenario = Scenario(
+            # ...
+        )
+        result = scenario.run()
+
+        assert result.success
+
+
+{termcolor.colored("->", "cyan")} Alternatively, you can set the config specifically for this scenario:
+
+    from scenario import Scenario, TestingAgent
+
+    @pytest.mark.agent_test
+    def test_vegetarian_recipe_agent():
+        scenario = Scenario(
+            # ...
+            testing_agent=TestingAgent(model="openai/gpt-4o-mini")
+            {termcolor.colored("^" * 54, "green")}
+        )
+        result = scenario.run()
+
+        assert result.success
+"""
+
+
+testing_agent_not_configured_error_message = f"""
+
+{termcolor.colored("->", "cyan")} Testing agent was initialized without a model, please set the model when defining the testing agent, for example:
+
+    TestingAgent.with_config(model="openai/gpt-4.1-mini")
+    {termcolor.colored("^" * 53, "green")}
+"""
+
+
+def message_return_error_message(got: Any, class_name: str):
+    got_ = repr(got)
+    if len(got_) > 100:
+        got_ = got_[:100] + "..."
+
+    return f"""
+{termcolor.colored("->", "cyan")} On the {termcolor.colored("call", "green")} method of the {class_name} agent adapter, you returned:
+
+{indent(got_, ' ' * 4)}
+
+{termcolor.colored("->", "cyan")} But the adapter should return either a string, a dict on the OpenAI messages format, or a list of messages in the OpenAI messages format so the testing agent can understand what happened. For example:
+
+    class MyAgentAdapter(ScenarioAgentAdapter):
+        async def call(self, input: AgentInput) -> AgentReturnTypes:
+            response = call_my_agent(message)
+
+            return response.output_text
+            {termcolor.colored("^" * 27, "green")}
+
+{termcolor.colored("->", "cyan")} Alternatively, you can return a list of messages in OpenAI messages format, this is useful for capturing tool calls and other before the final response:
+
+    class MyAgentAdapter(ScenarioAgentAdapter):
+        async def call(self, input: AgentInput) -> AgentReturnTypes:
+            response = call_my_agent(message)
+
+            return [
+                {{"role": "assistant", "content": response.output_text}},
+                {termcolor.colored("^" * 55, "green")}
+            ]
+    """
+
+
+def message_invalid_agent_type(got: Any):
+    got_ = repr(got)
+    if len(got_) > 100:
+        got_ = got_[:100] + "..."
+
+    return f"""
+{termcolor.colored("->", "cyan")} The {termcolor.colored("agent", "green")} argument of Scenario needs to receive a class that inherits from {termcolor.colored("ScenarioAgentAdapter", "green")}, but you passed:
+
+{indent(got_, ' ' * 4)}
+
+{termcolor.colored("->", "cyan")} Instead, wrap your agent in a ScenarioAgentAdapter subclass. For example:
+
+    class MyAgentAdapter(ScenarioAgentAdapter):
+    {termcolor.colored("^" * 43, "green")}
+        async def call(self, input: AgentInput) -> AgentReturnTypes:
+            response = call_my_agent(message)
+
+            return response.output_text
+
+{termcolor.colored("->", "cyan")} And then you can use that on your scenario definition:
+
+    @pytest.mark.agent_test
+    def test_my_agent():
+        scenario = Scenario(
+            name="first scenario",
+            description=\"\"\"
+                Example scenario description to test your agent.
+            \"\"\",
+            agent=MyAgentAdapter,
+            {termcolor.colored("^" * 20, "green")}
+            criteria=[
+                "Requirement One",
+                "Requirement Two",
+            ],
+        )
+        result = scenario.run()
+
+        assert result.success
+    """
+
+
+def agent_response_not_awaitable(class_name: str):
+    return f"""
+{termcolor.colored("->", "cyan")} The {termcolor.colored("call", "green")} method of the {class_name} agent adapter returned a non-awaitable response, you probably forgot to add the {termcolor.colored("async", "green")} keyword to the method definition, make sure your code looks like this:
+
+    class {class_name}(ScenarioAgentAdapter):
+        async def call(self, input: AgentInput) -> AgentReturnTypes:
+        {termcolor.colored("^" * 5, "green")}
+            response = call_my_agent(message)
+
+            return response.output_text
+    """

{langwatch_scenario-0.2.0 → langwatch_scenario-0.3.0}/scenario/pytest_plugin.py

@@ -7,7 +7,7 @@ from typing import TypedDict
 import functools
 from termcolor import colored
 
-from scenario.result import ScenarioResult
+from scenario.types import ScenarioResult
 
 from .scenario import Scenario