langwatch-scenario 0.1.0__tar.gz

@@ -0,0 +1,192 @@
1
+ Metadata-Version: 2.4
2
+ Name: langwatch-scenario
3
+ Version: 0.1.0
4
+ Summary: The end-to-end agent testing library
5
+ Author-email: LangWatch Team <support@langwatch.ai>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/langwatch/scenario
8
+ Project-URL: Bug Tracker, https://github.com/langwatch/scenario/issues
9
+ Classifier: Development Status :: 4 - Beta
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: License :: OSI Approved :: MIT License
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.8
14
+ Classifier: Programming Language :: Python :: 3.9
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Requires-Python: >=3.9
18
+ Description-Content-Type: text/markdown
19
+ Requires-Dist: pytest>=8.1.1
20
+ Requires-Dist: litellm>=1.49.0
21
+ Requires-Dist: python-dotenv>=1.0.1
22
+ Requires-Dist: termcolor>=2.4.0
23
+ Requires-Dist: pydantic>=2.7.0
24
+ Requires-Dist: joblib>=1.4.2
25
+ Requires-Dist: wrapt>=1.17.2
26
+ Requires-Dist: pytest-asyncio>=0.26.0
27
+ Requires-Dist: rich>=14.0.0
28
+ Provides-Extra: dev
29
+ Requires-Dist: black; extra == "dev"
30
+ Requires-Dist: isort; extra == "dev"
31
+ Requires-Dist: mypy; extra == "dev"
32
+ Requires-Dist: pytest-cov; extra == "dev"
33
+
34
+ ![scenario](./assets/scenario-wide.webp)
35
+
36
+ <div align="center">
37
+ <!-- Discord, PyPI, Docs, etc links -->
38
+ </div>
39
+
40
+ # Scenario: Use an Agent to test your Agent
41
+
42
+ Scenario is a library for testing agents end-to-end as a human would, but without having to manually do it. The automated testing agent covers every single scenario for you.
43
+
44
+ You define the scenarios, and the testing agent simulates your users as it follows them: it keeps chatting with and evaluating your agent until it reaches the desired goal or detects an unexpected behavior.
45
+
46
+ ## Getting Started
47
+
48
+ Install pytest and scenario:
49
+
50
+ ```bash
51
+ pip install pytest langwatch-scenario
52
+ ```
53
+
54
+ Now create your first scenario:
55
+
56
+ ```python
57
+ import pytest
58
+
59
+ from scenario import Scenario, TestingAgent
60
+
61
+ Scenario.configure(testing_agent=TestingAgent(model="openai/gpt-4o-mini"))
62
+
63
+
64
+ @pytest.mark.agent_test
65
+ @pytest.mark.asyncio
66
+ async def test_vegetarian_recipe_agent():
67
+ def vegetarian_recipe_agent(message, context):
68
+ # Call your agent here
69
+ response = "<Your agent's response>"
70
+
71
+ return {"message": response}
72
+
73
+ scenario = Scenario(
74
+ "User is looking for a dinner idea",
75
+ agent=vegetarian_recipe_agent,
76
+ success_criteria=[
77
+ "Recipe agent generates a vegetarian recipe",
78
+ "Recipe includes step-by-step cooking instructions",
79
+ ],
80
+ failure_criteria=[
81
+ "The recipe includes meat",
82
+ "The agent asks more than two follow-up questions",
83
+ ],
84
+ )
85
+
86
+ result = await scenario.run()
87
+
88
+ assert result.success
89
+ ```
90
+
91
+ Save it as `tests/test_vegetarian_recipe_agent.py` and run it with pytest:
92
+
93
+ ```bash
94
+ pytest -s tests/test_vegetarian_recipe_agent.py
95
+ ```
96
+
97
+ Once you connect the callback to a real agent, this is how it looks:
98
+
99
+ [![asciicast](https://asciinema.org/a/nvO5GWGzqKTTCd8gtNSezQw11.svg)](https://asciinema.org/a/nvO5GWGzqKTTCd8gtNSezQw11)
100
+
101
+ You can find a fully working example in [examples/test_vegetarian_recipe_agent.py](examples/test_vegetarian_recipe_agent.py).
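+
+ To make the placeholder callback from the example above concrete, here is a hedged sketch of how it might wrap a real LLM call. Only the `(message, context)` signature and the `{"message": ...}` return shape come from the example above; the `openai` client, model name, and system prompt are illustrative assumptions, not part of Scenario itself.
+
+ ```python
+ # A minimal sketch of a real agent callback (assumes the `openai` package
+ # and an OPENAI_API_KEY in the environment; adapt it to your own agent).
+ from openai import OpenAI
+
+ client = OpenAI()
+
+ def vegetarian_recipe_agent(message, context):
+     # Forward the simulated user's message to the LLM
+     completion = client.chat.completions.create(
+         model="gpt-4o-mini",
+         messages=[
+             {"role": "system", "content": "You are a vegetarian recipe assistant."},
+             {"role": "user", "content": message},
+         ],
+     )
+     # Return the reply in the shape Scenario expects
+     return {"message": completion.choices[0].message.content}
+ ```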
102
+
103
+ ## Customize strategy and max_turns
104
+
105
+ You can customize how the testing agent goes about testing by defining a `strategy` field. You can also limit the maximum number of turns the scenario will take by setting the `max_turns` field (defaults to 10).
106
+
107
+ For example, in this Lovable Clone scenario test:
108
+
109
+ ```python
110
+ scenario = Scenario(
111
+ "user wants to create a new landing page for their dog walking startup",
112
+ agent=lovable_agent,
113
+ strategy="send the first message to generate the landing page, then a single follow up request to extend it, then give your final verdict",
114
+ success_criteria=[
115
+ "agent reads the files before go and making changes",
116
+ "agent modified the index.css file",
117
+ "agent modified the Index.tsx file",
118
+ "agent created a comprehensive landing page",
119
+ "agent extended the landing page with a new section",
120
+ ],
121
+ failure_criteria=[
122
+ "agent says it can't read the file",
123
+ "agent produces incomplete code or is too lazy to finish",
124
+ ],
125
+ max_turns=5,
126
+ )
127
+
128
+ result = await scenario.run()
129
+ ```
130
+
131
+ You can find a fully working Lovable Clone example in [examples/test_lovable_clone.py](examples/test_lovable_clone.py).
132
+
133
+ ## Debug mode
134
+
135
+ You can enable debug mode by setting the `debug` field to `True` in the `Scenario.configure` method or in the specific scenario you are running.
136
+
137
+ Debug mode allows you to see the messages in slow motion, step by step, and to intervene with your own inputs to debug your agent from the middle of the conversation.
138
+
139
+ ```python
140
+ Scenario.configure(testing_agent=TestingAgent(model="openai/gpt-4o-mini"), debug=True)
141
+ ```
142
+
143
+ ## Cache
144
+
145
+ Each time the scenario runs, the testing agent might choose a different input to start. This is good for covering the variance of real users, but we understand that this non-determinism can make runs less repeatable, more costly, and harder to debug. To solve this, you can set the `cache_key` field in the `Scenario.configure` method or in the specific scenario you are running; this makes the testing agent give the same input for the same scenario:
146
+
147
+ ```python
148
+ Scenario.configure(testing_agent=TestingAgent(model="openai/gpt-4o-mini"), cache_key="42")
149
+ ```
150
+
151
+ To bust the cache, you can simply pass a different `cache_key`, disable it, or delete the cache files located at `~/.scenario/cache`.
152
+
153
+ To go a step further and fully cache the test end-to-end, you can also wrap the LLM calls or any other non-deterministic functions on your application side with the `@scenario_cache` decorator:
154
+
155
+ ```python
156
+ class MyAgent:
157
+ @scenario_cache(ignore=["self"])
158
+ def invoke(self, message, context):
159
+ return client.chat.completions.create(
160
+ # ...
161
+ )
162
+ ```
163
+
164
+ This will cache any function call you decorate when running the tests and make them repeatable, hashed by the function arguments, the scenario being executed, and the `cache_key` you provided. You can exclude arguments that should not be hashed for the cache key by naming them in the `ignore` argument.
165
+
166
+ ## Disable Output
167
+
168
+ You can remove the `-s` flag from pytest to hide the output during the test run; output will then only show up if a test fails. Alternatively, you can set `verbose=False` in the `Scenario.configure` method or in the specific scenario you are running.
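+
+ For example, a minimal sketch based on the configuration shown earlier:
+
+ ```python
+ Scenario.configure(
+     testing_agent=TestingAgent(model="openai/gpt-4o-mini"),
+     verbose=False,  # hide the scenario output during the test run
+ )
+ ```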
169
+
170
+ ## Running in parallel
171
+
172
+ As the number of your scenarios grows, you might want to run them in parallel to speed up your whole test suite. We suggest using the [pytest-asyncio-concurrent](https://pypi.org/project/pytest-asyncio-concurrent/) plugin to do so.
173
+
174
+ Simply install the plugin from the link above, then replace the `@pytest.mark.asyncio` annotation in the tests with `@pytest.mark.asyncio_concurrent`, adding a group name to mark the group of scenarios that should run in parallel together, e.g.:
175
+
176
+ ```python
177
+ @pytest.mark.agent_test
178
+ @pytest.mark.asyncio_concurrent(group="vegetarian_recipe_agent")
179
+ async def test_vegetarian_recipe_agent():
180
+ # ...
181
+
182
+ @pytest.mark.agent_test
183
+ @pytest.mark.asyncio_concurrent(group="vegetarian_recipe_agent")
184
+ async def test_user_is_very_hungry():
185
+ # ...
186
+ ```
187
+
188
+ Those two scenarios should now run in parallel.
189
+
190
+ ## License
191
+
192
+ MIT License
@@ -0,0 +1,159 @@
1
+ ![scenario](./assets/scenario-wide.webp)
2
+
3
+ <div align="center">
4
+ <!-- Discord, PyPI, Docs, etc links -->
5
+ </div>
6
+
7
+ # Scenario: Use an Agent to test your Agent
8
+
9
+ Scenario is a library for testing agents end-to-end as a human would, but without having to manually do it. The automated testing agent covers every single scenario for you.
10
+
11
+ You define the scenarios, and the testing agent simulates your users as it follows them: it keeps chatting with and evaluating your agent until it reaches the desired goal or detects an unexpected behavior.
12
+
13
+ ## Getting Started
14
+
15
+ Install pytest and scenario:
16
+
17
+ ```bash
18
+ pip install pytest langwatch-scenario
19
+ ```
20
+
21
+ Now create your first scenario:
22
+
23
+ ```python
24
+ import pytest
25
+
26
+ from scenario import Scenario, TestingAgent
27
+
28
+ Scenario.configure(testing_agent=TestingAgent(model="openai/gpt-4o-mini"))
29
+
30
+
31
+ @pytest.mark.agent_test
32
+ @pytest.mark.asyncio
33
+ async def test_vegetarian_recipe_agent():
34
+ def vegetarian_recipe_agent(message, context):
35
+ # Call your agent here
36
+ response = "<Your agent's response>"
37
+
38
+ return {"message": response}
39
+
40
+ scenario = Scenario(
41
+ "User is looking for a dinner idea",
42
+ agent=vegetarian_recipe_agent,
43
+ success_criteria=[
44
+ "Recipe agent generates a vegetarian recipe",
45
+ "Recipe includes step-by-step cooking instructions",
46
+ ],
47
+ failure_criteria=[
48
+ "The recipe includes meat",
49
+ "The agent asks more than two follow-up questions",
50
+ ],
51
+ )
52
+
53
+ result = await scenario.run()
54
+
55
+ assert result.success
56
+ ```
57
+
58
+ Save it as `tests/test_vegetarian_recipe_agent.py` and run it with pytest:
59
+
60
+ ```bash
61
+ pytest -s tests/test_vegetarian_recipe_agent.py
62
+ ```
63
+
64
+ Once you connect the callback to a real agent, this is how it looks:
65
+
66
+ [![asciicast](https://asciinema.org/a/nvO5GWGzqKTTCd8gtNSezQw11.svg)](https://asciinema.org/a/nvO5GWGzqKTTCd8gtNSezQw11)
67
+
68
+ You can find a fully working example in [examples/test_vegetarian_recipe_agent.py](examples/test_vegetarian_recipe_agent.py).
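+
+ To make the placeholder callback from the example above concrete, here is a hedged sketch of how it might wrap a real LLM call. Only the `(message, context)` signature and the `{"message": ...}` return shape come from the example above; the `openai` client, model name, and system prompt are illustrative assumptions, not part of Scenario itself.
+
+ ```python
+ # A minimal sketch of a real agent callback (assumes the `openai` package
+ # and an OPENAI_API_KEY in the environment; adapt it to your own agent).
+ from openai import OpenAI
+
+ client = OpenAI()
+
+ def vegetarian_recipe_agent(message, context):
+     # Forward the simulated user's message to the LLM
+     completion = client.chat.completions.create(
+         model="gpt-4o-mini",
+         messages=[
+             {"role": "system", "content": "You are a vegetarian recipe assistant."},
+             {"role": "user", "content": message},
+         ],
+     )
+     # Return the reply in the shape Scenario expects
+     return {"message": completion.choices[0].message.content}
+ ```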
69
+
70
+ ## Customize strategy and max_turns
71
+
72
+ You can customize how the testing agent goes about testing by defining a `strategy` field. You can also limit the maximum number of turns the scenario will take by setting the `max_turns` field (defaults to 10).
73
+
74
+ For example, in this Lovable Clone scenario test:
75
+
76
+ ```python
77
+ scenario = Scenario(
78
+ "user wants to create a new landing page for their dog walking startup",
79
+ agent=lovable_agent,
80
+ strategy="send the first message to generate the landing page, then a single follow up request to extend it, then give your final verdict",
81
+ success_criteria=[
82
+ "agent reads the files before go and making changes",
83
+ "agent modified the index.css file",
84
+ "agent modified the Index.tsx file",
85
+ "agent created a comprehensive landing page",
86
+ "agent extended the landing page with a new section",
87
+ ],
88
+ failure_criteria=[
89
+ "agent says it can't read the file",
90
+ "agent produces incomplete code or is too lazy to finish",
91
+ ],
92
+ max_turns=5,
93
+ )
94
+
95
+ result = await scenario.run()
96
+ ```
97
+
98
+ You can find a fully working Lovable Clone example in [examples/test_lovable_clone.py](examples/test_lovable_clone.py).
99
+
100
+ ## Debug mode
101
+
102
+ You can enable debug mode by setting the `debug` field to `True` in the `Scenario.configure` method or in the specific scenario you are running.
103
+
104
+ Debug mode allows you to see the messages in slow motion, step by step, and to intervene with your own inputs to debug your agent from the middle of the conversation.
105
+
106
+ ```python
107
+ Scenario.configure(testing_agent=TestingAgent(model="openai/gpt-4o-mini"), debug=True)
108
+ ```
109
+
110
+ ## Cache
111
+
112
+ Each time the scenario runs, the testing agent might choose a different input to start. This is good for covering the variance of real users, but we understand that this non-determinism can make runs less repeatable, more costly, and harder to debug. To solve this, you can set the `cache_key` field in the `Scenario.configure` method or in the specific scenario you are running; this makes the testing agent give the same input for the same scenario:
113
+
114
+ ```python
115
+ Scenario.configure(testing_agent=TestingAgent(model="openai/gpt-4o-mini"), cache_key="42")
116
+ ```
117
+
118
+ To bust the cache, you can simply pass a different `cache_key`, disable it, or delete the cache files located at `~/.scenario/cache`.
119
+
120
+ To go a step further and fully cache the test end-to-end, you can also wrap the LLM calls or any other non-deterministic functions on your application side with the `@scenario_cache` decorator:
121
+
122
+ ```python
123
+ class MyAgent:
124
+ @scenario_cache(ignore=["self"])
125
+ def invoke(self, message, context):
126
+ return client.chat.completions.create(
127
+ # ...
128
+ )
129
+ ```
130
+
131
+ This will cache any function call you decorate when running the tests and make them repeatable, hashed by the function arguments, the scenario being executed, and the `cache_key` you provided. You can exclude arguments that should not be hashed for the cache key by naming them in the `ignore` argument.
132
+
133
+ ## Disable Output
134
+
135
+ You can remove the `-s` flag from pytest to hide the output during the test run; output will then only show up if a test fails. Alternatively, you can set `verbose=False` in the `Scenario.configure` method or in the specific scenario you are running.
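+
+ For example, a minimal sketch based on the configuration shown earlier:
+
+ ```python
+ Scenario.configure(
+     testing_agent=TestingAgent(model="openai/gpt-4o-mini"),
+     verbose=False,  # hide the scenario output during the test run
+ )
+ ```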
136
+
137
+ ## Running in parallel
138
+
139
+ As the number of your scenarios grows, you might want to run them in parallel to speed up your whole test suite. We suggest using the [pytest-asyncio-concurrent](https://pypi.org/project/pytest-asyncio-concurrent/) plugin to do so.
140
+
141
+ Simply install the plugin from the link above, then replace the `@pytest.mark.asyncio` annotation in the tests with `@pytest.mark.asyncio_concurrent`, adding a group name to mark the group of scenarios that should run in parallel together, e.g.:
142
+
143
+ ```python
144
+ @pytest.mark.agent_test
145
+ @pytest.mark.asyncio_concurrent(group="vegetarian_recipe_agent")
146
+ async def test_vegetarian_recipe_agent():
147
+ # ...
148
+
149
+ @pytest.mark.agent_test
150
+ @pytest.mark.asyncio_concurrent(group="vegetarian_recipe_agent")
151
+ async def test_user_is_very_hungry():
152
+ # ...
153
+ ```
154
+
155
+ Those two scenarios should now run in parallel.
156
+
157
+ ## License
158
+
159
+ MIT License
@@ -0,0 +1,192 @@
1
+ Metadata-Version: 2.4
2
+ Name: langwatch-scenario
3
+ Version: 0.1.0
4
+ Summary: The end-to-end agent testing library
5
+ Author-email: LangWatch Team <support@langwatch.ai>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/langwatch/scenario
8
+ Project-URL: Bug Tracker, https://github.com/langwatch/scenario/issues
9
+ Classifier: Development Status :: 4 - Beta
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: License :: OSI Approved :: MIT License
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.8
14
+ Classifier: Programming Language :: Python :: 3.9
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Requires-Python: >=3.9
18
+ Description-Content-Type: text/markdown
19
+ Requires-Dist: pytest>=8.1.1
20
+ Requires-Dist: litellm>=1.49.0
21
+ Requires-Dist: python-dotenv>=1.0.1
22
+ Requires-Dist: termcolor>=2.4.0
23
+ Requires-Dist: pydantic>=2.7.0
24
+ Requires-Dist: joblib>=1.4.2
25
+ Requires-Dist: wrapt>=1.17.2
26
+ Requires-Dist: pytest-asyncio>=0.26.0
27
+ Requires-Dist: rich>=14.0.0
28
+ Provides-Extra: dev
29
+ Requires-Dist: black; extra == "dev"
30
+ Requires-Dist: isort; extra == "dev"
31
+ Requires-Dist: mypy; extra == "dev"
32
+ Requires-Dist: pytest-cov; extra == "dev"
33
+
34
+ ![scenario](./assets/scenario-wide.webp)
35
+
36
+ <div align="center">
37
+ <!-- Discord, PyPI, Docs, etc links -->
38
+ </div>
39
+
40
+ # Scenario: Use an Agent to test your Agent
41
+
42
+ Scenario is a library for testing agents end-to-end as a human would, but without having to manually do it. The automated testing agent covers every single scenario for you.
43
+
44
+ You define the scenarios, and the testing agent simulates your users as it follows them: it keeps chatting with and evaluating your agent until it reaches the desired goal or detects an unexpected behavior.
45
+
46
+ ## Getting Started
47
+
48
+ Install pytest and scenario:
49
+
50
+ ```bash
51
+ pip install pytest langwatch-scenario
52
+ ```
53
+
54
+ Now create your first scenario:
55
+
56
+ ```python
57
+ import pytest
58
+
59
+ from scenario import Scenario, TestingAgent
60
+
61
+ Scenario.configure(testing_agent=TestingAgent(model="openai/gpt-4o-mini"))
62
+
63
+
64
+ @pytest.mark.agent_test
65
+ @pytest.mark.asyncio
66
+ async def test_vegetarian_recipe_agent():
67
+ def vegetarian_recipe_agent(message, context):
68
+ # Call your agent here
69
+ response = "<Your agent's response>"
70
+
71
+ return {"message": response}
72
+
73
+ scenario = Scenario(
74
+ "User is looking for a dinner idea",
75
+ agent=vegetarian_recipe_agent,
76
+ success_criteria=[
77
+ "Recipe agent generates a vegetarian recipe",
78
+ "Recipe includes step-by-step cooking instructions",
79
+ ],
80
+ failure_criteria=[
81
+ "The recipe includes meat",
82
+ "The agent asks more than two follow-up questions",
83
+ ],
84
+ )
85
+
86
+ result = await scenario.run()
87
+
88
+ assert result.success
89
+ ```
90
+
91
+ Save it as `tests/test_vegetarian_recipe_agent.py` and run it with pytest:
92
+
93
+ ```bash
94
+ pytest -s tests/test_vegetarian_recipe_agent.py
95
+ ```
96
+
97
+ Once you connect the callback to a real agent, this is how it looks:
98
+
99
+ [![asciicast](https://asciinema.org/a/nvO5GWGzqKTTCd8gtNSezQw11.svg)](https://asciinema.org/a/nvO5GWGzqKTTCd8gtNSezQw11)
100
+
101
+ You can find a fully working example in [examples/test_vegetarian_recipe_agent.py](examples/test_vegetarian_recipe_agent.py).
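+
+ To make the placeholder callback from the example above concrete, here is a hedged sketch of how it might wrap a real LLM call. Only the `(message, context)` signature and the `{"message": ...}` return shape come from the example above; the `openai` client, model name, and system prompt are illustrative assumptions, not part of Scenario itself.
+
+ ```python
+ # A minimal sketch of a real agent callback (assumes the `openai` package
+ # and an OPENAI_API_KEY in the environment; adapt it to your own agent).
+ from openai import OpenAI
+
+ client = OpenAI()
+
+ def vegetarian_recipe_agent(message, context):
+     # Forward the simulated user's message to the LLM
+     completion = client.chat.completions.create(
+         model="gpt-4o-mini",
+         messages=[
+             {"role": "system", "content": "You are a vegetarian recipe assistant."},
+             {"role": "user", "content": message},
+         ],
+     )
+     # Return the reply in the shape Scenario expects
+     return {"message": completion.choices[0].message.content}
+ ```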
102
+
103
+ ## Customize strategy and max_turns
104
+
105
+ You can customize how the testing agent goes about testing by defining a `strategy` field. You can also limit the maximum number of turns the scenario will take by setting the `max_turns` field (defaults to 10).
106
+
107
+ For example, in this Lovable Clone scenario test:
108
+
109
+ ```python
110
+ scenario = Scenario(
111
+ "user wants to create a new landing page for their dog walking startup",
112
+ agent=lovable_agent,
113
+ strategy="send the first message to generate the landing page, then a single follow up request to extend it, then give your final verdict",
114
+ success_criteria=[
115
+ "agent reads the files before go and making changes",
116
+ "agent modified the index.css file",
117
+ "agent modified the Index.tsx file",
118
+ "agent created a comprehensive landing page",
119
+ "agent extended the landing page with a new section",
120
+ ],
121
+ failure_criteria=[
122
+ "agent says it can't read the file",
123
+ "agent produces incomplete code or is too lazy to finish",
124
+ ],
125
+ max_turns=5,
126
+ )
127
+
128
+ result = await scenario.run()
129
+ ```
130
+
131
+ You can find a fully working Lovable Clone example in [examples/test_lovable_clone.py](examples/test_lovable_clone.py).
132
+
133
+ ## Debug mode
134
+
135
+ You can enable debug mode by setting the `debug` field to `True` in the `Scenario.configure` method or in the specific scenario you are running.
136
+
137
+ Debug mode allows you to see the messages in slow motion, step by step, and to intervene with your own inputs to debug your agent from the middle of the conversation.
138
+
139
+ ```python
140
+ Scenario.configure(testing_agent=TestingAgent(model="openai/gpt-4o-mini"), debug=True)
141
+ ```
142
+
143
+ ## Cache
144
+
145
+ Each time the scenario runs, the testing agent might choose a different input to start. This is good for covering the variance of real users, but we understand that this non-determinism can make runs less repeatable, more costly, and harder to debug. To solve this, you can set the `cache_key` field in the `Scenario.configure` method or in the specific scenario you are running; this makes the testing agent give the same input for the same scenario:
146
+
147
+ ```python
148
+ Scenario.configure(testing_agent=TestingAgent(model="openai/gpt-4o-mini"), cache_key="42")
149
+ ```
150
+
151
+ To bust the cache, you can simply pass a different `cache_key`, disable it, or delete the cache files located at `~/.scenario/cache`.
152
+
153
+ To go a step further and fully cache the test end-to-end, you can also wrap the LLM calls or any other non-deterministic functions on your application side with the `@scenario_cache` decorator:
154
+
155
+ ```python
156
+ class MyAgent:
157
+ @scenario_cache(ignore=["self"])
158
+ def invoke(self, message, context):
159
+ return client.chat.completions.create(
160
+ # ...
161
+ )
162
+ ```
163
+
164
+ This will cache any function call you decorate when running the tests and make them repeatable, hashed by the function arguments, the scenario being executed, and the `cache_key` you provided. You can exclude arguments that should not be hashed for the cache key by naming them in the `ignore` argument.
165
+
166
+ ## Disable Output
167
+
168
+ You can remove the `-s` flag from pytest to hide the output during the test run; output will then only show up if a test fails. Alternatively, you can set `verbose=False` in the `Scenario.configure` method or in the specific scenario you are running.
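+
+ For example, a minimal sketch based on the configuration shown earlier:
+
+ ```python
+ Scenario.configure(
+     testing_agent=TestingAgent(model="openai/gpt-4o-mini"),
+     verbose=False,  # hide the scenario output during the test run
+ )
+ ```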
169
+
170
+ ## Running in parallel
171
+
172
+ As the number of your scenarios grows, you might want to run them in parallel to speed up your whole test suite. We suggest using the [pytest-asyncio-concurrent](https://pypi.org/project/pytest-asyncio-concurrent/) plugin to do so.
173
+
174
+ Simply install the plugin from the link above, then replace the `@pytest.mark.asyncio` annotation in the tests with `@pytest.mark.asyncio_concurrent`, adding a group name to mark the group of scenarios that should run in parallel together, e.g.:
175
+
176
+ ```python
177
+ @pytest.mark.agent_test
178
+ @pytest.mark.asyncio_concurrent(group="vegetarian_recipe_agent")
179
+ async def test_vegetarian_recipe_agent():
180
+ # ...
181
+
182
+ @pytest.mark.agent_test
183
+ @pytest.mark.asyncio_concurrent(group="vegetarian_recipe_agent")
184
+ async def test_user_is_very_hungry():
185
+ # ...
186
+ ```
187
+
188
+ Those two scenarios should now run in parallel.
189
+
190
+ ## License
191
+
192
+ MIT License
@@ -0,0 +1,19 @@
1
+ README.md
2
+ pyproject.toml
3
+ setup.py
4
+ langwatch_scenario.egg-info/PKG-INFO
5
+ langwatch_scenario.egg-info/SOURCES.txt
6
+ langwatch_scenario.egg-info/dependency_links.txt
7
+ langwatch_scenario.egg-info/entry_points.txt
8
+ langwatch_scenario.egg-info/requires.txt
9
+ langwatch_scenario.egg-info/top_level.txt
10
+ scenario/__init__.py
11
+ scenario/cache.py
12
+ scenario/config.py
13
+ scenario/error_messages.py
14
+ scenario/pytest_plugin.py
15
+ scenario/result.py
16
+ scenario/scenario.py
17
+ scenario/scenario_executor.py
18
+ scenario/testing_agent.py
19
+ scenario/utils.py
@@ -0,0 +1,2 @@
1
+ [pytest11]
2
+ scenario = scenario.pytest_plugin
@@ -0,0 +1,15 @@
1
+ pytest>=8.1.1
2
+ litellm>=1.49.0
3
+ python-dotenv>=1.0.1
4
+ termcolor>=2.4.0
5
+ pydantic>=2.7.0
6
+ joblib>=1.4.2
7
+ wrapt>=1.17.2
8
+ pytest-asyncio>=0.26.0
9
+ rich>=14.0.0
10
+
11
+ [dev]
12
+ black
13
+ isort
14
+ mypy
15
+ pytest-cov
@@ -0,0 +1,58 @@
1
+ [build-system]
2
+ requires = ["setuptools>=42", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "langwatch-scenario"
7
+ version = "0.1.0"
8
+ description = "The end-to-end agent testing library"
9
+ readme = "README.md"
10
+ authors = [
11
+ {name = "LangWatch Team", email = "support@langwatch.ai"}
12
+ ]
13
+ license = {text = "MIT"}
14
+ requires-python = ">=3.9"
15
+ classifiers = [
16
+ "Development Status :: 4 - Beta",
17
+ "Intended Audience :: Developers",
18
+ "License :: OSI Approved :: MIT License",
19
+ "Programming Language :: Python :: 3",
20
+ "Programming Language :: Python :: 3.8",
21
+ "Programming Language :: Python :: 3.9",
22
+ "Programming Language :: Python :: 3.10",
23
+ "Programming Language :: Python :: 3.11",
24
+ ]
25
+ dependencies = [
26
+ "pytest>=8.1.1",
27
+ "litellm>=1.49.0",
28
+ "python-dotenv>=1.0.1",
29
+ "termcolor>=2.4.0",
30
+ "pydantic>=2.7.0",
31
+ "joblib>=1.4.2",
32
+ "wrapt>=1.17.2",
33
+ "pytest-asyncio>=0.26.0",
34
+ "rich>=14.0.0",
35
+ ]
36
+
37
+ [project.optional-dependencies]
38
+ dev = [
39
+ "black",
40
+ "isort",
41
+ "mypy",
42
+ "pytest-cov",
43
+ ]
44
+
45
+ [project.urls]
46
+ "Homepage" = "https://github.com/langwatch/scenario"
47
+ "Bug Tracker" = "https://github.com/langwatch/scenario/issues"
48
+
49
+ [tool.pytest.ini_options]
50
+ markers = [
51
+ "agent_test: marks tests as agent scenario tests",
52
+ ]
53
+
54
+ [dependency-groups]
55
+ dev = [
56
+ "pydantic-ai>=0.0.52",
57
+ "pytest-asyncio-concurrent>=0.4.1",
58
+ ]
@@ -0,0 +1,26 @@
1
+ """
2
+ Scenario: A testing library for conversational agents.
3
+ """
4
+
5
+ # First import non-dependent modules
6
+ from .result import ScenarioResult
7
+ from .config import ScenarioConfig
8
+
9
+ # Then import modules with dependencies
10
+ from .testing_agent import TestingAgent
11
+ from .scenario import Scenario
12
+ from .cache import scenario_cache
13
+
14
+ # Import pytest plugin components
15
+ from .pytest_plugin import pytest_configure, scenario_reporter
16
+
17
+ __all__ = [
18
+ "Scenario",
19
+ "TestingAgent",
20
+ "ScenarioResult",
21
+ "ScenarioConfig",
22
+ "pytest_configure",
23
+ "scenario_reporter",
24
+ "scenario_cache",
25
+ ]
26
+ __version__ = "0.1.0"