langwatch-scenario 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff shows the changes between publicly available package versions as they appear in their respective public registries, and is provided for informational purposes only.
- {langwatch_scenario-0.1.2.dist-info → langwatch_scenario-0.1.3.dist-info}/METADATA +8 -2
- {langwatch_scenario-0.1.2.dist-info → langwatch_scenario-0.1.3.dist-info}/RECORD +7 -7
- scenario/pytest_plugin.py +19 -3
- scenario/scenario.py +2 -0
- {langwatch_scenario-0.1.2.dist-info → langwatch_scenario-0.1.3.dist-info}/WHEEL +0 -0
- {langwatch_scenario-0.1.2.dist-info → langwatch_scenario-0.1.3.dist-info}/entry_points.txt +0 -0
- {langwatch_scenario-0.1.2.dist-info → langwatch_scenario-0.1.3.dist-info}/top_level.txt +0 -0
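To reproduce a listing like this locally, the two wheels can be compared directly, since a wheel is a plain zip archive. A rough sketch; the filenames below are the standard wheel names for these releases and are assumed, not taken from this page:

```python
import zipfile

def wheel_contents(path: str) -> set[str]:
    # A wheel is a zip archive; namelist() returns every packaged file.
    with zipfile.ZipFile(path) as zf:
        return set(zf.namelist())

old = wheel_contents("langwatch_scenario-0.1.2-py3-none-any.whl")  # assumed filename
new = wheel_contents("langwatch_scenario-0.1.3-py3-none-any.whl")  # assumed filename
print("added:", sorted(new - old))
print("removed:", sorted(old - new))
print("common:", len(old & new))
```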
{langwatch_scenario-0.1.2.dist-info → langwatch_scenario-0.1.3.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: langwatch-scenario
-Version: 0.1.2
+Version: 0.1.3
 Summary: The end-to-end agent testing library
 Author-email: LangWatch Team <support@langwatch.ai>
 License: MIT
@@ -175,7 +175,7 @@ You can find a fully working Lovable Clone example in [examples/test_lovable_clo
 
 ## Debug mode
 
-You can enable debug mode by setting the `debug` field to `True` in the `Scenario.configure` method or in the specific scenario you are running.
+You can enable debug mode by setting the `debug` field to `True` in the `Scenario.configure` method or in the specific scenario you are running, or by passing the `--debug` flag to pytest.
 
 Debug mode allows you to see the messages in slow motion step by step, and intervene with your own inputs to debug your agent from the middle of the conversation.
 
@@ -183,6 +183,12 @@ Debug mode allows you to see the messages in slow motion step by step, and inter
 Scenario.configure(testing_agent=TestingAgent(model="openai/gpt-4o-mini"), debug=True)
 ```
 
+or
+
+```bash
+pytest -s tests/test_vegetarian_recipe_agent.py --debug
+```
+
 ## Cache
 
 Each time the scenario runs, the testing agent might choose a different input to start. This is good for covering the variance of real users, but we understand that its non-deterministic nature can make tests less repeatable, more costly, and harder to debug. To solve this, you can use the `cache_key` field in the `Scenario.configure` method or in the specific scenario you are running; this will make the testing agent give the same input for the same scenario:
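To make the cached setup the paragraph above describes concrete, here is a minimal sketch; the import path follows the package layout in RECORD, and the key string is a placeholder:

```python
from scenario import Scenario, TestingAgent

# Pinning cache_key makes the testing agent reuse the same generated
# inputs across runs, trading input variety for repeatability.
Scenario.configure(
    testing_agent=TestingAgent(model="openai/gpt-4o-mini"),
    cache_key="my-suite-v1",  # placeholder; any stable string works
)
```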
{langwatch_scenario-0.1.2.dist-info → langwatch_scenario-0.1.3.dist-info}/RECORD
CHANGED
@@ -2,14 +2,14 @@ scenario/__init__.py,sha256=LfCjOpbn55jYBBZHyMSZtRAWeCDFn4z4OhAyFnu8aMg,602
 scenario/cache.py,sha256=sYu16SAf-BnVYkWSlEDzpyynJGIQyNYsgMXPgCqEnmk,1719
 scenario/config.py,sha256=5UVBmuQDtni0Yu00bMh5p0xMGsrymYVRftXBGTsi2fI,802
 scenario/error_messages.py,sha256=ZMcAOKJmKaLIinMZ0yBIOgDhPfeJH0uZxIEmolRArtc,2344
-scenario/pytest_plugin.py,sha256=
+scenario/pytest_plugin.py,sha256=BuBbyKLa-t9AFVn9EETl7OvGSt__dFO7KnbZynfS1UM,5789
 scenario/result.py,sha256=SGF8uYNtkP7cJy4KsshUozZRevmdiyX2TFzr6VreTv8,2717
-scenario/scenario.py,sha256=
+scenario/scenario.py,sha256=tYn3Y1sK6_7pg7hFb_5w0TW6nun-za_4F8kqcnrXXU4,4077
 scenario/scenario_executor.py,sha256=c8xV6GoJgO2JoZBWpYPQN5YwwQ3G9iJUtXV9UGSf1q8,7919
 scenario/testing_agent.py,sha256=eS-c_io5cHgzJ88wwRvU_vve-pmB2HsGWN6qwlq0sPg,10865
 scenario/utils.py,sha256=tMESosrxesA1B5zZB3IJ-sNSXDmnpNNib-DHobveVLA,3918
-langwatch_scenario-0.1.
-langwatch_scenario-0.1.
-langwatch_scenario-0.1.
-langwatch_scenario-0.1.
-langwatch_scenario-0.1.
+langwatch_scenario-0.1.3.dist-info/METADATA,sha256=7OIolGcZ3fkCXFmE6JHkckVCeJb1r3yYSYveJ6iE9zw,8801
+langwatch_scenario-0.1.3.dist-info/WHEEL,sha256=pxyMxgL8-pra_rKaQ4drOZAegBVuX-G_4nRHjjgWbmo,91
+langwatch_scenario-0.1.3.dist-info/entry_points.txt,sha256=WlEnJ_gku0i18bIa3DSuGqXRX-QDQLe_s0YmRzK45TI,45
+langwatch_scenario-0.1.3.dist-info/top_level.txt,sha256=45Mn28aedJsetnBMB5xSmrJ-yo701QLH89Zlz4r1clE,9
+langwatch_scenario-0.1.3.dist-info/RECORD,,
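For context on the hash values above: each RECORD line has the form `path,sha256=<digest>,size`, where the digest is the URL-safe, unpadded base64 encoding of the file's SHA-256 hash, per the wheel specification. A small sketch for recomputing one entry from an unpacked wheel:

```python
import base64
import hashlib

def record_digest(data: bytes) -> str:
    # RECORD digests are URL-safe base64 of the raw SHA-256 bytes,
    # with the trailing "=" padding stripped.
    raw = hashlib.sha256(data).digest()
    return base64.urlsafe_b64encode(raw).rstrip(b"=").decode("ascii")

# Check an entry that is unchanged between 0.1.2 and 0.1.3:
with open("scenario/utils.py", "rb") as f:
    assert record_digest(f.read()) == "tMESosrxesA1B5zZB3IJ-sNSXDmnpNNib-DHobveVLA"
```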
scenario/pytest_plugin.py
CHANGED
@@ -11,14 +11,16 @@ from scenario.result import ScenarioResult
 
 from .scenario import Scenario
 
+
 class ScenarioReporterResults(TypedDict):
     scenario: Scenario
     result: ScenarioResult
 
+
 # ScenarioReporter class definition moved outside the fixture for global use
 class ScenarioReporter:
     def __init__(self):
-        self.results
+        self.results: list[ScenarioReporterResults] = []
 
     def add_result(self, scenario, result):
         """Add a test result to the reporter."""
@@ -83,7 +85,12 @@ class ScenarioReporter:
             f"\n{idx}. {scenario.description} - {colored(status, status_color, attrs=['bold'])}{time}"
         )
 
-        print(
+        print(
+            colored(
+                f"  Reasoning: {result.reasoning}",
+                "green" if result.success else "red",
+            )
+        )
 
         if hasattr(result, "met_criteria") and result.met_criteria:
             criteria_count = len(result.met_criteria)
@@ -119,6 +126,10 @@ def pytest_configure(config):
         "markers", "agent_test: mark test as an agent scenario test"
     )
 
+    if config.getoption("--debug"):
+        print(colored("\nScenario debug mode enabled (--debug).", "yellow"))
+        Scenario.configure(verbose=True, debug=True)
+
     # Create a global reporter instance
    config._scenario_reporter = ScenarioReporter()
 
@@ -128,7 +139,12 @@ def pytest_configure(config):
         result = await original_run(self, *args, **kwargs)
 
         # Always report to the global reporter
-
+        # Ensure the reporter exists before adding result
+        if hasattr(config, "_scenario_reporter"):
+            config._scenario_reporter.add_result(self, result)
+        else:
+            # Handle case where reporter might not be initialized (should not happen with current setup)
+            print(colored("Warning: Scenario reporter not found during run.", "yellow"))
 
         return result
 
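Taken together, the plugin changes mean a marked test reports through the patched `run()` and reacts to pytest's `--debug` flag. Below is a rough sketch of such a test; the `Scenario` constructor arguments and the stub agent are illustrative assumptions, not the library's confirmed signature:

```python
import pytest
from scenario import Scenario, TestingAgent

Scenario.configure(testing_agent=TestingAgent(model="openai/gpt-4o-mini"))

def vegetarian_recipe_agent(message: str) -> str:
    # Hypothetical stand-in for the agent under test.
    return "Here is a lentil curry recipe without any meat."

@pytest.mark.agent_test  # marker registered by the plugin
@pytest.mark.asyncio
async def test_vegetarian_recipe_agent():
    scenario = Scenario(
        "user asks for a vegetarian recipe",  # description (assumed positional)
        agent=vegetarian_recipe_agent,  # assumed keyword
    )
    result = await scenario.run()  # the patched run() feeds the global reporter
    assert result.success
```

Run it with `pytest -s --debug` and, per the hunk above, `pytest_configure` flips the global config to `verbose=True, debug=True` before any scenario executes.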
scenario/scenario.py
CHANGED
@@ -105,6 +105,7 @@ class Scenario(ScenarioConfig):
         max_turns: Optional[int] = None,
         verbose: Optional[Union[bool, int]] = None,
         cache_key: Optional[str] = None,
+        debug: Optional[bool] = None,
     ) -> None:
         existing_config = getattr(cls, "default_config", ScenarioConfig())
 
@@ -114,5 +115,6 @@ class Scenario(ScenarioConfig):
             max_turns=max_turns,
             verbose=verbose,
             cache_key=cache_key,
+            debug=debug,
         )
     )
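Combined with the pytest plugin change, this gives `Scenario.configure` a full debug surface in 0.1.3. A minimal sketch using only keywords that appear in the diffs above; the values are placeholders:

```python
from scenario import Scenario, TestingAgent

# debug merges into default_config exactly like the existing
# max_turns, verbose, and cache_key keywords.
Scenario.configure(
    testing_agent=TestingAgent(model="openai/gpt-4o-mini"),
    max_turns=10,  # placeholder value
    verbose=True,
    cache_key="ci",  # placeholder value
    debug=True,  # new in 0.1.3
)
```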
{langwatch_scenario-0.1.2.dist-info → langwatch_scenario-0.1.3.dist-info}/WHEEL: file without changes
{langwatch_scenario-0.1.2.dist-info → langwatch_scenario-0.1.3.dist-info}/entry_points.txt: file without changes
{langwatch_scenario-0.1.2.dist-info → langwatch_scenario-0.1.3.dist-info}/top_level.txt: file without changes