langwatch-scenario 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {langwatch_scenario-0.3.0.dist-info → langwatch_scenario-0.4.0.dist-info}/METADATA +140 -79
- langwatch_scenario-0.4.0.dist-info/RECORD +18 -0
- scenario/__init__.py +223 -9
- scenario/agent_adapter.py +111 -0
- scenario/cache.py +132 -8
- scenario/config.py +154 -10
- scenario/error_messages.py +8 -38
- scenario/judge_agent.py +435 -0
- scenario/pytest_plugin.py +223 -15
- scenario/scenario_executor.py +428 -136
- scenario/scenario_state.py +205 -0
- scenario/script.py +361 -0
- scenario/types.py +193 -20
- scenario/user_simulator_agent.py +249 -0
- scenario/utils.py +252 -2
- langwatch_scenario-0.3.0.dist-info/RECORD +0 -16
- scenario/scenario.py +0 -238
- scenario/scenario_agent_adapter.py +0 -16
- scenario/testing_agent.py +0 -279
- {langwatch_scenario-0.3.0.dist-info → langwatch_scenario-0.4.0.dist-info}/WHEEL +0 -0
- {langwatch_scenario-0.3.0.dist-info → langwatch_scenario-0.4.0.dist-info}/entry_points.txt +0 -0
- {langwatch_scenario-0.3.0.dist-info → langwatch_scenario-0.4.0.dist-info}/top_level.txt +0 -0
scenario/pytest_plugin.py
CHANGED
@@ -1,33 +1,129 @@
 """
 Pytest plugin for Scenario testing library.
+
+This module provides pytest integration for the Scenario framework, including
+automatic test reporting, debug mode support, and collection of scenario
+results across test runs. It enables seamless integration with existing
+pytest-based testing workflows.
 """

 import pytest
-from typing import TypedDict
+from typing import TypedDict, List, Tuple
 import functools
 from termcolor import colored

+from scenario.config import ScenarioConfig
 from scenario.types import ScenarioResult

-from .
+from .scenario_executor import ScenarioExecutor
+import scenario


 class ScenarioReporterResults(TypedDict):
-
+    """
+    Type definition for scenario test results stored by the reporter.
+
+    Attributes:
+        scenario: The ScenarioExecutor instance that ran the test
+        result: The ScenarioResult containing test outcome and details
+    """
+
+    scenario: ScenarioExecutor
     result: ScenarioResult


 # ScenarioReporter class definition moved outside the fixture for global use
 class ScenarioReporter:
+    """
+    Collects and reports on scenario test results across a pytest session.
+
+    This class automatically collects results from all scenario tests run during
+    a pytest session and provides comprehensive reporting including success rates,
+    timing information, and detailed failure analysis.
+
+    The reporter is automatically instantiated by the pytest plugin and collects
+    results from all scenario.run() calls without requiring explicit user setup.
+
+    Attributes:
+        results: List of all scenario test results collected during the session
+
+    Example:
+        The reporter is used automatically, but you can access it in tests:
+
+        ```python
+        def test_my_scenarios(scenario_reporter):
+            # Run your scenarios
+            result1 = await scenario.run(...)
+            result2 = await scenario.run(...)
+
+            # Check collected results
+            assert len(scenario_reporter.results) == 2
+
+            # Get summary statistics
+            summary = scenario_reporter.get_summary()
+            print(f"Success rate: {summary['success_rate']}%")
+        ```
+    """
+
     def __init__(self):
+        """Initialize an empty scenario reporter."""
         self.results: list[ScenarioReporterResults] = []

-    def add_result(self, scenario, result):
-        """
+    def add_result(self, scenario: ScenarioExecutor, result: ScenarioResult):
+        """
+        Add a test result to the reporter.
+
+        This method is called automatically by the pytest plugin whenever
+        a scenario.run() call completes. It stores both the scenario
+        configuration and the test result for later reporting.
+
+        Args:
+            scenario: The ScenarioExecutor instance that ran the test
+            result: The ScenarioResult containing test outcome and details
+
+        Example:
+            ```python
+            # This happens automatically when you run scenarios
+            result = await scenario.run(
+                name="my test",
+                description="Test description",
+                agents=[
+                    my_agent,
+                    scenario.UserSimulatorAgent(),
+                    scenario.JudgeAgent(criteria=["Agent provides helpful response"])
+                ]
+            )
+            # Result is automatically added to the global reporter
+            ```
+        """
         self.results.append({"scenario": scenario, "result": result})

     def get_summary(self):
-        """
+        """
+        Get a summary of all test results.
+
+        Calculates aggregate statistics across all scenario tests that
+        have been run during the current pytest session.
+
+        Returns:
+            Dictionary containing summary statistics:
+            - total: Total number of scenarios run
+            - passed: Number of scenarios that passed
+            - failed: Number of scenarios that failed
+            - success_rate: Percentage of scenarios that passed (0-100)
+
+        Example:
+            ```python
+            def test_summary_check(scenario_reporter):
+                # Run some scenarios...
+                await scenario.run(...)
+                await scenario.run(...)
+
+                summary = scenario_reporter.get_summary()
+                assert summary['total'] == 2
+                assert summary['success_rate'] >= 80  # Require 80% success rate
+            ```
+        """
         total = len(self.results)
         passed = sum(1 for r in self.results if r["result"].success)
         failed = total - passed
@@ -40,7 +136,36 @@ class ScenarioReporter:
         }

     def print_report(self):
-        """
+        """
+        Print a detailed report of all test results.
+
+        Outputs a comprehensive report to the console showing:
+        - Overall summary statistics
+        - Individual scenario results with success/failure status
+        - Detailed reasoning for each scenario outcome
+        - Timing information when available
+        - Criteria pass/fail breakdown for judge-evaluated scenarios
+
+        The report is automatically printed at the end of pytest sessions,
+        but can also be called manually for intermediate reporting.
+
+        Example output:
+            ```
+            === Scenario Test Report ===
+            Total Scenarios: 5
+            Passed: 4
+            Failed: 1
+            Success Rate: 80%
+
+            1. weather query test - PASSED in 2.34s (agent: 1.12s)
+               Reasoning: Agent successfully provided weather information
+               Passed Criteria: 2/2
+
+            2. complex math problem - FAILED in 5.67s (agent: 3.45s)
+               Reasoning: Agent provided incorrect calculation
+               Failed Criteria: 1
+            ```
+        """
         if not self.results:
             return  # Skip report if no results

@@ -94,7 +219,9 @@ class ScenarioReporter:

             if hasattr(result, "passed_criteria") and result.passed_criteria:
                 criteria_count = len(result.passed_criteria)
-                total_criteria = len(
+                total_criteria = len(result.passed_criteria) + len(
+                    result.failed_criteria
+                )
                 criteria_color = (
                     "green" if criteria_count == total_criteria else "yellow"
                 )
@@ -115,12 +242,40 @@


 # Store the original run method
-original_run =
+original_run = ScenarioExecutor._run


 @pytest.hookimpl(trylast=True)
 def pytest_configure(config):
-    """
+    """
+    Configure pytest integration for Scenario testing.
+
+    This hook is called when pytest starts and sets up:
+    - Registration of the @pytest.mark.agent_test marker
+    - Debug mode configuration from command line arguments
+    - Global scenario reporter for collecting results
+    - Automatic result collection from all scenario.run() calls
+
+    Args:
+        config: pytest configuration object
+
+    Note:
+        This function runs automatically when pytest loads the plugin.
+        Users don't need to call it directly.
+
+    Debug Mode:
+        When --debug is passed to pytest, enables step-by-step scenario
+        execution with user intervention capabilities.
+
+    Example:
+        ```bash
+        # Enable debug mode for all scenarios
+        pytest tests/ --debug -s
+
+        # Run normally
+        pytest tests/
+        ```
+    """
     # Register the marker
     config.addinivalue_line(
         "markers", "agent_test: mark test as an agent scenario test"
@@ -128,7 +283,7 @@ def pytest_configure(config):

     if config.getoption("--debug"):
         print(colored("\nScenario debug mode enabled (--debug).", "yellow"))
-
+        ScenarioConfig.configure(verbose=True, debug=True)

     # Create a global reporter instance
     config._scenario_reporter = ScenarioReporter()
@@ -149,27 +304,80 @@ def pytest_configure(config):
         return result

     # Apply the patch
-
+    ScenarioExecutor._run = auto_reporting_run


 @pytest.hookimpl(trylast=True)
 def pytest_unconfigure(config):
-    """
+    """
+    Clean up pytest integration when pytest exits.
+
+    This hook is called when pytest is shutting down and:
+    - Prints the final scenario test report
+    - Restores the original ScenarioExecutor._run method
+    - Cleans up any remaining resources
+
+    Args:
+        config: pytest configuration object
+
+    Note:
+        This function runs automatically when pytest exits.
+        Users don't need to call it directly.
+    """
     # Print the final report
     if hasattr(config, "_scenario_reporter"):
         config._scenario_reporter.print_report()

     # Restore the original method
-
+    ScenarioExecutor._run = original_run


 @pytest.fixture
 def scenario_reporter(request):
     """
-
+    Pytest fixture for accessing the global scenario reporter.

     This fixture provides access to the same reporter that's used for automatic
     reporting, allowing tests to explicitly interact with the reporter if needed.
+
+    Args:
+        request: pytest request object containing test context
+
+    Yields:
+        ScenarioReporter: The global reporter instance collecting all scenario results
+
+    Example:
+        ```python
+        @pytest.mark.agent_test
+        def test_with_custom_reporting(scenario_reporter):
+            # Run your scenarios
+            result1 = await scenario.run(
+                name="test 1",
+                description="First test",
+                agents=[agent, user_sim, judge]
+            )
+
+            result2 = await scenario.run(
+                name="test 2",
+                description="Second test",
+                agents=[agent, user_sim, judge]
+            )
+
+            # Access collected results
+            assert len(scenario_reporter.results) == 2
+
+            # Check success rate
+            summary = scenario_reporter.get_summary()
+            assert summary['success_rate'] >= 90
+
+            # Print intermediate report
+            if summary['failed'] > 0:
+                scenario_reporter.print_report()
+        ```
+
+    Note:
+        The reporter automatically collects results from all scenario.run() calls,
+        so you don't need to manually add results unless you're doing custom reporting.
     """
     # Get the global reporter from pytest config
     reporter = request.config._scenario_reporter