langwatch-scenario 0.2.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
scenario/pytest_plugin.py CHANGED
@@ -1,33 +1,129 @@
1
1
  """
2
2
  Pytest plugin for Scenario testing library.
3
+
4
+ This module provides pytest integration for the Scenario framework, including
5
+ automatic test reporting, debug mode support, and collection of scenario
6
+ results across test runs. It enables seamless integration with existing
7
+ pytest-based testing workflows.
3
8
  """
4
9
 
5
10
  import pytest
6
- from typing import TypedDict
11
+ from typing import TypedDict, List, Tuple
7
12
  import functools
8
13
  from termcolor import colored
9
14
 
10
- from scenario.result import ScenarioResult
15
+ from scenario.config import ScenarioConfig
16
+ from scenario.types import ScenarioResult
11
17
 
12
- from .scenario import Scenario
18
+ from .scenario_executor import ScenarioExecutor
19
+ import scenario
13
20
 
14
21
 
15
22
  class ScenarioReporterResults(TypedDict):
16
- scenario: Scenario
23
+ """
24
+ Type definition for scenario test results stored by the reporter.
25
+
26
+ Attributes:
27
+ scenario: The ScenarioExecutor instance that ran the test
28
+ result: The ScenarioResult containing test outcome and details
29
+ """
30
+
31
+ scenario: ScenarioExecutor
17
32
  result: ScenarioResult
18
33
 
19
34
 
20
35
  # ScenarioReporter class definition moved outside the fixture for global use
21
36
  class ScenarioReporter:
37
+ """
38
+ Collects and reports on scenario test results across a pytest session.
39
+
40
+ This class automatically collects results from all scenario tests run during
41
+ a pytest session and provides comprehensive reporting including success rates,
42
+ timing information, and detailed failure analysis.
43
+
44
+ The reporter is automatically instantiated by the pytest plugin and collects
45
+ results from all scenario.run() calls without requiring explicit user setup.
46
+
47
+ Attributes:
48
+ results: List of all scenario test results collected during the session
49
+
50
+ Example:
51
+ The reporter is used automatically, but you can access it in tests:
52
+
53
+ ```python
54
+ async def test_my_scenarios(scenario_reporter):
55
+ # Run your scenarios
56
+ result1 = await scenario.run(...)
57
+ result2 = await scenario.run(...)
58
+
59
+ # Check collected results
60
+ assert len(scenario_reporter.results) == 2
61
+
62
+ # Get summary statistics
63
+ summary = scenario_reporter.get_summary()
64
+ print(f"Success rate: {summary['success_rate']}%")
65
+ ```
66
+ """
67
+
22
68
  def __init__(self):
69
+ """Initialize an empty scenario reporter."""
23
70
  self.results: list[ScenarioReporterResults] = []
24
71
 
25
- def add_result(self, scenario, result):
26
- """Add a test result to the reporter."""
72
+ def add_result(self, scenario: ScenarioExecutor, result: ScenarioResult):
73
+ """
74
+ Add a test result to the reporter.
75
+
76
+ This method is called automatically by the pytest plugin whenever
77
+ a scenario.run() call completes. It stores both the scenario
78
+ configuration and the test result for later reporting.
79
+
80
+ Args:
81
+ scenario: The ScenarioExecutor instance that ran the test
82
+ result: The ScenarioResult containing test outcome and details
83
+
84
+ Example:
85
+ ```python
86
+ # This happens automatically when you run scenarios
87
+ result = await scenario.run(
88
+ name="my test",
89
+ description="Test description",
90
+ agents=[
91
+ my_agent,
92
+ scenario.UserSimulatorAgent(),
93
+ scenario.JudgeAgent(criteria=["Agent provides helpful response"])
94
+ ]
95
+ )
96
+ # Result is automatically added to the global reporter
97
+ ```
98
+ """
27
99
  self.results.append({"scenario": scenario, "result": result})
28
100
 
29
101
  def get_summary(self):
30
- """Get a summary of all test results."""
102
+ """
103
+ Get a summary of all test results.
104
+
105
+ Calculates aggregate statistics across all scenario tests that
106
+ have been run during the current pytest session.
107
+
108
+ Returns:
109
+ Dictionary containing summary statistics:
110
+ - total: Total number of scenarios run
111
+ - passed: Number of scenarios that passed
112
+ - failed: Number of scenarios that failed
113
+ - success_rate: Percentage of scenarios that passed (0-100)
114
+
115
+ Example:
116
+ ```python
117
+ async def test_summary_check(scenario_reporter):
118
+ # Run some scenarios...
119
+ await scenario.run(...)
120
+ await scenario.run(...)
121
+
122
+ summary = scenario_reporter.get_summary()
123
+ assert summary['total'] == 2
124
+ assert summary['success_rate'] >= 80 # Require 80% success rate
125
+ ```
126
+ """
31
127
  total = len(self.results)
32
128
  passed = sum(1 for r in self.results if r["result"].success)
33
129
  failed = total - passed
@@ -40,7 +136,36 @@ class ScenarioReporter:
40
136
  }
41
137
 
42
138
  def print_report(self):
43
- """Print a detailed report of all test results."""
139
+ """
140
+ Print a detailed report of all test results.
141
+
142
+ Outputs a comprehensive report to the console showing:
143
+ - Overall summary statistics
144
+ - Individual scenario results with success/failure status
145
+ - Detailed reasoning for each scenario outcome
146
+ - Timing information when available
147
+ - Criteria pass/fail breakdown for judge-evaluated scenarios
148
+
149
+ The report is automatically printed at the end of pytest sessions,
150
+ but can also be called manually for intermediate reporting.
151
+
152
+ Example output:
153
+ ```
154
+ === Scenario Test Report ===
155
+ Total Scenarios: 5
156
+ Passed: 4
157
+ Failed: 1
158
+ Success Rate: 80%
159
+
160
+ 1. weather query test - PASSED in 2.34s (agent: 1.12s)
161
+ Reasoning: Agent successfully provided weather information
162
+ Passed Criteria: 2/2
163
+
164
+ 2. complex math problem - FAILED in 5.67s (agent: 3.45s)
165
+ Reasoning: Agent provided incorrect calculation
166
+ Failed Criteria: 1
167
+ ```
168
+ """
44
169
  if not self.results:
45
170
  return # Skip report if no results
46
171
 
@@ -94,7 +219,9 @@ class ScenarioReporter:
94
219
 
95
220
  if hasattr(result, "passed_criteria") and result.passed_criteria:
96
221
  criteria_count = len(result.passed_criteria)
97
- total_criteria = len(scenario.criteria)
222
+ total_criteria = len(result.passed_criteria) + len(
223
+ result.failed_criteria
224
+ )
98
225
  criteria_color = (
99
226
  "green" if criteria_count == total_criteria else "yellow"
100
227
  )
@@ -115,12 +242,40 @@ class ScenarioReporter:
115
242
 
116
243
 
117
244
  # Store the original run method
118
- original_run = Scenario.run
245
+ original_run = ScenarioExecutor._run
119
246
 
120
247
 
121
248
  @pytest.hookimpl(trylast=True)
122
249
  def pytest_configure(config):
123
- """Register the agent_test marker and set up automatic reporting."""
250
+ """
251
+ Configure pytest integration for Scenario testing.
252
+
253
+ This hook is called when pytest starts and sets up:
254
+ - Registration of the @pytest.mark.agent_test marker
255
+ - Debug mode configuration from command line arguments
256
+ - Global scenario reporter for collecting results
257
+ - Automatic result collection from all scenario.run() calls
258
+
259
+ Args:
260
+ config: pytest configuration object
261
+
262
+ Note:
263
+ This function runs automatically when pytest loads the plugin.
264
+ Users don't need to call it directly.
265
+
266
+ Debug Mode:
267
+ When --debug is passed to pytest, enables step-by-step scenario
268
+ execution with user intervention capabilities.
269
+
270
+ Example:
271
+ ```bash
272
+ # Enable debug mode for all scenarios
273
+ pytest tests/ --debug -s
274
+
275
+ # Run normally
276
+ pytest tests/
277
+ ```
278
+ """
124
279
  # Register the marker
125
280
  config.addinivalue_line(
126
281
  "markers", "agent_test: mark test as an agent scenario test"
@@ -128,7 +283,7 @@ def pytest_configure(config):
128
283
 
129
284
  if config.getoption("--debug"):
130
285
  print(colored("\nScenario debug mode enabled (--debug).", "yellow"))
131
- Scenario.configure(verbose=True, debug=True)
286
+ ScenarioConfig.configure(verbose=True, debug=True)
132
287
 
133
288
  # Create a global reporter instance
134
289
  config._scenario_reporter = ScenarioReporter()
@@ -149,27 +304,80 @@ def pytest_configure(config):
149
304
  return result
150
305
 
151
306
  # Apply the patch
152
- Scenario.run = auto_reporting_run
307
+ ScenarioExecutor._run = auto_reporting_run
153
308
 
154
309
 
155
310
  @pytest.hookimpl(trylast=True)
156
311
  def pytest_unconfigure(config):
157
- """Clean up and print final report when pytest exits."""
312
+ """
313
+ Clean up pytest integration when pytest exits.
314
+
315
+ This hook is called when pytest is shutting down and:
316
+ - Prints the final scenario test report
317
+ - Restores the original ScenarioExecutor._run method
318
+ - Cleans up any remaining resources
319
+
320
+ Args:
321
+ config: pytest configuration object
322
+
323
+ Note:
324
+ This function runs automatically when pytest exits.
325
+ Users don't need to call it directly.
326
+ """
158
327
  # Print the final report
159
328
  if hasattr(config, "_scenario_reporter"):
160
329
  config._scenario_reporter.print_report()
161
330
 
162
331
  # Restore the original method
163
- Scenario.run = original_run
332
+ ScenarioExecutor._run = original_run
164
333
 
165
334
 
166
335
  @pytest.fixture
167
336
  def scenario_reporter(request):
168
337
  """
169
- A pytest fixture for accessing the global scenario reporter.
338
+ Pytest fixture for accessing the global scenario reporter.
170
339
 
171
340
  This fixture provides access to the same reporter that's used for automatic
172
341
  reporting, allowing tests to explicitly interact with the reporter if needed.
342
+
343
+ Args:
344
+ request: pytest request object containing test context
345
+
346
+ Yields:
347
+ ScenarioReporter: The global reporter instance collecting all scenario results
348
+
349
+ Example:
350
+ ```python
351
+ @pytest.mark.agent_test
352
+ async def test_with_custom_reporting(scenario_reporter):
353
+ # Run your scenarios
354
+ result1 = await scenario.run(
355
+ name="test 1",
356
+ description="First test",
357
+ agents=[agent, user_sim, judge]
358
+ )
359
+
360
+ result2 = await scenario.run(
361
+ name="test 2",
362
+ description="Second test",
363
+ agents=[agent, user_sim, judge]
364
+ )
365
+
366
+ # Access collected results
367
+ assert len(scenario_reporter.results) == 2
368
+
369
+ # Check success rate
370
+ summary = scenario_reporter.get_summary()
371
+ assert summary['success_rate'] >= 90
372
+
373
+ # Print intermediate report
374
+ if summary['failed'] > 0:
375
+ scenario_reporter.print_report()
376
+ ```
377
+
378
+ Note:
379
+ The reporter automatically collects results from all scenario.run() calls,
380
+ so you don't need to manually add results unless you're doing custom reporting.
173
381
  """
174
382
  # Get the global reporter from pytest config
175
383
  reporter = request.config._scenario_reporter