langwatch-scenario 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {langwatch_scenario-0.3.0.dist-info → langwatch_scenario-0.4.0.dist-info}/METADATA +140 -79
- langwatch_scenario-0.4.0.dist-info/RECORD +18 -0
- scenario/__init__.py +223 -9
- scenario/agent_adapter.py +111 -0
- scenario/cache.py +132 -8
- scenario/config.py +154 -10
- scenario/error_messages.py +8 -38
- scenario/judge_agent.py +435 -0
- scenario/pytest_plugin.py +223 -15
- scenario/scenario_executor.py +428 -136
- scenario/scenario_state.py +205 -0
- scenario/script.py +361 -0
- scenario/types.py +193 -20
- scenario/user_simulator_agent.py +249 -0
- scenario/utils.py +252 -2
- langwatch_scenario-0.3.0.dist-info/RECORD +0 -16
- scenario/scenario.py +0 -238
- scenario/scenario_agent_adapter.py +0 -16
- scenario/testing_agent.py +0 -279
- {langwatch_scenario-0.3.0.dist-info → langwatch_scenario-0.4.0.dist-info}/WHEEL +0 -0
- {langwatch_scenario-0.3.0.dist-info → langwatch_scenario-0.4.0.dist-info}/entry_points.txt +0 -0
- {langwatch_scenario-0.3.0.dist-info → langwatch_scenario-0.4.0.dist-info}/top_level.txt +0 -0
scenario/pytest_plugin.py
CHANGED
@@ -1,33 +1,129 @@
 """
 Pytest plugin for Scenario testing library.
+
+This module provides pytest integration for the Scenario framework, including
+automatic test reporting, debug mode support, and collection of scenario
+results across test runs. It enables seamless integration with existing
+pytest-based testing workflows.
 """

 import pytest
-from typing import TypedDict
+from typing import TypedDict, List, Tuple
 import functools
 from termcolor import colored

+from scenario.config import ScenarioConfig
 from scenario.types import ScenarioResult

-from .
+from .scenario_executor import ScenarioExecutor
+import scenario


 class ScenarioReporterResults(TypedDict):
-
+    """
+    Type definition for scenario test results stored by the reporter.
+
+    Attributes:
+        scenario: The ScenarioExecutor instance that ran the test
+        result: The ScenarioResult containing test outcome and details
+    """
+
+    scenario: ScenarioExecutor
     result: ScenarioResult


 # ScenarioReporter class definition moved outside the fixture for global use
 class ScenarioReporter:
+    """
+    Collects and reports on scenario test results across a pytest session.
+
+    This class automatically collects results from all scenario tests run during
+    a pytest session and provides comprehensive reporting including success rates,
+    timing information, and detailed failure analysis.
+
+    The reporter is automatically instantiated by the pytest plugin and collects
+    results from all scenario.run() calls without requiring explicit user setup.
+
+    Attributes:
+        results: List of all scenario test results collected during the session
+
+    Example:
+        The reporter is used automatically, but you can access it in tests:
+
+        ```python
+        def test_my_scenarios(scenario_reporter):
+            # Run your scenarios
+            result1 = await scenario.run(...)
+            result2 = await scenario.run(...)
+
+            # Check collected results
+            assert len(scenario_reporter.results) == 2
+
+            # Get summary statistics
+            summary = scenario_reporter.get_summary()
+            print(f"Success rate: {summary['success_rate']}%")
+        ```
+    """
+
     def __init__(self):
+        """Initialize an empty scenario reporter."""
         self.results: list[ScenarioReporterResults] = []

-    def add_result(self, scenario, result):
-        """
+    def add_result(self, scenario: ScenarioExecutor, result: ScenarioResult):
+        """
+        Add a test result to the reporter.
+
+        This method is called automatically by the pytest plugin whenever
+        a scenario.run() call completes. It stores both the scenario
+        configuration and the test result for later reporting.
+
+        Args:
+            scenario: The ScenarioExecutor instance that ran the test
+            result: The ScenarioResult containing test outcome and details
+
+        Example:
+            ```python
+            # This happens automatically when you run scenarios
+            result = await scenario.run(
+                name="my test",
+                description="Test description",
+                agents=[
+                    my_agent,
+                    scenario.UserSimulatorAgent(),
+                    scenario.JudgeAgent(criteria=["Agent provides helpful response"])
+                ]
+            )
+            # Result is automatically added to the global reporter
+            ```
+        """
         self.results.append({"scenario": scenario, "result": result})

     def get_summary(self):
-        """
+        """
+        Get a summary of all test results.
+
+        Calculates aggregate statistics across all scenario tests that
+        have been run during the current pytest session.
+
+        Returns:
+            Dictionary containing summary statistics:
+            - total: Total number of scenarios run
+            - passed: Number of scenarios that passed
+            - failed: Number of scenarios that failed
+            - success_rate: Percentage of scenarios that passed (0-100)
+
+        Example:
+            ```python
+            def test_summary_check(scenario_reporter):
+                # Run some scenarios...
+                await scenario.run(...)
+                await scenario.run(...)
+
+                summary = scenario_reporter.get_summary()
+                assert summary['total'] == 2
+                assert summary['success_rate'] >= 80  # Require 80% success rate
+            ```
+        """
         total = len(self.results)
         passed = sum(1 for r in self.results if r["result"].success)
         failed = total - passed
@@ -40,7 +136,36 @@ class ScenarioReporter:
         }

     def print_report(self):
-        """
+        """
+        Print a detailed report of all test results.
+
+        Outputs a comprehensive report to the console showing:
+        - Overall summary statistics
+        - Individual scenario results with success/failure status
+        - Detailed reasoning for each scenario outcome
+        - Timing information when available
+        - Criteria pass/fail breakdown for judge-evaluated scenarios
+
+        The report is automatically printed at the end of pytest sessions,
+        but can also be called manually for intermediate reporting.
+
+        Example output:
+            ```
+            === Scenario Test Report ===
+            Total Scenarios: 5
+            Passed: 4
+            Failed: 1
+            Success Rate: 80%
+
+            1. weather query test - PASSED in 2.34s (agent: 1.12s)
+               Reasoning: Agent successfully provided weather information
+               Passed Criteria: 2/2
+
+            2. complex math problem - FAILED in 5.67s (agent: 3.45s)
+               Reasoning: Agent provided incorrect calculation
+               Failed Criteria: 1
+            ```
+        """
         if not self.results:
             return  # Skip report if no results

@@ -94,7 +219,9 @@ class ScenarioReporter:

             if hasattr(result, "passed_criteria") and result.passed_criteria:
                 criteria_count = len(result.passed_criteria)
-                total_criteria = len(
+                total_criteria = len(result.passed_criteria) + len(
+                    result.failed_criteria
+                )
                 criteria_color = (
                     "green" if criteria_count == total_criteria else "yellow"
                 )
@@ -115,12 +242,40 @@


 # Store the original run method
-original_run =
+original_run = ScenarioExecutor._run


 @pytest.hookimpl(trylast=True)
 def pytest_configure(config):
-    """
+    """
+    Configure pytest integration for Scenario testing.
+
+    This hook is called when pytest starts and sets up:
+    - Registration of the @pytest.mark.agent_test marker
+    - Debug mode configuration from command line arguments
+    - Global scenario reporter for collecting results
+    - Automatic result collection from all scenario.run() calls
+
+    Args:
+        config: pytest configuration object
+
+    Note:
+        This function runs automatically when pytest loads the plugin.
+        Users don't need to call it directly.
+
+    Debug Mode:
+        When --debug is passed to pytest, enables step-by-step scenario
+        execution with user intervention capabilities.
+
+    Example:
+        ```bash
+        # Enable debug mode for all scenarios
+        pytest tests/ --debug -s
+
+        # Run normally
+        pytest tests/
+        ```
+    """
     # Register the marker
     config.addinivalue_line(
         "markers", "agent_test: mark test as an agent scenario test"
@@ -128,7 +283,7 @@ def pytest_configure(config):

     if config.getoption("--debug"):
         print(colored("\nScenario debug mode enabled (--debug).", "yellow"))
-
+        ScenarioConfig.configure(verbose=True, debug=True)

     # Create a global reporter instance
     config._scenario_reporter = ScenarioReporter()
@@ -149,27 +304,80 @@ def pytest_configure(config):
         return result

     # Apply the patch
-
+    ScenarioExecutor._run = auto_reporting_run


 @pytest.hookimpl(trylast=True)
 def pytest_unconfigure(config):
-    """
+    """
+    Clean up pytest integration when pytest exits.
+
+    This hook is called when pytest is shutting down and:
+    - Prints the final scenario test report
+    - Restores the original ScenarioExecutor._run method
+    - Cleans up any remaining resources
+
+    Args:
+        config: pytest configuration object
+
+    Note:
+        This function runs automatically when pytest exits.
+        Users don't need to call it directly.
+    """
     # Print the final report
     if hasattr(config, "_scenario_reporter"):
         config._scenario_reporter.print_report()

     # Restore the original method
-
+    ScenarioExecutor._run = original_run


 @pytest.fixture
 def scenario_reporter(request):
     """
-
+    Pytest fixture for accessing the global scenario reporter.

     This fixture provides access to the same reporter that's used for automatic
     reporting, allowing tests to explicitly interact with the reporter if needed.
+
+    Args:
+        request: pytest request object containing test context
+
+    Yields:
+        ScenarioReporter: The global reporter instance collecting all scenario results
+
+    Example:
+        ```python
+        @pytest.mark.agent_test
+        def test_with_custom_reporting(scenario_reporter):
+            # Run your scenarios
+            result1 = await scenario.run(
+                name="test 1",
+                description="First test",
+                agents=[agent, user_sim, judge]
+            )
+
+            result2 = await scenario.run(
+                name="test 2",
+                description="Second test",
+                agents=[agent, user_sim, judge]
+            )
+
+            # Access collected results
+            assert len(scenario_reporter.results) == 2
+
+            # Check success rate
+            summary = scenario_reporter.get_summary()
+            assert summary['success_rate'] >= 90
+
+            # Print intermediate report
+            if summary['failed'] > 0:
+                scenario_reporter.print_report()
+        ```
+
+    Note:
+        The reporter automatically collects results from all scenario.run() calls,
+        so you don't need to manually add results unless you're doing custom reporting.
     """
     # Get the global reporter from pytest config
     reporter = request.config._scenario_reporter