langwatch-scenario 0.3.0__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
scenario/cache.py CHANGED
@@ -1,3 +1,12 @@
+"""
+Caching module for deterministic scenario testing.
+
+This module provides caching functionality to make scenario tests deterministic
+and repeatable. It caches LLM calls and other non-deterministic operations based
+on scenario configuration and function arguments, enabling consistent test results
+across multiple runs.
+"""
+
 from contextvars import ContextVar
 import inspect
 import os
@@ -8,29 +17,100 @@ from joblib import Memory
 import json
 
 import wrapt
-from scenario.utils import SerializableWithStringFallback
+from scenario.types import AgentInput
+from scenario._utils.utils import SerializableWithStringFallback
 
 if TYPE_CHECKING:
-    from scenario.scenario import Scenario
+    from scenario.scenario_executor import ScenarioExecutor
 
 
 context_scenario = ContextVar("scenario")
 
+
 def get_cache() -> Memory:
-    """Get a cross-platform cache directory for scenario."""
+    """
+    Get a cross-platform cache directory for scenario execution.
+
+    Creates and returns a joblib Memory instance configured to use a
+    cross-platform cache directory. The cache location can be customized
+    via the SCENARIO_CACHE_DIR environment variable.
+
+    Returns:
+        Memory instance configured with the appropriate cache directory
+
+    Example:
+        ```
+        # Default cache location: ~/.scenario/cache
+        cache = get_cache()
+
+        # Custom cache location via environment variable
+        os.environ["SCENARIO_CACHE_DIR"] = "/tmp/my_scenario_cache"
+        cache = get_cache()
+        ```
+    """
     home_dir = str(Path.home())
     cache_dir = os.path.join(home_dir, ".scenario", "cache")
 
     return Memory(location=os.environ.get("SCENARIO_CACHE_DIR", cache_dir), verbose=0)
 
+
 memory = get_cache()
 
+
 def scenario_cache(ignore=[]):
+    """
+    Decorator for caching function calls during scenario execution.
+
+    This decorator caches function calls based on the scenario's cache_key,
+    scenario configuration, and function arguments. It enables deterministic
+    testing by ensuring the same inputs always produce the same outputs,
+    making tests repeatable and faster on subsequent runs.
+
+    Args:
+        ignore: List of argument names to exclude from the cache key computation.
+            Commonly used to ignore 'self' for instance methods or other
+            non-deterministic arguments.
+
+    Returns:
+        Decorator function that can be applied to any function or method
+
+    Example:
+        ```
+        import scenario
+
+        class MyAgent:
+            @scenario.cache(ignore=["self"])
+            def invoke(self, message: str, context: dict) -> str:
+                # This LLM call will be cached
+                response = llm_client.complete(
+                    model="gpt-4",
+                    messages=[{"role": "user", "content": message}]
+                )
+                return response.choices[0].message.content
+
+        # Usage in tests
+        scenario.configure(cache_key="my-test-suite-v1")
+
+        # First run: makes actual LLM calls and caches results
+        result1 = await scenario.run(...)
+
+        # Second run: uses cached results, much faster
+        result2 = await scenario.run(...)
+        # result1 and result2 will be identical
+        ```
+
+    Note:
+        - Caching only occurs when a cache_key is set in the scenario configuration
+        - The cache key is computed from scenario config, function arguments, and cache_key
+        - AgentInput objects are specially handled to exclude thread_id from caching
+        - Both sync and async functions are supported
+    """
+
    @wrapt.decorator
    def wrapper(wrapped: Callable, instance=None, args=[], kwargs={}):
-        scenario: "Scenario" = context_scenario.get()
+        scenario: "ScenarioExecutor" = context_scenario.get()
 
-        if not scenario.cache_key:
+        if not scenario.config.cache_key:
             return wrapped(*args, **kwargs)
 
         sig = inspect.signature(wrapped)
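Worth noting from the hunk above: `memory = get_cache()` runs at module import time, so the environment variable must be set before `scenario.cache` is first imported. A minimal sketch of the same resolution order (the printed path is illustrative):

```python
import os
from pathlib import Path

# Same lookup as get_cache(): SCENARIO_CACHE_DIR wins, else ~/.scenario/cache.
# Because memory = get_cache() executes at import time, set the variable
# before importing the module if you want a custom location.
cache_dir = os.environ.get(
    "SCENARIO_CACHE_DIR",
    os.path.join(str(Path.home()), ".scenario", "cache"),
)
print(cache_dir)
```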
@@ -43,20 +123,65 @@ def scenario_cache(ignore=[]):
            if arg in all_args:
                del all_args[arg]
 
+        for key, value in all_args.items():
+            if isinstance(value, AgentInput):
+                scenario_state = value.scenario_state.model_dump(exclude={"thread_id"})
+                all_args[key] = value.model_dump(exclude={"thread_id"})
+                all_args[key]["scenario_state"] = scenario_state
+
         cache_key = json.dumps(
             {
-                "cache_key": scenario.cache_key,
-                "scenario": scenario.model_dump(exclude={"agent"}),
+                "cache_key": scenario.config.cache_key,
+                "scenario": scenario.config.model_dump(exclude={"agents"}),
                 "all_args": all_args,
             },
             cls=SerializableWithStringFallback,
         )
 
-        return _cached_call(wrapped, args, kwargs, cache_key=cache_key)
+        # if is an async function, we need to wrap it in a sync function
+        if inspect.iscoroutinefunction(wrapped):
+            return _async_cached_call(wrapped, args, kwargs, cache_key=cache_key)
+        else:
+            return _cached_call(wrapped, args, kwargs, cache_key=cache_key)
 
     return wrapper
 
 
 @memory.cache(ignore=["func", "args", "kwargs"])
 def _cached_call(func: Callable, args, kwargs, cache_key):
-    return func(*args, **kwargs)
+    """
+    Internal function for caching synchronous function calls.
+
+    This function is used internally by the scenario_cache decorator
+    to cache synchronous function calls using joblib.Memory.
+
+    Args:
+        func: The function to call and cache
+        args: Positional arguments for the function
+        kwargs: Keyword arguments for the function
+        cache_key: Cache key for deterministic caching
+
+    Returns:
+        The result of calling func(*args, **kwargs)
+    """
+    return func(*args, **kwargs)
+
+
+@memory.cache(ignore=["func", "args", "kwargs"])
+async def _async_cached_call(func: Callable, args, kwargs, cache_key):
+    """
+    Internal function for caching asynchronous function calls.
+
+    This function is used internally by the scenario_cache decorator
+    to cache asynchronous function calls using joblib.Memory.
+
+    Args:
+        func: The async function to call and cache
+        args: Positional arguments for the function
+        kwargs: Keyword arguments for the function
+        cache_key: Cache key for deterministic caching
+
+    Returns:
+        The result of calling await func(*args, **kwargs)
+    """
+    return await func(*args, **kwargs)
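The new caching path keys both sync and async calls on a single JSON string while telling joblib to ignore the callable and its raw arguments. A self-contained sketch of that pattern, assuming nothing beyond joblib itself (`cached_call`, `slow_add`, and the demo cache path are illustrative, not package code):

```python
import json
from joblib import Memory

# Illustrative location; the package defaults to ~/.scenario/cache
memory = Memory(location="/tmp/scenario_cache_demo", verbose=0)

@memory.cache(ignore=["func", "args", "kwargs"])
def cached_call(func, args, kwargs, cache_key):
    # joblib hashes only cache_key, so equal keys return the stored result
    return func(*args, **kwargs)

def slow_add(a, b):
    print("computing...")  # printed only on a cache miss
    return a + b

key = json.dumps({"cache_key": "demo-v1", "all_args": {"a": 1, "b": 2}})
print(cached_call(slow_add, (1, 2), {}, cache_key=key))  # miss: computes and stores
print(cached_call(slow_add, (1, 2), {}, cache_key=key))  # hit: returns cached 3
```

This also explains the `AgentInput` special-casing above: excluding volatile fields such as `thread_id` from the serialized arguments is what keeps the key stable across runs.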
scenario/config.py CHANGED
@@ -1,33 +1,166 @@
 """
 Configuration module for Scenario.
+
+This module provides configuration classes for customizing the behavior of the
+Scenario testing framework, including LLM model settings, execution parameters,
+and debugging options.
 """
 
-from typing import TYPE_CHECKING, Any, Optional, Type, Union
+from typing import Optional, Union, ClassVar
 from pydantic import BaseModel
 
-if TYPE_CHECKING:
-    from scenario.scenario_agent_adapter import ScenarioAgentAdapter
 
-    ScenarioAgentType = ScenarioAgentAdapter
-else:
-    ScenarioAgentType = Any
+class ModelConfig(BaseModel):
+    """
+    Configuration for LLM model settings.
+
+    This class encapsulates all the parameters needed to configure an LLM model
+    for use with user simulator and judge agents in the Scenario framework.
+
+    Attributes:
+        model: The model identifier (e.g., "openai/gpt-4.1-mini", "anthropic/claude-3-sonnet")
+        api_key: Optional API key for the model provider
+        temperature: Sampling temperature for response generation (0.0 = deterministic, 1.0 = creative)
+        max_tokens: Maximum number of tokens to generate in responses
+
+    Example:
+        ```
+        model_config = ModelConfig(
+            model="openai/gpt-4.1-mini",
+            api_key="your-api-key",
+            temperature=0.1,
+            max_tokens=1000
+        )
+        ```
+    """
+
+    model: str
+    api_key: Optional[str] = None
+    temperature: float = 0.0
+    max_tokens: Optional[int] = None
 
 
 class ScenarioConfig(BaseModel):
     """
-    Configuration class for the Scenario library.
+    Global configuration class for the Scenario testing framework.
 
-    This allows users to set global configuration settings for the library,
-    such as the LLM provider and model to use for the testing agent.
+    This class allows users to set default behavior and parameters that apply
+    to all scenario executions, including the LLM model to use for simulator
+    and judge agents, execution limits, and debugging options.
+
+    Attributes:
+        default_model: Default LLM model configuration for agents (can be string or ModelConfig)
+        max_turns: Maximum number of conversation turns before scenario times out
+        verbose: Whether to show detailed output during execution (True/False or verbosity level)
+        cache_key: Key for caching scenario results to ensure deterministic behavior
+        debug: Whether to enable debug mode with step-by-step interaction
+
+    Example:
+        ```
+        # Configure globally for all scenarios
+        scenario.configure(
+            default_model="openai/gpt-4.1-mini",
+            max_turns=15,
+            verbose=True,
+            cache_key="my-test-suite-v1",
+            debug=False
+        )
+
+        # Or create a specific config instance
+        config = ScenarioConfig(
+            default_model=ModelConfig(
+                model="openai/gpt-4.1-mini",
+                temperature=0.2
+            ),
+            max_turns=20
+        )
+        ```
     """
 
-    testing_agent: Optional[Type[ScenarioAgentType]] = None
+    default_model: Optional[Union[str, ModelConfig]] = None
     max_turns: Optional[int] = 10
     verbose: Optional[Union[bool, int]] = True
     cache_key: Optional[str] = None
     debug: Optional[bool] = False
 
+    default_config: ClassVar[Optional["ScenarioConfig"]] = None
+
+    @classmethod
+    def configure(
+        cls,
+        default_model: Optional[str] = None,
+        max_turns: Optional[int] = None,
+        verbose: Optional[Union[bool, int]] = None,
+        cache_key: Optional[str] = None,
+        debug: Optional[bool] = None,
+    ) -> None:
+        """
+        Set global configuration settings for all scenario executions.
+
+        This method allows you to configure default behavior that will be applied
+        to all scenarios unless explicitly overridden in individual scenario runs.
+
+        Args:
+            default_model: Default LLM model identifier for user simulator and judge agents
+            max_turns: Maximum number of conversation turns before timeout (default: 10)
+            verbose: Enable verbose output during scenario execution
+            cache_key: Cache key for deterministic scenario behavior across runs
+            debug: Enable debug mode for step-by-step execution with user intervention
+
+        Example:
+            ```
+            import scenario
+
+            # Set up default configuration
+            scenario.configure(
+                default_model="openai/gpt-4.1-mini",
+                max_turns=15,
+                verbose=True,
+                debug=False
+            )
+
+            # All subsequent scenario runs will use these defaults
+            result = await scenario.run(
+                name="my test",
+                description="Test scenario",
+                agents=[my_agent, scenario.UserSimulatorAgent(), scenario.JudgeAgent()]
+            )
+            ```
+        """
+        existing_config = cls.default_config or ScenarioConfig()
+
+        cls.default_config = existing_config.merge(
+            ScenarioConfig(
+                default_model=default_model,
+                max_turns=max_turns,
+                verbose=verbose,
+                cache_key=cache_key,
+                debug=debug,
+            )
+        )
+
     def merge(self, other: "ScenarioConfig") -> "ScenarioConfig":
+        """
+        Merge this configuration with another configuration.
+
+        Values from the other configuration will override values in this
+        configuration where they are not None.
+
+        Args:
+            other: Another ScenarioConfig instance to merge with
+
+        Returns:
+            A new ScenarioConfig instance with merged values
+
+        Example:
+            ```
+            base_config = ScenarioConfig(max_turns=10, verbose=True)
+            override_config = ScenarioConfig(max_turns=20)
+
+            merged = base_config.merge(override_config)
+            # Result: max_turns=20, verbose=True
+            ```
+        """
         return ScenarioConfig(
             **{
                 **self.items(),
@@ -36,4 +169,17 @@ class ScenarioConfig(BaseModel):
         )
 
     def items(self):
+        """
+        Get configuration items as a dictionary.
+
+        Returns:
+            Dictionary of configuration key-value pairs, excluding None values
+
+        Example:
+            ```
+            config = ScenarioConfig(max_turns=15, verbose=True)
+            items = config.items()
+            # Result: {"max_turns": 15, "verbose": True}
+            ```
+        """
         return {k: getattr(self, k) for k in self.model_dump(exclude_none=True).keys()}
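Together, `merge()` and `items()` implement a simple overlay: fields set on `other` win, fields left as None fall through to `self`. A standalone sketch of the same pattern with plain Pydantic (the `Cfg` model is illustrative, not part of the package):

```python
from typing import Optional
from pydantic import BaseModel

class Cfg(BaseModel):
    max_turns: Optional[int] = None
    verbose: Optional[bool] = None

    def merge(self, other: "Cfg") -> "Cfg":
        # Fields left as None in `other` keep this config's values
        return Cfg(**{
            **self.model_dump(exclude_none=True),
            **other.model_dump(exclude_none=True),
        })

base = Cfg(max_turns=10, verbose=True)
print(base.merge(Cfg(max_turns=20)))  # max_turns=20 verbose=True
```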
scenario/events/__init__.py ADDED
@@ -0,0 +1,66 @@
+"""
+Scenario events module for handling event publishing, processing, and reporting.
+
+This module provides event models, an event bus for processing, and utilities
+for converting between different message formats.
+"""
+
+# Core event types and models
+from .events import (
+    ScenarioEvent,
+    ScenarioRunStartedEvent,
+    ScenarioRunStartedEventMetadata,
+    ScenarioRunFinishedEvent,
+    ScenarioRunFinishedEventResults,
+    ScenarioRunFinishedEventVerdict,
+    ScenarioRunFinishedEventStatus,
+    ScenarioMessageSnapshotEvent,
+    MessageType,
+)
+
+# Event processing infrastructure
+from .event_bus import ScenarioEventBus
+from .event_reporter import EventReporter
+
+# Message utilities and types
+from .messages import (
+    Message,
+    UserMessage,
+    AssistantMessage,
+    SystemMessage,
+    ToolMessage,
+    ToolCall,
+    FunctionCall,
+)
+
+# Utility functions
+from .utils import convert_messages_to_ag_ui_messages
+
+__all__ = [
+    # Event types
+    "ScenarioEvent",
+    "ScenarioRunStartedEvent",
+    "ScenarioRunStartedEventMetadata",
+    "ScenarioRunFinishedEvent",
+    "ScenarioRunFinishedEventResults",
+    "ScenarioRunFinishedEventVerdict",
+    "ScenarioRunFinishedEventStatus",
+    "ScenarioMessageSnapshotEvent",
+    "MessageType",
+
+    # Event processing
+    "ScenarioEventBus",
+    "EventReporter",
+
+    # Messages
+    "Message",
+    "UserMessage",
+    "AssistantMessage",
+    "SystemMessage",
+    "ToolMessage",
+    "ToolCall",
+    "FunctionCall",
+
+    # Utils
+    "convert_messages_to_ag_ui_messages",
+]
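Assuming this hunk is the initializer of the new events subpackage (the original diff omitted the file header; the relative imports and `__all__` re-exports point to a package `__init__.py`), callers get one flat import surface. A hypothetical consumer:

```python
# Hypothetical usage; assumes the re-exports above are importable as scenario.events
from scenario.events import ScenarioEventBus, ScenarioRunFinishedEvent

bus = ScenarioEventBus()  # falls back to a default EventReporter
```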
scenario/events/event_bus.py ADDED
@@ -0,0 +1,175 @@
+from rx.subject.subject import Subject
+from rx import operators as ops
+from typing import Optional
+from datetime import datetime, UTC
+from .events import ScenarioEvent, ScenarioRunFinishedEvent
+from .event_reporter import EventReporter
+from typing import Any
+
+import asyncio
+
+
+class ScenarioEventBus:
+    """
+    Manages scenario event publishing, subscription, and processing pipeline using RxPY.
+
+    The EventBus provides a centralized event processing system that handles scenario
+    events asynchronously with retry logic and concurrent processing. It automatically
+    manages the event stream lifecycle and ensures all events are processed before
+    completion.
+
+    Events are processed concurrently to improve performance, and failed event
+    processing is automatically retried with exponential backoff.
+
+    Attributes:
+        _events: RxPY Subject for event stream management
+        _event_reporter: EventReporter instance for HTTP posting of events
+        _processing_complete: Async event to signal when all events are processed
+        _processing_task: Background task for event processing
+        _max_retries: Maximum number of retry attempts for failed event processing
+
+    Example:
+        ```python
+        # Create event bus with custom reporter
+        reporter = EventReporter(endpoint="https://api.langwatch.ai")
+        event_bus = ScenarioEventBus(event_reporter=reporter, max_retries=5)
+
+        # Start listening for events
+        await event_bus.listen()
+
+        # Publish events
+        event_bus.publish(scenario_started_event)
+        event_bus.publish(message_snapshot_event)
+        event_bus.publish(scenario_finished_event)  # This completes the stream
+
+        # Wait for all events to be processed
+        await event_bus.drain()
+        ```
+    """
+
+    def __init__(
+        self, event_reporter: Optional[EventReporter] = None, max_retries: int = 3
+    ):
+        """
+        Initialize the event bus with optional event reporter and retry configuration.
+
+        Args:
+            event_reporter: Optional EventReporter for HTTP posting of events.
+                If not provided, a default EventReporter will be created.
+            max_retries: Maximum number of retry attempts for failed event processing.
+                Defaults to 3 attempts with exponential backoff.
+        """
+        self._events = Subject()
+        # Use default EventReporter if none provided
+        self._event_reporter: EventReporter = event_reporter or EventReporter()
+        self._processing_complete = asyncio.Event()
+        self._processing_task: Optional[asyncio.Task[Any]] = None
+        self._max_retries = max_retries
+
+    def publish(self, event: ScenarioEvent) -> None:
+        """
+        Publishes an event into the processing pipeline.
+
+        This method adds an event to the RxPY stream for processing. The event
+        timestamp is automatically set to the current time in milliseconds if
+        not already provided. Publishing a ScenarioRunFinishedEvent automatically
+        completes the event stream.
+
+        Args:
+            event: The scenario event to publish. Must be a valid ScenarioEvent type.
+
+        Note:
+            Events are processed asynchronously in the background. Use `drain()`
+            to wait for all events to be processed after publishing.
+        """
+        # Convert to Unix timestamp in milliseconds
+        event.timestamp = int(datetime.now(UTC).timestamp() * 1000)
+        self._events.on_next(event)
+
+        if isinstance(event, ScenarioRunFinishedEvent):
+            self._events.on_completed()
+
+    async def listen(self) -> None:
+        """
+        Begins listening for and processing events.
+
+        This method sets up the RxPY event processing pipeline with concurrent
+        processing and automatic retry logic. It should be called before publishing
+        any events to ensure proper event handling.
+
+        The processing pipeline:
+        1. Receives events from the publish() method
+        2. Processes each event concurrently using asyncio tasks
+        3. Automatically retries failed events with exponential backoff
+        4. Completes when a ScenarioRunFinishedEvent is published
+
+        Note:
+            This method is idempotent - calling it multiple times has no effect
+            if the processing pipeline is already active.
+        """
+        if self._processing_task is not None:
+            return
+
+        async def process_single_event(event: ScenarioEvent, attempt: int = 1) -> bool:
+            """
+            Process a single event with retry logic.
+
+            Args:
+                event: The event to process
+                attempt: Current attempt number (1-based)
+
+            Returns:
+                True if processing succeeded, False if all retries failed
+            """
+            try:
+                if self._event_reporter:
+                    await self._event_reporter.post_event(event)
+                return True
+            except Exception as e:
+                if attempt >= self._max_retries:
+                    print(f"Failed to process event after {attempt} attempts: {e}")
+                    return False
+                print(
+                    f"Error processing event (attempt {attempt}/{self._max_retries}): {e}"
+                )
+                await asyncio.sleep(0.1 * (2 ** (attempt - 1)))
+                return await process_single_event(event, attempt + 1)
+
+        def process_event(event: ScenarioEvent) -> asyncio.Task[bool]:
+            """Create an asyncio task to process an event concurrently."""
+            loop = asyncio.get_event_loop()
+            return loop.create_task(process_single_event(event))
+
+        # Set up the event processing pipeline with concurrent processing
+        self._events.pipe(ops.flat_map(lambda event: process_event(event))).subscribe(
+            on_next=lambda success: None,
+            on_completed=lambda: self._processing_complete.set(),
+            on_error=lambda e: print(f"Unexpected error in event stream: {e}"),
+        )
+
+    async def drain(self) -> None:
+        """
+        Waits for all events to be processed after the stream is completed.
+
+        This method blocks until all events in the processing pipeline have been
+        handled. It should be called after publishing all events to ensure
+        proper cleanup and that no events are lost.
+
+        Note:
+            This method will wait indefinitely if the event stream has not been
+            completed (i.e., if no ScenarioRunFinishedEvent has been published).
+        """
+        await self._processing_complete.wait()
+
+    def is_completed(self) -> bool:
+        """
+        Returns whether the event bus has completed processing all events.
+
+        This method provides a non-blocking way to check if all events have
+        been processed. It's useful for monitoring the state of the event bus
+        without blocking execution.
+
+        Returns:
+            True if all events have been processed, False otherwise
+        """
+        return self._processing_complete.is_set()
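The backoff arithmetic in `process_single_event` sleeps 0.1 s, 0.2 s, 0.4 s, and so on between attempts. The same retry shape, extracted into a self-contained sketch without RxPY (`post_with_retry` and `flaky` are illustrative names, not package APIs):

```python
import asyncio

async def post_with_retry(post, event, max_retries=3):
    # Mirrors process_single_event: retry on failure, sleeping
    # 0.1 * 2**(attempt - 1) seconds between attempts.
    for attempt in range(1, max_retries + 1):
        try:
            await post(event)
            return True
        except Exception as e:
            if attempt >= max_retries:
                print(f"Failed after {attempt} attempts: {e}")
                return False
            await asyncio.sleep(0.1 * (2 ** (attempt - 1)))
    return False

async def main():
    calls = {"n": 0}
    async def flaky(event):
        calls["n"] += 1
        if calls["n"] < 3:
            raise RuntimeError("transient")
    print(await post_with_retry(flaky, {"type": "demo"}))  # True on the third attempt

asyncio.run(main())
```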