langwatch-scenario 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff shows the contents of publicly released package versions as published to a supported registry. It is provided for informational purposes only and reflects the changes between versions as they appear in the public registry.
@@ -0,0 +1,111 @@
+ """
+ Agent adapter module for integrating custom agents with the Scenario framework.
+
+ This module provides the abstract base class that users must implement to integrate
+ their existing agents with the Scenario testing framework. The adapter pattern allows
+ any agent implementation to work with the framework regardless of its underlying
+ architecture or API.
+ """
+
+ from abc import ABC, abstractmethod
+ from typing import ClassVar
+
+ from .types import AgentInput, AgentReturnTypes, AgentRole
+
+
+ class AgentAdapter(ABC):
+     """
+     Abstract base class for integrating custom agents with the Scenario framework.
+
+     This adapter pattern allows you to wrap any existing agent implementation
+     (LLM calls, agent frameworks, or complex multi-step systems) to work with
+     the Scenario testing framework. The adapter receives structured input about
+     the conversation state and returns responses in a standardized format.
+
+     Attributes:
+         role: The role this agent plays in scenarios (USER, AGENT, or JUDGE)
+
+     Example:
+         ```python
+         import scenario
+         from my_agent_library import MyCustomAgent
+
+         class MyAgentAdapter(scenario.AgentAdapter):
+             def __init__(self):
+                 self.agent = MyCustomAgent()
+
+             async def call(self, input: scenario.AgentInput) -> scenario.AgentReturnTypes:
+                 # Get the latest user message
+                 user_message = input.last_new_user_message_str()
+
+                 # Call your existing agent
+                 response = await self.agent.process(
+                     message=user_message,
+                     history=input.messages,
+                     thread_id=input.thread_id
+                 )
+
+                 # Return the response (can be string, message dict, or list of messages)
+                 return response
+
+         # Use in a scenario
+         result = await scenario.run(
+             name="test my agent",
+             description="User asks for help with a coding problem",
+             agents=[
+                 MyAgentAdapter(),
+                 scenario.UserSimulatorAgent(),
+                 scenario.JudgeAgent(criteria=["Provides helpful coding advice"])
+             ]
+         )
+         ```
+
+     Note:
+         - The call method must be async
+         - Return types can be: str, ChatCompletionMessageParam, List[ChatCompletionMessageParam], or ScenarioResult
+         - For stateful agents, use input.thread_id to maintain conversation context
+         - For stateless agents, use input.messages for the full conversation history
+     """
+     role: ClassVar[AgentRole] = AgentRole.AGENT
+
+     @abstractmethod
+     async def call(self, input: AgentInput) -> AgentReturnTypes:
+         """
+         Process the input and generate a response.
+
+         This is the main method that your agent implementation must provide.
+         It receives structured information about the current conversation state
+         and must return a response in one of the supported formats.
+
+         Args:
+             input: AgentInput containing conversation history, thread context, and scenario state
+
+         Returns:
+             AgentReturnTypes: The agent's response, which can be:
+             - str: Simple text response
+             - ChatCompletionMessageParam: Single OpenAI-format message
+             - List[ChatCompletionMessageParam]: Multiple messages for complex responses
+             - ScenarioResult: Direct test result (typically only used by judge agents)
+
+         Example:
+             ```python
+             async def call(self, input: AgentInput) -> AgentReturnTypes:
+                 # Simple string response
+                 user_msg = input.last_new_user_message_str()
+                 return f"I understand you said: {user_msg}"
+
+                 # Or structured message response
+                 return {
+                     "role": "assistant",
+                     "content": "Let me help you with that...",
+                     "tool_calls": [...]  # If your agent uses tools
+                 }
+
+                 # Or multiple messages for complex interactions
+                 return [
+                     {"role": "assistant", "content": "Let me search for that information..."},
+                     {"role": "assistant", "content": "Here's what I found: ..."}
+                 ]
+             ```
+         """
+         pass
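
Reviewer's note: the docstrings above already carry full examples, so just one orientation sketch here. This is roughly the smallest conforming implementation of the new `AgentAdapter` surface, useful as a smoke test when porting a 0.3.x adapter (`EchoAdapter` is a hypothetical name; `AgentInput`, `AgentReturnTypes`, and `last_new_user_message_str()` come from the diff above):

```python
import scenario

class EchoAdapter(scenario.AgentAdapter):
    # role defaults to AgentRole.AGENT via the ClassVar shown above

    async def call(self, input: scenario.AgentInput) -> scenario.AgentReturnTypes:
        # AgentInput exposes the newest user message and the full history
        return f"Echo: {input.last_new_user_message_str()}"
```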
scenario/cache.py CHANGED
@@ -1,3 +1,12 @@
+ """
+ Caching module for deterministic scenario testing.
+
+ This module provides caching functionality to make scenario tests deterministic
+ and repeatable. It caches LLM calls and other non-deterministic operations based
+ on scenario configuration and function arguments, enabling consistent test results
+ across multiple runs.
+ """
+
  from contextvars import ContextVar
  import inspect
  import os
@@ -8,29 +17,99 @@ from joblib import Memory
  import json
 
  import wrapt
+ from scenario.types import AgentInput
  from scenario.utils import SerializableWithStringFallback
 
  if TYPE_CHECKING:
-     from scenario.scenario import Scenario
+     from scenario.scenario_executor import ScenarioExecutor
 
 
  context_scenario = ContextVar("scenario")
 
+
  def get_cache() -> Memory:
-     """Get a cross-platform cache directory for scenario."""
+     """
+     Get a cross-platform cache directory for scenario execution.
+
+     Creates and returns a joblib Memory instance configured to use a
+     cross-platform cache directory. The cache location can be customized
+     via the SCENARIO_CACHE_DIR environment variable.
+
+     Returns:
+         Memory instance configured with the appropriate cache directory
+
+     Example:
+         ```python
+         # Default cache location: ~/.scenario/cache
+         cache = get_cache()
+
+         # Custom cache location via environment variable
+         os.environ["SCENARIO_CACHE_DIR"] = "/tmp/my_scenario_cache"
+         cache = get_cache()
+         ```
+     """
      home_dir = str(Path.home())
      cache_dir = os.path.join(home_dir, ".scenario", "cache")
 
      return Memory(location=os.environ.get("SCENARIO_CACHE_DIR", cache_dir), verbose=0)
 
+
  memory = get_cache()
 
+
  def scenario_cache(ignore=[]):
+     """
+     Decorator for caching function calls during scenario execution.
+
+     This decorator caches function calls based on the scenario's cache_key,
+     scenario configuration, and function arguments. It enables deterministic
+     testing by ensuring the same inputs always produce the same outputs,
+     making tests repeatable and faster on subsequent runs.
+
+     Args:
+         ignore: List of argument names to exclude from the cache key computation.
+             Commonly used to ignore 'self' for instance methods or other
+             non-deterministic arguments.
+
+     Returns:
+         Decorator function that can be applied to any function or method
+
+     Example:
+         ```python
+         import scenario
+
+         class MyAgent:
+             @scenario.cache(ignore=["self"])
+             def invoke(self, message: str, context: dict) -> str:
+                 # This LLM call will be cached
+                 response = llm_client.complete(
+                     model="gpt-4",
+                     messages=[{"role": "user", "content": message}]
+                 )
+                 return response.choices[0].message.content
+
+         # Usage in tests
+         scenario.configure(cache_key="my-test-suite-v1")
+
+         # First run: makes actual LLM calls and caches results
+         result1 = await scenario.run(...)
+
+         # Second run: uses cached results, much faster
+         result2 = await scenario.run(...)
+         # result1 and result2 will be identical
+         ```
+
+     Note:
+         - Caching only occurs when a cache_key is set in the scenario configuration
+         - The cache key is computed from scenario config, function arguments, and cache_key
+         - AgentInput objects are specially handled to exclude thread_id from caching
+         - Both sync and async functions are supported
+     """
      @wrapt.decorator
      def wrapper(wrapped: Callable, instance=None, args=[], kwargs={}):
-         scenario: "Scenario" = context_scenario.get()
+         scenario: "ScenarioExecutor" = context_scenario.get()
 
-         if not scenario.cache_key:
+         if not scenario.config.cache_key:
              return wrapped(*args, **kwargs)
 
          sig = inspect.signature(wrapped)
@@ -43,20 +122,65 @@ def scenario_cache(ignore=[]):
          if arg in all_args:
              del all_args[arg]
 
+         for key, value in all_args.items():
+             if isinstance(value, AgentInput):
+                 scenario_state = value.scenario_state.model_dump(exclude={"thread_id"})
+                 all_args[key] = value.model_dump(exclude={"thread_id"})
+                 all_args[key]["scenario_state"] = scenario_state
+
          cache_key = json.dumps(
              {
-                 "cache_key": scenario.cache_key,
-                 "scenario": scenario.model_dump(exclude={"agent"}),
+                 "cache_key": scenario.config.cache_key,
+                 "scenario": scenario.config.model_dump(exclude={"agents"}),
                  "all_args": all_args,
              },
              cls=SerializableWithStringFallback,
          )
 
-         return _cached_call(wrapped, args, kwargs, cache_key=cache_key)
+         # If the wrapped function is async, dispatch to the async cached call
+         if inspect.iscoroutinefunction(wrapped):
+             return _async_cached_call(wrapped, args, kwargs, cache_key=cache_key)
+         else:
+             return _cached_call(wrapped, args, kwargs, cache_key=cache_key)
 
      return wrapper
 
 
  @memory.cache(ignore=["func", "args", "kwargs"])
  def _cached_call(func: Callable, args, kwargs, cache_key):
-     return func(*args, **kwargs)
+     """
+     Internal function for caching synchronous function calls.
+
+     This function is used internally by the scenario_cache decorator
+     to cache synchronous function calls using joblib.Memory.
+
+     Args:
+         func: The function to call and cache
+         args: Positional arguments for the function
+         kwargs: Keyword arguments for the function
+         cache_key: Cache key for deterministic caching
+
+     Returns:
+         The result of calling func(*args, **kwargs)
+     """
+     return func(*args, **kwargs)
+
+
+ @memory.cache(ignore=["func", "args", "kwargs"])
+ async def _async_cached_call(func: Callable, args, kwargs, cache_key):
+     """
+     Internal function for caching asynchronous function calls.
+
+     This function is used internally by the scenario_cache decorator
+     to cache asynchronous function calls using joblib.Memory.
+
+     Args:
+         func: The async function to call and cache
+         args: Positional arguments for the function
+         kwargs: Keyword arguments for the function
+         cache_key: Cache key for deterministic caching
+
+     Returns:
+         The result of calling await func(*args, **kwargs)
+     """
+     return await func(*args, **kwargs)
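
Reviewer's note: the behavioral change in this file is the sync/async dispatch around the memoized call. Below is a self-contained sketch of the same pattern, with a toy in-memory dict standing in for `joblib.Memory` (all names here are hypothetical, not part of the package):

```python
import asyncio
import inspect
import json

_cache: dict = {}

def cached(cache_key_prefix: str):
    """Toy decorator mirroring scenario_cache's dispatch shape."""
    def decorator(fn):
        def wrapper(*args, **kwargs):
            # Key on config + function + arguments, like the JSON key above
            key = json.dumps(
                {"prefix": cache_key_prefix, "fn": fn.__name__, "args": args, "kwargs": kwargs},
                default=str,
            )
            if inspect.iscoroutinefunction(fn):
                # Async functions must return an awaitable, hence the inner coroutine
                async def run_async():
                    if key not in _cache:
                        _cache[key] = await fn(*args, **kwargs)
                    return _cache[key]
                return run_async()
            if key not in _cache:
                _cache[key] = fn(*args, **kwargs)
            return _cache[key]
        return wrapper
    return decorator

@cached("demo-v1")
async def slow_add(a: int, b: int) -> int:
    await asyncio.sleep(0.1)  # stand-in for a slow LLM call
    return a + b

print(asyncio.run(slow_add(1, 2)))  # 3, computed
print(asyncio.run(slow_add(1, 2)))  # 3, served from the cache
```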
scenario/config.py CHANGED
@@ -1,33 +1,164 @@
  """
  Configuration module for Scenario.
+
+ This module provides configuration classes for customizing the behavior of the
+ Scenario testing framework, including LLM model settings, execution parameters,
+ and debugging options.
  """
 
- from typing import TYPE_CHECKING, Any, Optional, Type, Union
+ from typing import Optional, Union, ClassVar
  from pydantic import BaseModel
 
- if TYPE_CHECKING:
-     from scenario.scenario_agent_adapter import ScenarioAgentAdapter
+ class ModelConfig(BaseModel):
+     """
+     Configuration for LLM model settings.
 
-     ScenarioAgentType = ScenarioAgentAdapter
- else:
-     ScenarioAgentType = Any
+     This class encapsulates all the parameters needed to configure an LLM model
+     for use with user simulator and judge agents in the Scenario framework.
+
+     Attributes:
+         model: The model identifier (e.g., "openai/gpt-4.1-mini", "anthropic/claude-3-sonnet")
+         api_key: Optional API key for the model provider
+         temperature: Sampling temperature for response generation (0.0 = deterministic, 1.0 = creative)
+         max_tokens: Maximum number of tokens to generate in responses
+
+     Example:
+         ```python
+         model_config = ModelConfig(
+             model="openai/gpt-4.1-mini",
+             api_key="your-api-key",
+             temperature=0.1,
+             max_tokens=1000
+         )
+         ```
+     """
+     model: str
+     api_key: Optional[str] = None
+     temperature: float = 0.0
+     max_tokens: Optional[int] = None
 
 
  class ScenarioConfig(BaseModel):
      """
-     Configuration class for the Scenario library.
+     Global configuration class for the Scenario testing framework.
+
+     This class allows users to set default behavior and parameters that apply
+     to all scenario executions, including the LLM model to use for simulator
+     and judge agents, execution limits, and debugging options.
 
-     This allows users to set global configuration settings for the library,
-     such as the LLM provider and model to use for the testing agent.
+     Attributes:
+         default_model: Default LLM model configuration for agents (can be string or ModelConfig)
+         max_turns: Maximum number of conversation turns before scenario times out
+         verbose: Whether to show detailed output during execution (True/False or verbosity level)
+         cache_key: Key for caching scenario results to ensure deterministic behavior
+         debug: Whether to enable debug mode with step-by-step interaction
+
+     Example:
+         ```python
+         # Configure globally for all scenarios
+         scenario.configure(
+             default_model="openai/gpt-4.1-mini",
+             max_turns=15,
+             verbose=True,
+             cache_key="my-test-suite-v1",
+             debug=False
+         )
+
+         # Or create a specific config instance
+         config = ScenarioConfig(
+             default_model=ModelConfig(
+                 model="openai/gpt-4.1-mini",
+                 temperature=0.2
+             ),
+             max_turns=20
+         )
+         ```
      """
 
-     testing_agent: Optional[Type[ScenarioAgentType]] = None
+     default_model: Optional[Union[str, ModelConfig]] = None
      max_turns: Optional[int] = 10
      verbose: Optional[Union[bool, int]] = True
      cache_key: Optional[str] = None
      debug: Optional[bool] = False
 
+     default_config: ClassVar[Optional["ScenarioConfig"]] = None
+
+     @classmethod
+     def configure(
+         cls,
+         default_model: Optional[str] = None,
+         max_turns: Optional[int] = None,
+         verbose: Optional[Union[bool, int]] = None,
+         cache_key: Optional[str] = None,
+         debug: Optional[bool] = None,
+     ) -> None:
+         """
+         Set global configuration settings for all scenario executions.
+
+         This method allows you to configure default behavior that will be applied
+         to all scenarios unless explicitly overridden in individual scenario runs.
+
+         Args:
+             default_model: Default LLM model identifier for user simulator and judge agents
+             max_turns: Maximum number of conversation turns before timeout (default: 10)
+             verbose: Enable verbose output during scenario execution
+             cache_key: Cache key for deterministic scenario behavior across runs
+             debug: Enable debug mode for step-by-step execution with user intervention
+
+         Example:
+             ```python
+             import scenario
+
+             # Set up default configuration
+             scenario.configure(
+                 default_model="openai/gpt-4.1-mini",
+                 max_turns=15,
+                 verbose=True,
+                 debug=False
+             )
+
+             # All subsequent scenario runs will use these defaults
+             result = await scenario.run(
+                 name="my test",
+                 description="Test scenario",
+                 agents=[my_agent, scenario.UserSimulatorAgent(), scenario.JudgeAgent()]
+             )
+             ```
+         """
+         existing_config = cls.default_config or ScenarioConfig()
+
+         cls.default_config = existing_config.merge(
+             ScenarioConfig(
+                 default_model=default_model,
+                 max_turns=max_turns,
+                 verbose=verbose,
+                 cache_key=cache_key,
+                 debug=debug,
+             )
+         )
+
      def merge(self, other: "ScenarioConfig") -> "ScenarioConfig":
+         """
+         Merge this configuration with another configuration.
+
+         Values from the other configuration will override values in this
+         configuration where they are not None.
+
+         Args:
+             other: Another ScenarioConfig instance to merge with
+
+         Returns:
+             A new ScenarioConfig instance with merged values
+
+         Example:
+             ```python
+             base_config = ScenarioConfig(max_turns=10, verbose=True)
+             override_config = ScenarioConfig(max_turns=20)
+
+             merged = base_config.merge(override_config)
+             # Result: max_turns=20, verbose=True
+             ```
+         """
          return ScenarioConfig(
              **{
                  **self.items(),
@@ -36,4 +167,17 @@ class ScenarioConfig(BaseModel):
          )
 
      def items(self):
+         """
+         Get configuration items as a dictionary.
+
+         Returns:
+             Dictionary of configuration key-value pairs, excluding None values
+
+         Example:
+             ```python
+             config = ScenarioConfig(max_turns=15, verbose=True)
+             items = config.items()
+             # Result: {"max_turns": 15, "verbose": True}
+             ```
+         """
          return {k: getattr(self, k) for k in self.model_dump(exclude_none=True).keys()}
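
Reviewer's note: the `merge`/`items` pair implements "non-None fields from the override win", which is easy to misread from the dict unpacking. The same semantics reduced to a runnable sketch (`Cfg` is a hypothetical stand-in for ScenarioConfig, assuming pydantic v2's `model_dump`):

```python
from typing import Optional
from pydantic import BaseModel

class Cfg(BaseModel):
    max_turns: Optional[int] = None
    verbose: Optional[bool] = None

    def items(self) -> dict:
        # Drop None values, as ScenarioConfig.items() does via exclude_none
        return {k: getattr(self, k) for k in self.model_dump(exclude_none=True)}

    def merge(self, other: "Cfg") -> "Cfg":
        # Later dict unpacking wins, so other's non-None fields override
        return Cfg(**{**self.items(), **other.items()})

merged = Cfg(max_turns=10, verbose=True).merge(Cfg(max_turns=20))
assert (merged.max_turns, merged.verbose) == (20, True)
```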
@@ -3,48 +3,18 @@ from typing import Any
  import termcolor
 
 
- default_config_error_message = f"""
-
- {termcolor.colored("->", "cyan")} Please set a default config with at least a testing_agent model for running your scenarios at the top of your test file, for example:
-
-     from scenario import Scenario, TestingAgent
-
-     Scenario.configure(testing_agent=TestingAgent(model="openai/gpt-4o-mini"))
-     {termcolor.colored("^" * 74, "green")}
-
-     @pytest.mark.agent_test
-     def test_vegetarian_recipe_agent():
-         scenario = Scenario(
-             # ...
-         )
-         result = scenario.run()
-
-         assert result.success
-
-
- {termcolor.colored("->", "cyan")} Alternatively, you can set the config specifically for this scenario:
-
-     from scenario import Scenario, TestingAgent
-
-     @pytest.mark.agent_test
-     def test_vegetarian_recipe_agent():
-         scenario = Scenario(
-             # ...
-             testing_agent=TestingAgent(model="openai/gpt-4o-mini")
-             {termcolor.colored("^" * 54, "green")}
-         )
-         result = scenario.run()
-
-         assert result.success
- """
+ def agent_not_configured_error_message(class_name: str):
+     return f"""
 
- testing_agent_not_configured_error_message = f"""
+ {termcolor.colored("->", "cyan")} {class_name} was initialized without a model, please set the model when defining the testing agent, for example:
 
- {termcolor.colored("->", "cyan")} Testing agent was initialized without a model, please set the model when defining the testing agent, for example:
+     {class_name}(model="openai/gpt-4.1-mini")
+     {termcolor.colored("^" * (29 + len(class_name)), "green")}
 
-     TestingAgent.with_config(model="openai/gpt-4.1-mini")
-     {termcolor.colored("^" * 53, "green")}
+ {termcolor.colored("->", "cyan")} Alternatively, you can set the default model globally, for example:
+
+     scenario.configure(default_model="openai/gpt-4.1-mini")
+     {termcolor.colored("^" * 55, "green")}
  """
 
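
Reviewer's note: taken together with the error-message hunk above, the user-facing migration implied by this release looks roughly like the following. The before/after is reconstructed from the removed and added message templates; that agents accept `model=` directly is inferred from the `{class_name}(model=...)` template and the `criteria=` usage shown earlier in the diff:

```python
import scenario

# Before (0.3.x), per the removed error message:
#   from scenario import Scenario, TestingAgent
#   Scenario.configure(testing_agent=TestingAgent(model="openai/gpt-4o-mini"))

# After (0.4.0): set a global default model for simulator and judge agents...
scenario.configure(default_model="openai/gpt-4.1-mini")

# ...or configure the model on the agent itself, as the new message suggests:
judge = scenario.JudgeAgent(model="openai/gpt-4.1-mini", criteria=["Is helpful"])
```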