langwatch-scenario 0.7.7__py3-none-any.whl → 0.7.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: langwatch-scenario
3
- Version: 0.7.7
3
+ Version: 0.7.8
4
4
  Summary: The end-to-end agent testing library
5
5
  Author-email: LangWatch Team <support@langwatch.ai>
6
6
  License: MIT
@@ -2,13 +2,13 @@ scenario/__init__.py,sha256=4WO8TjY8Lc0NhYL7b9LvaB1xCBqwUkLuI0uIA6PQP6c,4223
2
2
  scenario/_error_messages.py,sha256=QVFSbhzsVNGz2GOBOaoQFW6w6AOyZCWLTt0ySWPfnGw,3882
3
3
  scenario/agent_adapter.py,sha256=PoY2KQqYuqzIIb3-nhIU-MPXwHJc1vmwdweMy7ut-hk,4255
4
4
  scenario/cache.py,sha256=J6s6Sia_Ce6TrnsInlhfxm6SF8tygo3sH-_cQCRX1WA,6213
5
- scenario/judge_agent.py,sha256=7NsgeMu6wRMjU_HYTCFqkLma6H2AJuEkw9hJkt11190,16211
5
+ scenario/judge_agent.py,sha256=gWRWzIfHBjAYBRXant6n5fL_E2P3A2IGNvIyp9nUb30,16728
6
6
  scenario/pytest_plugin.py,sha256=DGrpgB6e71eq8QXWWxwLjAKNhiyYyzfzZ0L5Ax8iEmo,11317
7
7
  scenario/scenario_executor.py,sha256=2ZPy2cywwEMIbUfBP1jHN__Ffjf5WGB144MX2SNr5IM,33101
8
8
  scenario/scenario_state.py,sha256=LWGqEQN-Yz0DIiC-TyMRHd-9rEiuBVUHKllMmKv-qGg,7029
9
9
  scenario/script.py,sha256=A0N5pP0l4FFn1xdKc78U_wkwWhEWH3EFeU_LRDtNyEI,12241
10
10
  scenario/types.py,sha256=qH5KFzJBDG1fEJB_qFRVtL3EZulxq3G1mztYczIzIAY,9613
11
- scenario/user_simulator_agent.py,sha256=UJ75xhqHwoi8-3JkR1AsHDzpHM2Lx-aDSTJ1gnq_SXc,9101
11
+ scenario/user_simulator_agent.py,sha256=kqnSd4_gytzEwtkc06r58UdE1EycZBzejRPzfORDjdo,9619
12
12
  scenario/_events/__init__.py,sha256=4cj6H9zuXzvWhT2P2JNdjWzeF1PUepTjqIDw85Vid9s,1500
13
13
  scenario/_events/event_alert_message_logger.py,sha256=K0Pu76Gd36lGEEYh8e8r7NMt7J-OQhbw0cZmiwutCOE,3591
14
14
  scenario/_events/event_bus.py,sha256=KFN0OxAQIQXIk_tVrorDoN_YLKVK9dos5SXFALstHgE,9809
@@ -232,10 +232,10 @@ scenario/_utils/message_conversion.py,sha256=AWHn31E7J0mz9sBXWruVVAgtsrJz1R_xEf-
232
232
  scenario/_utils/utils.py,sha256=msQgUWaLh3U9jIIHmxkEbOaklga63AF0KJzsaKa_mZc,14008
233
233
  scenario/config/__init__.py,sha256=b2X_bqkIrd7jZY9dRrXk2wOqoPe87Nl_SRGuZhlolxA,1123
234
234
  scenario/config/langwatch.py,sha256=ijWchFbUsLbQooAZmwyTw4rxfRLQseZ1GoVSiPPbzpw,1677
235
- scenario/config/model.py,sha256=Ve49S2FyzUifXJ-SAyKPiNtVqs8BfsYbODu_M5y0c8Y,1155
235
+ scenario/config/model.py,sha256=T4HYA79CW1NxXDkFlyftYR6JzZcowbtIx0H-ijxRyfg,1297
236
236
  scenario/config/scenario.py,sha256=tVVnsUgG6Z0hYZiTDX-GGZz8l8co1HhyTqJUJNPinBk,5184
237
- langwatch_scenario-0.7.7.dist-info/METADATA,sha256=L7h0kgOaIij6MYVCac0EqPu8ODkZNKxDeIrHCSJg2l4,20003
238
- langwatch_scenario-0.7.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
239
- langwatch_scenario-0.7.7.dist-info/entry_points.txt,sha256=WlEnJ_gku0i18bIa3DSuGqXRX-QDQLe_s0YmRzK45TI,45
240
- langwatch_scenario-0.7.7.dist-info/top_level.txt,sha256=45Mn28aedJsetnBMB5xSmrJ-yo701QLH89Zlz4r1clE,9
241
- langwatch_scenario-0.7.7.dist-info/RECORD,,
237
+ langwatch_scenario-0.7.8.dist-info/METADATA,sha256=q7Rk73qwl5ZzaRTEF9IWxLzgCBniCMO8Ku240jVyBLY,20003
238
+ langwatch_scenario-0.7.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
239
+ langwatch_scenario-0.7.8.dist-info/entry_points.txt,sha256=WlEnJ_gku0i18bIa3DSuGqXRX-QDQLe_s0YmRzK45TI,45
240
+ langwatch_scenario-0.7.8.dist-info/top_level.txt,sha256=45Mn28aedJsetnBMB5xSmrJ-yo701QLH89Zlz4r1clE,9
241
+ langwatch_scenario-0.7.8.dist-info/RECORD,,
scenario/config/model.py CHANGED
@@ -17,7 +17,8 @@ class ModelConfig(BaseModel):
17
17
  for use with user simulator and judge agents in the Scenario framework.
18
18
 
19
19
  Attributes:
20
- model: The model identifier (e.g., "openai/gpt-4.1-mini", "anthropic/claude-3-sonnet")
20
+ model: The model identifier (e.g., "openai/gpt-4.1", "anthropic/claude-3-sonnet")
21
+ api_base: Optional base URL where the model is hosted
21
22
  api_key: Optional API key for the model provider
22
23
  temperature: Sampling temperature for response generation (0.0 = deterministic, 1.0 = creative)
23
24
  max_tokens: Maximum number of tokens to generate in responses
@@ -26,6 +27,7 @@ class ModelConfig(BaseModel):
26
27
  ```
27
28
  model_config = ModelConfig(
28
29
  model="openai/gpt-4.1",
30
+ api_base="https://api.openai.com/v1",
29
31
  api_key="your-api-key",
30
32
  temperature=0.1,
31
33
  max_tokens=1000
@@ -34,6 +36,7 @@ class ModelConfig(BaseModel):
34
36
  """
35
37
 
36
38
  model: str
39
+ api_base: Optional[str] = None
37
40
  api_key: Optional[str] = None
38
41
  temperature: float = 0.0
39
42
  max_tokens: Optional[int] = None
scenario/judge_agent.py CHANGED
@@ -41,6 +41,7 @@ class JudgeAgent(AgentAdapter):
41
41
  Attributes:
42
42
  role: Always AgentRole.JUDGE for judge agents
43
43
  model: LLM model identifier to use for evaluation
44
+ api_base: Optional base URL where the model is hosted
44
45
  api_key: Optional API key for the model provider
45
46
  temperature: Sampling temperature for evaluation consistency
46
47
  max_tokens: Maximum tokens for judge reasoning
@@ -97,6 +98,7 @@ class JudgeAgent(AgentAdapter):
97
98
  role = AgentRole.JUDGE
98
99
 
99
100
  model: str
101
+ api_base: Optional[str]
100
102
  api_key: Optional[str]
101
103
  temperature: float
102
104
  max_tokens: Optional[int]
@@ -108,6 +110,7 @@ class JudgeAgent(AgentAdapter):
108
110
  *,
109
111
  criteria: Optional[List[str]] = None,
110
112
  model: Optional[str] = None,
113
+ api_base: Optional[str] = None,
111
114
  api_key: Optional[str] = None,
112
115
  temperature: float = 0.0,
113
116
  max_tokens: Optional[int] = None,
@@ -122,6 +125,8 @@ class JudgeAgent(AgentAdapter):
122
125
  and negative constraints ("Agent should not provide personal information").
123
126
  model: LLM model identifier (e.g., "openai/gpt-4.1").
124
127
  If not provided, uses the default model from global configuration.
128
+ api_base: Optional base URL where the model is hosted. If not provided,
129
+ uses the base URL from global configuration.
125
130
  api_key: API key for the model provider. If not provided,
126
131
  uses the key from global configuration or environment.
127
132
  temperature: Sampling temperature for evaluation (0.0-1.0).
@@ -156,6 +161,7 @@ class JudgeAgent(AgentAdapter):
156
161
  """
157
162
  # Override the default system prompt for the judge agent
158
163
  self.criteria = criteria or []
164
+ self.api_base = api_base
159
165
  self.api_key = api_key
160
166
  self.temperature = temperature
161
167
  self.max_tokens = max_tokens
@@ -172,6 +178,9 @@ class JudgeAgent(AgentAdapter):
172
178
  ScenarioConfig.default_config.default_model, ModelConfig
173
179
  ):
174
180
  self.model = model or ScenarioConfig.default_config.default_model.model
181
+ self.api_base = (
182
+ api_base or ScenarioConfig.default_config.default_model.api_base
183
+ )
175
184
  self.api_key = (
176
185
  api_key or ScenarioConfig.default_config.default_model.api_key
177
186
  )
@@ -351,6 +360,8 @@ if you don't have enough information to make a verdict, say inconclusive with ma
351
360
  model=self.model,
352
361
  messages=messages,
353
362
  temperature=self.temperature,
363
+ api_key=self.api_key,
364
+ api_base=self.api_base,
354
365
  max_tokens=self.max_tokens,
355
366
  tools=tools,
356
367
  tool_choice=(
scenario/user_simulator_agent.py CHANGED
@@ -37,6 +37,7 @@ class UserSimulatorAgent(AgentAdapter):
37
37
  Attributes:
38
38
  role: Always AgentRole.USER for user simulator agents
39
39
  model: LLM model identifier to use for generating user messages
40
+ api_base: Optional base URL where the model is hosted
40
41
  api_key: Optional API key for the model provider
41
42
  temperature: Sampling temperature for response generation
42
43
  max_tokens: Maximum tokens to generate in user messages
@@ -76,9 +77,11 @@ class UserSimulatorAgent(AgentAdapter):
76
77
  - Messages are generated in a casual, human-like style (lowercase, brief, etc.)
77
78
  - The simulator will not act as an assistant - it only generates user inputs
78
79
  """
80
+
79
81
  role = AgentRole.USER
80
82
 
81
83
  model: str
84
+ api_base: Optional[str]
82
85
  api_key: Optional[str]
83
86
  temperature: float
84
87
  max_tokens: Optional[int]
@@ -88,6 +91,7 @@ class UserSimulatorAgent(AgentAdapter):
88
91
  self,
89
92
  *,
90
93
  model: Optional[str] = None,
94
+ api_base: Optional[str] = None,
91
95
  api_key: Optional[str] = None,
92
96
  temperature: float = 0.0,
93
97
  max_tokens: Optional[int] = None,
@@ -99,6 +103,8 @@ class UserSimulatorAgent(AgentAdapter):
99
103
  Args:
100
104
  model: LLM model identifier (e.g., "openai/gpt-4.1").
101
105
  If not provided, uses the default model from global configuration.
106
+ api_base: Optional base URL where the model is hosted. If not provided,
107
+ uses the base URL from global configuration.
102
108
  api_key: API key for the model provider. If not provided,
103
109
  uses the key from global configuration or environment.
104
110
  temperature: Sampling temperature for message generation (0.0-1.0).
@@ -128,6 +134,7 @@ class UserSimulatorAgent(AgentAdapter):
128
134
  ```
129
135
  """
130
136
  # Override the default system prompt for the user simulator agent
137
+ self.api_base = api_base
131
138
  self.api_key = api_key
132
139
  self.temperature = temperature
133
140
  self.max_tokens = max_tokens
@@ -144,6 +151,9 @@ class UserSimulatorAgent(AgentAdapter):
144
151
  ScenarioConfig.default_config.default_model, ModelConfig
145
152
  ):
146
153
  self.model = model or ScenarioConfig.default_config.default_model.model
154
+ self.api_base = (
155
+ api_base or ScenarioConfig.default_config.default_model.api_base
156
+ )
147
157
  self.api_key = (
148
158
  api_key or ScenarioConfig.default_config.default_model.api_key
149
159
  )
@@ -222,6 +232,8 @@ Your goal (assistant) is to interact with the Agent Under Test (user) as if you
222
232
  model=self.model,
223
233
  messages=messages,
224
234
  temperature=self.temperature,
235
+ api_key=self.api_key,
236
+ api_base=self.api_base,
225
237
  max_tokens=self.max_tokens,
226
238
  tools=[],
227
239
  ),