langwatch-scenario 0.7.3__py3-none-any.whl → 0.7.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {langwatch_scenario-0.7.3.dist-info → langwatch_scenario-0.7.7.dist-info}/METADATA +14 -12
- {langwatch_scenario-0.7.3.dist-info → langwatch_scenario-0.7.7.dist-info}/RECORD +17 -13
- scenario/__init__.py +1 -1
- scenario/_error_messages.py +2 -2
- scenario/_events/event_alert_message_logger.py +95 -0
- scenario/_events/event_bus.py +90 -30
- scenario/_events/event_reporter.py +43 -28
- scenario/_generated/langwatch_api_client/README.md +27 -17
- scenario/config/__init__.py +43 -0
- scenario/config/langwatch.py +51 -0
- scenario/config/model.py +39 -0
- scenario/{config.py → config/scenario.py} +5 -34
- scenario/judge_agent.py +2 -2
- scenario/user_simulator_agent.py +6 -6
- {langwatch_scenario-0.7.3.dist-info → langwatch_scenario-0.7.7.dist-info}/WHEEL +0 -0
- {langwatch_scenario-0.7.3.dist-info → langwatch_scenario-0.7.7.dist-info}/entry_points.txt +0 -0
- {langwatch_scenario-0.7.3.dist-info → langwatch_scenario-0.7.7.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: langwatch-scenario
-Version: 0.7.3
+Version: 0.7.7
 Summary: The end-to-end agent testing library
 Author-email: LangWatch Team <support@langwatch.ai>
 License: MIT
@@ -30,12 +30,12 @@ Requires-Dist: pksuid>=1.1.2
 Requires-Dist: httpx>=0.27.0
 Requires-Dist: rx>=3.2.0
 Requires-Dist: python-dateutil>=2.9.0.post0
+Requires-Dist: pydantic-settings>=2.9.1
 Provides-Extra: dev
 Requires-Dist: black; extra == "dev"
 Requires-Dist: isort; extra == "dev"
 Requires-Dist: pytest-cov; extra == "dev"
 Requires-Dist: pre-commit; extra == "dev"
-Requires-Dist: commitizen; extra == "dev"
 Requires-Dist: pyright; extra == "dev"
 Requires-Dist: pydantic-ai; extra == "dev"
 Requires-Dist: function-schema; extra == "dev"
@@ -88,7 +88,7 @@ result = await scenario.run(
     # Define the agents that will play this simulation
     agents=[
         WeatherAgent(),
-        scenario.UserSimulatorAgent(model="openai/gpt-4.1
+        scenario.UserSimulatorAgent(model="openai/gpt-4.1"),
     ],
 
     # (Optional) Control the simulation
@@ -159,7 +159,7 @@ import pytest
 import scenario
 import litellm
 
-scenario.configure(default_model="openai/gpt-4.1
+scenario.configure(default_model="openai/gpt-4.1")
 
 
 @pytest.mark.agent_test
@@ -189,6 +189,7 @@ async def test_vegetarian_recipe_agent():
             ]
         ),
     ],
+    set_id="python-examples",
 )
 
 # Assert for pytest to know whether the test passed
@@ -202,7 +203,7 @@ import litellm
 @scenario.cache()
 def vegetarian_recipe_agent(messages) -> scenario.AgentReturnTypes:
     response = litellm.completion(
-        model="openai/gpt-4.1
+        model="openai/gpt-4.1",
         messages=[
             {
                 "role": "system",
@@ -227,17 +228,17 @@ def vegetarian_recipe_agent(messages) -> scenario.AgentReturnTypes:
 Save it as `tests/vegetarian-recipe-agent.test.ts`:
 
 ```typescript
+import scenario, { type AgentAdapter, AgentRole } from "@langwatch/scenario";
 import { openai } from "@ai-sdk/openai";
-import * as scenario from "@langwatch/scenario";
 import { generateText } from "ai";
 import { describe, it, expect } from "vitest";
 
 describe("Vegetarian Recipe Agent", () => {
-  const agent:
-  role:
+  const agent: AgentAdapter = {
+    role: AgentRole.AGENT,
     call: async (input) => {
       const response = await generateText({
-        model: openai("gpt-4.1
+        model: openai("gpt-4.1"),
         messages: [
           {
             role: "system",
@@ -258,7 +259,7 @@ describe("Vegetarian Recipe Agent", () => {
         agent,
         scenario.userSimulatorAgent(),
         scenario.judgeAgent({
-          model: openai("gpt-4.1
+          model: openai("gpt-4.1"),
           criteria: [
             "Agent should not ask more than two follow-up questions",
             "Agent should generate a recipe",
@@ -268,6 +269,7 @@ describe("Vegetarian Recipe Agent", () => {
           ],
         }),
       ],
+      setId: "javascript-examples",
     });
     expect(result.success).toBe(true);
   });
@@ -417,7 +419,7 @@ You can enable debug mode by setting the `debug` field to `True` in the `Scenari
 Debug mode allows you to see the messages in slow motion step by step, and intervene with your own inputs to debug your agent from the middle of the conversation.
 
 ```python
-scenario.configure(default_model="openai/gpt-4.1
+scenario.configure(default_model="openai/gpt-4.1", debug=True)
 ```
 
 or
@@ -431,7 +433,7 @@ pytest -s tests/test_vegetarian_recipe_agent.py --debug
 Each time the scenario runs, the testing agent might chose a different input to start, this is good to make sure it covers the variance of real users as well, however we understand that the non-deterministic nature of it might make it less repeatable, costly and harder to debug. To solve for it, you can use the `cache_key` field in the `Scenario.configure` method or in the specific scenario you are running, this will make the testing agent give the same input for given the same scenario:
 
 ```python
-scenario.configure(default_model="openai/gpt-4.1
+scenario.configure(default_model="openai/gpt-4.1", cache_key="42")
 ```
 
 To bust the cache, you can simply pass a different `cache_key`, disable it, or delete the cache files located at `~/.scenario/cache`.
@@ -1,22 +1,22 @@
-scenario/__init__.py,sha256=
-scenario/_error_messages.py,sha256=
+scenario/__init__.py,sha256=4WO8TjY8Lc0NhYL7b9LvaB1xCBqwUkLuI0uIA6PQP6c,4223
+scenario/_error_messages.py,sha256=QVFSbhzsVNGz2GOBOaoQFW6w6AOyZCWLTt0ySWPfnGw,3882
 scenario/agent_adapter.py,sha256=PoY2KQqYuqzIIb3-nhIU-MPXwHJc1vmwdweMy7ut-hk,4255
 scenario/cache.py,sha256=J6s6Sia_Ce6TrnsInlhfxm6SF8tygo3sH-_cQCRX1WA,6213
-scenario/
-scenario/judge_agent.py,sha256=d8vORsqpUPIA4yhlBTv5Yi4I2MdcfXselYBTFvfZx-4,16221
+scenario/judge_agent.py,sha256=7NsgeMu6wRMjU_HYTCFqkLma6H2AJuEkw9hJkt11190,16211
 scenario/pytest_plugin.py,sha256=DGrpgB6e71eq8QXWWxwLjAKNhiyYyzfzZ0L5Ax8iEmo,11317
 scenario/scenario_executor.py,sha256=2ZPy2cywwEMIbUfBP1jHN__Ffjf5WGB144MX2SNr5IM,33101
 scenario/scenario_state.py,sha256=LWGqEQN-Yz0DIiC-TyMRHd-9rEiuBVUHKllMmKv-qGg,7029
 scenario/script.py,sha256=A0N5pP0l4FFn1xdKc78U_wkwWhEWH3EFeU_LRDtNyEI,12241
 scenario/types.py,sha256=qH5KFzJBDG1fEJB_qFRVtL3EZulxq3G1mztYczIzIAY,9613
-scenario/user_simulator_agent.py,sha256=
+scenario/user_simulator_agent.py,sha256=UJ75xhqHwoi8-3JkR1AsHDzpHM2Lx-aDSTJ1gnq_SXc,9101
 scenario/_events/__init__.py,sha256=4cj6H9zuXzvWhT2P2JNdjWzeF1PUepTjqIDw85Vid9s,1500
-scenario/_events/
-scenario/_events/
+scenario/_events/event_alert_message_logger.py,sha256=K0Pu76Gd36lGEEYh8e8r7NMt7J-OQhbw0cZmiwutCOE,3591
+scenario/_events/event_bus.py,sha256=KFN0OxAQIQXIk_tVrorDoN_YLKVK9dos5SXFALstHgE,9809
+scenario/_events/event_reporter.py,sha256=4uND_kdPBXe-aUWCdSj4BLrMA33TDnbZzokAEOU3_08,3771
 scenario/_events/events.py,sha256=UtEGY-_1B0LrwpgsNKgrvJBZhRtxuj3K_i6ZBfF7E4Q,6387
 scenario/_events/messages.py,sha256=quwP2OkeaGasNOoaV8GUeosZVKc5XDsde08T0xx_YQo,2297
 scenario/_events/utils.py,sha256=SproqiwjhLWAW7p82EirCgawpxAo0ksW1pBB4mKkcEs,3436
-scenario/_generated/langwatch_api_client/README.md,sha256=
+scenario/_generated/langwatch_api_client/README.md,sha256=Az5f2L4ChOnG_ZtrdBagzRVgeTCtBkbD_S5cIeAry2o,5424
 scenario/_generated/langwatch_api_client/pyproject.toml,sha256=Z8wxuGp4H9BJYVVJB8diW7rRU9XYxtPfw9mU4_wq4cA,560
 scenario/_generated/langwatch_api_client/lang_watch_api_client/__init__.py,sha256=vVrn17y-3l3fOqeJk8aN3GlStRm2fo0f313l_0LtJNs,368
 scenario/_generated/langwatch_api_client/lang_watch_api_client/client.py,sha256=o_mdLqyBCQstu5tS1WZFwqIEbGwkvWQ7eQjuCJw_5VY,12419
@@ -230,8 +230,12 @@ scenario/_utils/__init__.py,sha256=ptNVzmjhypznnozdNIiuBDHZ0NLqtp7xhio9kEDovWQ,1
 scenario/_utils/ids.py,sha256=v3JS8J7vrFuubK5bXJviU-BVZoLGWINCN1hUyAO9NZw,2074
 scenario/_utils/message_conversion.py,sha256=AWHn31E7J0mz9sBXWruVVAgtsrJz1R_xEf-dGbX6jjs,3636
 scenario/_utils/utils.py,sha256=msQgUWaLh3U9jIIHmxkEbOaklga63AF0KJzsaKa_mZc,14008
-
-
-
-
-langwatch_scenario-0.7.
+scenario/config/__init__.py,sha256=b2X_bqkIrd7jZY9dRrXk2wOqoPe87Nl_SRGuZhlolxA,1123
+scenario/config/langwatch.py,sha256=ijWchFbUsLbQooAZmwyTw4rxfRLQseZ1GoVSiPPbzpw,1677
+scenario/config/model.py,sha256=Ve49S2FyzUifXJ-SAyKPiNtVqs8BfsYbODu_M5y0c8Y,1155
+scenario/config/scenario.py,sha256=tVVnsUgG6Z0hYZiTDX-GGZz8l8co1HhyTqJUJNPinBk,5184
+langwatch_scenario-0.7.7.dist-info/METADATA,sha256=L7h0kgOaIij6MYVCac0EqPu8ODkZNKxDeIrHCSJg2l4,20003
+langwatch_scenario-0.7.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+langwatch_scenario-0.7.7.dist-info/entry_points.txt,sha256=WlEnJ_gku0i18bIa3DSuGqXRX-QDQLe_s0YmRzK45TI,45
+langwatch_scenario-0.7.7.dist-info/top_level.txt,sha256=45Mn28aedJsetnBMB5xSmrJ-yo701QLH89Zlz4r1clE,9
+langwatch_scenario-0.7.7.dist-info/RECORD,,
scenario/__init__.py
CHANGED
scenario/_error_messages.py
CHANGED
@@ -8,12 +8,12 @@ def agent_not_configured_error_message(class_name: str):
 
 {termcolor.colored("->", "cyan")} {class_name} was initialized without a model, please set the model when defining the testing agent, for example:
 
-    {class_name}(model="openai/gpt-4.1
+    {class_name}(model="openai/gpt-4.1")
     {termcolor.colored("^" * (29 + len(class_name)), "green")}
 
 {termcolor.colored("->", "cyan")} Alternatively, you can set the default model globally, for example:
 
-    scenario.configure(default_model="openai/gpt-4.1
+    scenario.configure(default_model="openai/gpt-4.1")
     {termcolor.colored("^" * 55, "green")}
 """
 
scenario/_events/event_alert_message_logger.py
ADDED
@@ -0,0 +1,95 @@
+import os
+from typing import Set
+from .._utils.ids import get_batch_run_id
+
+
+class EventAlertMessageLogger:
+    """
+    Handles console output of alert messages for scenario events.
+
+    Single responsibility: Display user-friendly messages about event reporting status
+    and simulation watching instructions.
+    """
+
+    _shown_batch_ids: Set[str] = set()
+
+    def handle_greeting(self) -> None:
+        """
+        Shows a fancy greeting message about simulation reporting status.
+        Only shows once per batch run to avoid spam.
+        """
+        if self._is_greeting_disabled():
+            return
+
+        batch_run_id = get_batch_run_id()
+
+        if batch_run_id in EventAlertMessageLogger._shown_batch_ids:
+            return
+
+        EventAlertMessageLogger._shown_batch_ids.add(batch_run_id)
+        self._display_greeting(batch_run_id)
+
+    def handle_watch_message(self, set_url: str) -> None:
+        """
+        Shows a fancy message about how to watch the simulation.
+        Called when a run started event is received with a session ID.
+        """
+        if self._is_greeting_disabled():
+            return
+
+        self._display_watch_message(set_url)
+
+    def _is_greeting_disabled(self) -> bool:
+        """Check if greeting messages are disabled via environment variable."""
+        return bool(os.getenv("SCENARIO_DISABLE_SIMULATION_REPORT_INFO"))
+
+    def _display_greeting(self, batch_run_id: str) -> None:
+        """Display the greeting message with simulation reporting status."""
+        separator = "─" * 60
+
+        if not os.getenv("LANGWATCH_API_KEY"):
+            print(f"\n{separator}")
+            print("🚀 LangWatch Simulation Reporting")
+            print(f"{separator}")
+            print("➡️  API key not configured")
+            print("   Simulations will only output final results")
+            print("")
+            print("💡 To visualize conversations in real time:")
+            print("   • Set LANGWATCH_API_KEY environment variable")
+            print("   • Or configure apiKey in scenario.config.js")
+            print("")
+            print(f"📦 Batch Run ID: {batch_run_id}")
+            print("")
+            print("🔇 To disable these messages:")
+            print("   • Set SCENARIO_DISABLE_SIMULATION_REPORT_INFO=true")
+            print(f"{separator}\n")
+        else:
+            endpoint = os.getenv("LANGWATCH_ENDPOINT", "https://app.langwatch.ai")
+            api_key = os.getenv("LANGWATCH_API_KEY", "")
+
+            print(f"\n{separator}")
+            print("🚀 LangWatch Simulation Reporting")
+            print(f"{separator}")
+            print("✅ Simulation reporting enabled")
+            print(f"   Endpoint: {endpoint}")
+            print(f"   API Key: {'Configured' if api_key else 'Not configured'}")
+            print("")
+            print(f"📦 Batch Run ID: {batch_run_id}")
+            print("")
+            print("🔇 To disable these messages:")
+            print("   • Set SCENARIO_DISABLE_SIMULATION_REPORT_INFO=true")
+            print(f"{separator}\n")
+
+    def _display_watch_message(self, set_url: str) -> None:
+        """Display the watch message with URLs for viewing the simulation."""
+        separator = "─" * 60
+        batch_url = f"{set_url}/{get_batch_run_id()}"
+
+        print(f"\n{separator}")
+        print("👀 Watch Your Simulation Live")
+        print(f"{separator}")
+        print("🌐 Open in your browser:")
+        print(f"   Scenario Set: {set_url}")
+        print(f"   Batch Run: {batch_url}")
+        print("")
+        print(f"{separator}\n")
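For orientation, here is a minimal usage sketch of the new logger, assuming only the package layout shown in this diff (the `set_url` value below is hypothetical; real URLs come back from the LangWatch API):

```python
# Minimal sketch for the new EventAlertMessageLogger.
from scenario._events.event_alert_message_logger import EventAlertMessageLogger

alert_logger = EventAlertMessageLogger()

# Prints the reporting banner once per batch run; silenced entirely when
# SCENARIO_DISABLE_SIMULATION_REPORT_INFO is set.
alert_logger.handle_greeting()

# Prints the "watch live" URLs; the batch URL is derived internally as
# f"{set_url}/{batch_run_id}". This URL is a made-up example.
alert_logger.handle_watch_message("https://app.langwatch.ai/project/simulations/my-set")
```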
scenario/_events/event_bus.py
CHANGED
@@ -1,13 +1,15 @@
 from rx.core.observable.observable import Observable
-from typing import Optional, Any
+from typing import Optional, Any, Dict
 from .events import ScenarioEvent
 from .event_reporter import EventReporter
+from .event_alert_message_logger import EventAlertMessageLogger
 
 import asyncio
 import queue
 import threading
 import logging
 
+
 class ScenarioEventBus:
     """
     Subscribes to scenario event streams and handles HTTP posting using a dedicated worker thread.
@@ -24,6 +26,7 @@ class ScenarioEventBus:
 
     Attributes:
         _event_reporter: EventReporter instance for HTTP posting of events
+        _event_alert_message_logger: EventAlertMessageLogger for user-friendly console output
         _max_retries: Maximum number of retry attempts for failed event processing
         _event_queue: Thread-safe queue for passing events to worker thread
         _completed: Whether the event stream has completed
@@ -44,11 +47,12 @@
             Defaults to 3 attempts with exponential backoff.
         """
         self._event_reporter: EventReporter = event_reporter or EventReporter()
+        self._event_alert_message_logger = EventAlertMessageLogger()
         self._max_retries = max_retries
-
+
         # Custom logger for this class
         self.logger = logging.getLogger(__name__)
-
+
         # Threading infrastructure
         self._event_queue: queue.Queue[ScenarioEvent] = queue.Queue()
         self._completed = False
@@ -61,9 +65,7 @@
         if self._worker_thread is None or not self._worker_thread.is_alive():
             self.logger.debug("Creating new worker thread")
             self._worker_thread = threading.Thread(
-                target=self._worker_loop,
-                daemon=False,
-                name="ScenarioEventBus-Worker"
+                target=self._worker_loop, daemon=False, name="ScenarioEventBus-Worker"
             )
             self._worker_thread.start()
             self.logger.debug("Worker thread started")
@@ -76,52 +78,108 @@ class ScenarioEventBus:
             if self._shutdown_event.wait(timeout=0.1):
                 self.logger.debug("Worker thread received shutdown signal")
                 break
-
+
             try:
                 event = self._event_queue.get(timeout=0.1)
-                self.logger.debug(
+                self.logger.debug(
+                    f"Worker picked up event: {event.type_} ({event.scenario_run_id})"
+                )
                 self._process_event_sync(event)
                 self._event_queue.task_done()
             except queue.Empty:
                 # Exit if stream completed and no more events
                 if self._completed:
-                    self.logger.debug(
+                    self.logger.debug(
+                        "Stream completed and no more events, worker thread exiting"
+                    )
                     break
                 continue
-
+
             except Exception as e:
                 self.logger.error(f"Worker thread error: {e}")
-
+
         self.logger.debug("Worker thread loop ended")
 
     def _process_event_sync(self, event: ScenarioEvent) -> None:
         """
         Process event synchronously in worker thread with retry logic.
         """
-        self.logger.debug(
-
+        self.logger.debug(
+            f"Processing HTTP post for {event.type_} ({event.scenario_run_id})"
+        )
+
         try:
-
-
-            if not success:
-                self.logger.warning(f"Failed to process event {event.type_} after {self._max_retries} attempts")
-            else:
-                self.logger.debug(f"Successfully posted {event.type_} ({event.scenario_run_id})")
+            result = self._post_event_with_retry(event)
+            self._handle_event_result(event, result)
         except Exception as e:
             self.logger.error(f"Error processing event {event.type_}: {e}")
 
-
+    def _post_event_with_retry(self, event: ScenarioEvent) -> Optional[Dict[str, Any]]:
+        """
+        Post event with retry logic, converting async to sync.
+        """
+        return asyncio.run(self._process_event_with_retry(event))
+
+    def _handle_event_result(
+        self, event: ScenarioEvent, result: Optional[Dict[str, Any]]
+    ) -> None:
+        """
+        Handle the result of event processing, including logging and watch messages.
+        """
+        if result is None:
+            self.logger.warning(
+                f"Failed to process event {event.type_} after {self._max_retries} attempts"
+            )
+            return
+
+        self.logger.debug(
+            f"Successfully posted {event.type_} ({event.scenario_run_id})"
+        )
+
+        # Handle watch message for run started events
+        if event.type_ == "SCENARIO_RUN_STARTED" and result.get("setUrl"):
+            self._handle_watch_message(event, result)
+
+    def _handle_watch_message(
+        self, event: ScenarioEvent, result: Dict[str, Any]
+    ) -> None:
+        """
+        Handle watch message for scenario run started events.
+        """
+        self._event_alert_message_logger.handle_watch_message(
+            set_url=str(result["setUrl"]),
+        )
+
+    def _extract_scenario_set_id(self, event: ScenarioEvent) -> str:
+        """
+        Extract scenario set ID from event, handling Unset types from generated models.
+        """
+        scenario_set_id = getattr(event, "scenario_set_id", "default")
+
+        # Handle Unset type from generated models
+        if hasattr(scenario_set_id, "__class__") and "Unset" in str(
+            scenario_set_id.__class__
+        ):
+            return "default"
+
+        return str(scenario_set_id)
+
+    async def _process_event_with_retry(
+        self, event: ScenarioEvent, attempt: int = 1
+    ) -> Optional[Dict[str, Any]]:
         """
         Process a single event with retry logic (now runs in worker thread context).
         """
         try:
             if self._event_reporter:
-                await self._event_reporter.post_event(event)
-                return
+                return await self._event_reporter.post_event(event)
+            return {}
         except Exception as e:
             if attempt >= self._max_retries:
-                return
-            print(
+                return None
+            print(
+                f"Error processing event (attempt {attempt}/{self._max_retries}): {e}"
+            )
             await asyncio.sleep(0.1 * (2 ** (attempt - 1)))  # Exponential backoff
             return await self._process_event_with_retry(event, attempt + 1)
 
@@ -135,7 +193,9 @@ class ScenarioEventBus:
             return
 
         def handle_event(event: ScenarioEvent) -> None:
-            self.logger.debug(
+            self.logger.debug(
+                f"Event received, queuing: {event.type_} ({event.scenario_run_id})"
+            )
             self._get_or_create_worker()
             self._event_queue.put(event)
             self.logger.debug(f"Event queued: {event.type_} ({event.scenario_run_id})")
@@ -144,7 +204,7 @@ class ScenarioEventBus:
         self._subscription = event_stream.subscribe(
             handle_event,
             lambda e: self.logger.error(f"Error in event stream: {e}"),
-            lambda: self._set_completed()
+            lambda: self._set_completed(),
         )
 
     def _set_completed(self):
@@ -155,17 +215,17 @@ class ScenarioEventBus:
     def drain(self) -> None:
         """
         Waits for all queued events to complete processing.
-
+
         This method blocks until all events in the queue have been processed.
         Since _process_event_sync() uses asyncio.run(), HTTP requests complete
         before task_done() is called, so join() ensures everything is finished.
         """
         self.logger.debug("Drain started - waiting for queue to empty")
-
+
         # Wait for all events to be processed - this is sufficient!
         self._event_queue.join()
         self.logger.debug("Event queue drained")
-
+
         # Signal worker to shutdown and wait for it
         self._shutdown_event.set()
         if self._worker_thread and self._worker_thread.is_alive():
@@ -175,7 +235,7 @@ class ScenarioEventBus:
             self.logger.warning("Worker thread did not shutdown within timeout")
         else:
            self.logger.debug("Worker thread shutdown complete")
-
+
         self.logger.info("Drain completed")
 
     def is_completed(self) -> bool:
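The retry path above backs off exponentially via `0.1 * (2 ** (attempt - 1))`. A quick sketch of the delay schedule this implies, assuming the default of 3 attempts mentioned in the docstring:

```python
# Sleeps occur only after a failed attempt that is not the last one,
# so with max_retries=3 there are waits before attempts 2 and 3.
max_retries = 3
delays = [0.1 * (2 ** (attempt - 1)) for attempt in range(1, max_retries)]
print(delays)  # [0.1, 0.2] seconds
```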
scenario/_events/event_reporter.py
CHANGED
@@ -1,8 +1,9 @@
 import logging
-import os
 import httpx
-from typing import Optional
+from typing import Optional, Dict, Any
 from .events import ScenarioEvent
+from .event_alert_message_logger import EventAlertMessageLogger
+from scenario.config import LangWatchSettings
 
 
 class EventReporter:
@@ -13,51 +14,54 @@ class EventReporter:
     with proper authentication and error handling.
 
     Args:
-        endpoint (str, optional):
-        api_key (str, optional):
+        endpoint (str, optional): Override endpoint URL. If not provided, uses LANGWATCH_ENDPOINT env var.
+        api_key (str, optional): Override API key. If not provided, uses LANGWATCH_API_KEY env var.
 
     Example:
-
-
-
-
-
-
-            "name": "test",
-            "description": "test scenario"
-        }
-    }
-
-    reporter = EventReporter(endpoint="https://api.langwatch.ai", api_key="test-api-key")
-    await reporter.post_event(event)
+        # Using environment variables (LANGWATCH_ENDPOINT, LANGWATCH_API_KEY)
+        reporter = EventReporter()
+
+        # Override specific values
+        reporter = EventReporter(endpoint="https://langwatch.yourdomain.com")
+        reporter = EventReporter(api_key="your-api-key")
     """
 
     def __init__(self, endpoint: Optional[str] = None, api_key: Optional[str] = None):
-
-
+        # Load settings from environment variables
+        langwatch_settings = LangWatchSettings()
+
+        # Allow constructor parameters to override settings
+        self.endpoint = endpoint or langwatch_settings.endpoint
+        self.api_key = api_key or langwatch_settings.api_key
         self.logger = logging.getLogger(__name__)
+        self.event_alert_message_logger = EventAlertMessageLogger()
+
+        # Show greeting message when reporter is initialized
+        self.event_alert_message_logger.handle_greeting()
 
-    async def post_event(self, event: ScenarioEvent):
+    async def post_event(self, event: ScenarioEvent) -> Dict[str, Any]:
         """
         Posts an event to the configured endpoint.
 
         Args:
-            event: A
+            event: A ScenarioEvent containing the event data
 
         Returns:
-
+            Dict containing response data, including setUrl if available
         """
         event_type = event.type_
         self.logger.info(f"[{event_type}] Publishing event ({event.scenario_run_id})")
 
+        result: Dict[str, Any] = {}
+
         if not self.endpoint:
             self.logger.warning(
                 "No LANGWATCH_ENDPOINT configured, skipping event posting"
             )
-            return
+            return result
 
         try:
-            async with httpx.AsyncClient() as client:
+            async with httpx.AsyncClient(follow_redirects=True) as client:
                 response = await client.post(
                     f"{self.endpoint}/api/scenario-events",
                     json=event.to_dict(),
@@ -66,11 +70,19 @@ class EventReporter:
                         "X-Auth-Token": self.api_key,
                    },
                )
-                self.logger.info(
-
+                self.logger.info(
+                    f"[{event_type}] POST response status: {response.status_code} ({event.scenario_run_id})"
+                )
+
                 if response.is_success:
                     data = response.json()
-                    self.logger.info(
+                    self.logger.info(
+                        f"[{event_type}] POST response: {data} ({event.scenario_run_id})"
+                    )
+
+                    # Extract setUrl from response if available
+                    if isinstance(data, dict) and "url" in data:
+                        result["setUrl"] = data["url"]
                 else:
                     error_text = response.text
                     self.logger.error(
@@ -80,4 +92,7 @@ class EventReporter:
                 )
         except Exception as error:
             self.logger.error(
-                f"[{event_type}] Event POST error: {error}, event={event}, endpoint={self.endpoint}"
+                f"[{event_type}] Event POST error: {error}, event={event}, endpoint={self.endpoint}"
+            )
+
+        return result
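Taken together, the new `post_event` contract can be exercised as in this sketch (it assumes a `ScenarioEvent` instance is already in hand; event construction itself is not shown in this diff):

```python
import asyncio
from scenario._events.event_reporter import EventReporter

async def report(event):  # `event` is assumed to be a ScenarioEvent
    # Resolves LANGWATCH_ENDPOINT / LANGWATCH_API_KEY via LangWatchSettings
    # and prints the greeting banner on construction.
    reporter = EventReporter()
    result = await reporter.post_event(event)
    # post_event now returns a dict; "setUrl" is present when the API
    # response carried a "url" field.
    if result.get("setUrl"):
        print(f"Watch the simulation at {result['setUrl']}")

# asyncio.run(report(my_event))  # my_event: a ScenarioEvent, not shown here
```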
scenario/_generated/langwatch_api_client/README.md
CHANGED
@@ -1,15 +1,19 @@
 # lang-watch-api-client
+
 **⚠️ AUTO-GENERATED CODE - DO NOT EDIT MANUALLY ⚠️**
 
 This is an auto-generated client library for accessing LangWatch API, created using `openapi-python-client`.
 
 ## Regeneration
+
 To regenerate this client:
+
 ```bash
 make generate-openapi-client
 ```
 
 ## Source
+
 Generated from: `../langwatch-saas/langwatch/langwatch/src/app/api/openapiLangWatch.json`
 
 ---
@@ -17,12 +21,13 @@ Generated from: `../langwatch-saas/langwatch/langwatch/src/app/api/openapiLangWa
 A client library for accessing LangWatch API
 
 ## Usage
+
 First, create a client:
 
 ```python
 from lang_watch_api_client import Client
 
-client = Client(base_url="https://
+client = Client(base_url="https://app.langwatch.ai")
 ```
 
 If the endpoints you're going to hit require authentication, use `AuthenticatedClient` instead:
@@ -30,7 +35,7 @@ If the endpoints you're going to hit require authentication, use `AuthenticatedC
 ```python
 from lang_watch_api_client import AuthenticatedClient
 
-client = AuthenticatedClient(base_url="https://
+client = AuthenticatedClient(base_url="https://app.langwatch.ai", token="SuperSecretToken")
 ```
 
 Now call your endpoint and use your models:
@@ -62,7 +67,7 @@ By default, when you're calling an HTTPS API it will attempt to verify that SSL
 
 ```python
 client = AuthenticatedClient(
-    base_url="https://
+    base_url="https://app.langwatch.ai",
     token="SuperSecretToken",
     verify_ssl="/path/to/certificate_bundle.pem",
 )
@@ -72,18 +77,20 @@ You can also disable certificate validation altogether, but beware that **this i
 
 ```python
 client = AuthenticatedClient(
-    base_url="https://
-    token="SuperSecretToken",
+    base_url="https://app.langwatch.ai",
+    token="SuperSecretToken",
     verify_ssl=False
 )
 ```
 
 Things to know:
+
 1. Every path/method combo becomes a Python module with four functions:
-
-
-
-
+
+    1. `sync`: Blocking request that returns parsed data (if successful) or `None`
+    1. `sync_detailed`: Blocking request that always returns a `Request`, optionally with `parsed` set if the request was successful.
+    1. `asyncio`: Like `sync` but async instead of blocking
+    1. `asyncio_detailed`: Like `sync_detailed` but async instead of blocking
 
 1. All path/query params, and bodies become method arguments.
 1. If your endpoint had any tags on it, the first tag will be used as a module name for the function (my_tag above)
@@ -104,7 +111,7 @@ def log_response(response):
     print(f"Response event hook: {request.method} {request.url} - Status {response.status_code}")
 
 client = Client(
-    base_url="https://
+    base_url="https://app.langwatch.ai",
     httpx_args={"event_hooks": {"request": [log_request], "response": [log_response]}},
 )
 
@@ -118,22 +125,25 @@ import httpx
 from lang_watch_api_client import Client
 
 client = Client(
-    base_url="https://
+    base_url="https://app.langwatch.ai",
 )
 # Note that base_url needs to be re-set, as would any shared cookies, headers, etc.
-client.set_httpx_client(httpx.Client(base_url="https://
+client.set_httpx_client(httpx.Client(base_url="https://app.langwatch.ai", proxies="http://localhost:8030"))
 ```
 
 ## Building / publishing this package
-
+
+This project uses [Poetry](https://python-poetry.org/) to manage dependencies and packaging. Here are the basics:
+
 1. Update the metadata in pyproject.toml (e.g. authors, version)
 1. If you're using a private repository, configure it with Poetry
-
-
+    1. `poetry config repositories.<your-repository-name> <url-to-your-repository>`
+    1. `poetry config http-basic.<your-repository-name> <username> <password>`
 1. Publish the client with `poetry publish --build -r <your-repository-name>` or, if for public PyPI, just `poetry publish --build`
 
 If you want to install this client into another project without publishing it (e.g. for development) then:
+
 1. If that project **is using Poetry**, you can simply do `poetry add <path-to-this-client>` from that project
 1. If that project is not using Poetry:
-
-
+    1. Build a wheel with `poetry build -f wheel`
+    1. Install that wheel from the other project `pip install <path-to-wheel>`
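As a sketch of the four generated functions described in that README, using the `openapi-python-client` template's `my_tag`/`get_my_data_model` placeholder names (they are illustrative, not real endpoints of this client):

```python
from lang_watch_api_client import Client
# Placeholder module path from the generator's template; real modules are
# derived from the API's tags and operation IDs.
from lang_watch_api_client.api.my_tag import get_my_data_model

client = Client(base_url="https://app.langwatch.ai")

data = get_my_data_model.sync(client=client)               # parsed data or None
response = get_my_data_model.sync_detailed(client=client)  # full response object
print(response.status_code, response.parsed)
```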
scenario/config/__init__.py
ADDED
@@ -0,0 +1,43 @@
+"""
+Configuration module for Scenario.
+
+This module provides all configuration classes for customizing the behavior
+of the Scenario testing framework, including model settings, scenario execution
+parameters, and LangWatch integration.
+
+Classes:
+    ModelConfig: Configuration for LLM model settings
+    ScenarioConfig: Main configuration for scenario execution
+    LangWatchSettings: Configuration for LangWatch API integration
+
+Example:
+    ```
+    from scenario.config import ModelConfig, ScenarioConfig, LangWatchSettings
+
+    # Configure LLM model
+    model_config = ModelConfig(
+        model="openai/gpt-4.1-mini",
+        temperature=0.1
+    )
+
+    # Configure scenario execution
+    scenario_config = ScenarioConfig(
+        default_model=model_config,
+        max_turns=15,
+        verbose=True
+    )
+
+    # Configure LangWatch integration
+    langwatch_settings = LangWatchSettings()  # Reads from environment
+    ```
+"""
+
+from .model import ModelConfig
+from .scenario import ScenarioConfig
+from .langwatch import LangWatchSettings
+
+__all__ = [
+    "ModelConfig",
+    "ScenarioConfig",
+    "LangWatchSettings",
+]
scenario/config/langwatch.py
ADDED
@@ -0,0 +1,51 @@
+"""
+LangWatch configuration for Scenario.
+
+This module provides configuration for LangWatch API integration,
+including endpoint URLs and authentication credentials.
+"""
+
+from pydantic import Field, HttpUrl
+from pydantic_settings import BaseSettings, SettingsConfigDict
+
+
+class LangWatchSettings(BaseSettings):
+    """
+    Configuration for LangWatch API integration.
+
+    This class handles configuration for connecting to LangWatch services,
+    automatically reading from environment variables with the LANGWATCH_ prefix.
+
+    Attributes:
+        endpoint: LangWatch API endpoint URL
+        api_key: API key for LangWatch authentication
+
+    Environment Variables:
+        LANGWATCH_ENDPOINT: LangWatch API endpoint (defaults to https://app.langwatch.ai)
+        LANGWATCH_API_KEY: API key for authentication (defaults to empty string)
+
+    Example:
+        ```
+        # Using environment variables
+        # export LANGWATCH_ENDPOINT="https://app.langwatch.ai"
+        # export LANGWATCH_API_KEY="your-api-key"
+
+        settings = LangWatchSettings()
+        print(settings.endpoint)  # https://app.langwatch.ai
+        print(settings.api_key)   # your-api-key
+
+        # Or override programmatically
+        settings = LangWatchSettings(
+            endpoint="https://custom.langwatch.ai",
+            api_key="your-api-key"
+        )
+        ```
+    """
+
+    model_config = SettingsConfigDict(env_prefix="LANGWATCH_", case_sensitive=False)
+
+    endpoint: HttpUrl = Field(
+        default=HttpUrl("https://app.langwatch.ai"),
+        description="LangWatch API endpoint URL",
+    )
+    api_key: str = Field(default="", description="API key for LangWatch authentication")
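Because `LangWatchSettings` subclasses `pydantic-settings`' `BaseSettings` with `env_prefix="LANGWATCH_"`, values resolve from the environment at construction time while constructor arguments still take precedence. A quick sketch (the key value is made up):

```python
import os
from scenario.config import LangWatchSettings

os.environ["LANGWATCH_API_KEY"] = "test-api-key"  # hypothetical value

settings = LangWatchSettings()
print(settings.endpoint)  # the default endpoint, https://app.langwatch.ai
print(settings.api_key)   # test-api-key

# Explicit arguments override the environment:
custom = LangWatchSettings(endpoint="https://langwatch.yourdomain.com")
```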
scenario/config/model.py
ADDED
@@ -0,0 +1,39 @@
+"""
+Model configuration for Scenario.
+
+This module provides configuration classes for LLM model settings used by
+user simulator and judge agents in the Scenario framework.
+"""
+
+from typing import Optional
+from pydantic import BaseModel
+
+
+class ModelConfig(BaseModel):
+    """
+    Configuration for LLM model settings.
+
+    This class encapsulates all the parameters needed to configure an LLM model
+    for use with user simulator and judge agents in the Scenario framework.
+
+    Attributes:
+        model: The model identifier (e.g., "openai/gpt-4.1-mini", "anthropic/claude-3-sonnet")
+        api_key: Optional API key for the model provider
+        temperature: Sampling temperature for response generation (0.0 = deterministic, 1.0 = creative)
+        max_tokens: Maximum number of tokens to generate in responses
+
+    Example:
+        ```
+        model_config = ModelConfig(
+            model="openai/gpt-4.1",
+            api_key="your-api-key",
+            temperature=0.1,
+            max_tokens=1000
+        )
+        ```
+    """
+
+    model: str
+    api_key: Optional[str] = None
+    temperature: float = 0.0
+    max_tokens: Optional[int] = None
scenario/{config.py → config/scenario.py}
RENAMED
@@ -1,43 +1,14 @@
 """
-
+Scenario configuration for Scenario.
 
-This module provides configuration
-Scenario testing framework, including
-and debugging options.
+This module provides the main configuration class for customizing the behavior
+of the Scenario testing framework, including execution parameters and debugging options.
 """
 
 from typing import Optional, Union, ClassVar
 from pydantic import BaseModel
 
-
-class ModelConfig(BaseModel):
-    """
-    Configuration for LLM model settings.
-
-    This class encapsulates all the parameters needed to configure an LLM model
-    for use with user simulator and judge agents in the Scenario framework.
-
-    Attributes:
-        model: The model identifier (e.g., "openai/gpt-4.1-mini", "anthropic/claude-3-sonnet")
-        api_key: Optional API key for the model provider
-        temperature: Sampling temperature for response generation (0.0 = deterministic, 1.0 = creative)
-        max_tokens: Maximum number of tokens to generate in responses
-
-    Example:
-        ```
-        model_config = ModelConfig(
-            model="openai/gpt-4.1-mini",
-            api_key="your-api-key",
-            temperature=0.1,
-            max_tokens=1000
-        )
-        ```
-    """
-
-    model: str
-    api_key: Optional[str] = None
-    temperature: float = 0.0
-    max_tokens: Optional[int] = None
+from .model import ModelConfig
 
 
 class ScenarioConfig(BaseModel):
@@ -69,7 +40,7 @@ class ScenarioConfig(BaseModel):
         # Or create a specific config instance
         config = ScenarioConfig(
             default_model=ModelConfig(
-                model="openai/gpt-4.1
+                model="openai/gpt-4.1",
                 temperature=0.2
             ),
             max_turns=20
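The net effect of this split is that `ModelConfig` now lives in its own module but is re-exported from `scenario.config`, so the documented usage keeps working. A sketch of the combined usage shown in the docstrings:

```python
from scenario.config import ModelConfig, ScenarioConfig

config = ScenarioConfig(
    default_model=ModelConfig(
        model="openai/gpt-4.1",
        temperature=0.2,
    ),
    max_turns=20,
)
```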
scenario/judge_agent.py
CHANGED
@@ -62,7 +62,7 @@ class JudgeAgent(AgentAdapter):
 
     # Customized judge with specific model and behavior
     strict_judge = scenario.JudgeAgent(
-        model="openai/gpt-4.1
+        model="openai/gpt-4.1",
         criteria=[
             "Code examples are syntactically correct",
             "Explanations are technically accurate",
@@ -120,7 +120,7 @@ class JudgeAgent(AgentAdapter):
         criteria: List of success criteria to evaluate the conversation against.
             Can include both positive requirements ("Agent provides helpful responses")
             and negative constraints ("Agent should not provide personal information").
-        model: LLM model identifier (e.g., "openai/gpt-4.1
+        model: LLM model identifier (e.g., "openai/gpt-4.1").
             If not provided, uses the default model from global configuration.
         api_key: API key for the model provider. If not provided,
             uses the key from global configuration or environment.
scenario/user_simulator_agent.py
CHANGED
@@ -48,12 +48,12 @@ class UserSimulatorAgent(AgentAdapter):
 
     # Basic user simulator with default behavior
     user_sim = scenario.UserSimulatorAgent(
-        model="openai/gpt-4.1
+        model="openai/gpt-4.1"
     )
 
     # Customized user simulator
     custom_user_sim = scenario.UserSimulatorAgent(
-        model="openai/gpt-4.1
+        model="openai/gpt-4.1",
         temperature=0.3,
         system_prompt="You are a technical user who asks detailed questions"
     )
@@ -97,7 +97,7 @@ class UserSimulatorAgent(AgentAdapter):
         Initialize a user simulator agent.
 
         Args:
-            model: LLM model identifier (e.g., "openai/gpt-4.1
+            model: LLM model identifier (e.g., "openai/gpt-4.1").
                 If not provided, uses the default model from global configuration.
             api_key: API key for the model provider. If not provided,
                 uses the key from global configuration or environment.
@@ -114,11 +114,11 @@ class UserSimulatorAgent(AgentAdapter):
         Example:
             ```
             # Basic user simulator
-            user_sim = UserSimulatorAgent(model="openai/gpt-4.1
+            user_sim = UserSimulatorAgent(model="openai/gpt-4.1")
 
             # User simulator with custom persona
             expert_user = UserSimulatorAgent(
-                model="openai/gpt-4.1
+                model="openai/gpt-4.1",
                 temperature=0.2,
                 system_prompt='''
                 You are an expert software developer testing an AI coding assistant.
@@ -203,7 +203,7 @@ Your goal (assistant) is to interact with the Agent Under Test (user) as if you
 </scenario>
 
 <rules>
-- DO NOT carry over any requests yourself, YOU ARE NOT the assistant today, you are the user
+- DO NOT carry over any requests yourself, YOU ARE NOT the assistant today, you are the user, send the user message and just STOP.
 </rules>
 """,
 },
{langwatch_scenario-0.7.3.dist-info → langwatch_scenario-0.7.7.dist-info}/WHEEL
File without changes
{langwatch_scenario-0.7.3.dist-info → langwatch_scenario-0.7.7.dist-info}/entry_points.txt
File without changes
{langwatch_scenario-0.7.3.dist-info → langwatch_scenario-0.7.7.dist-info}/top_level.txt
File without changes