langwatch-scenario 0.7.7__py3-none-any.whl → 0.7.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {langwatch_scenario-0.7.7.dist-info → langwatch_scenario-0.7.9.dist-info}/METADATA +2 -2
- {langwatch_scenario-0.7.7.dist-info → langwatch_scenario-0.7.9.dist-info}/RECORD +18 -17
- scenario/_events/event_alert_message_logger.py +15 -29
- scenario/_events/event_bus.py +4 -1
- scenario/_events/event_reporter.py +8 -3
- scenario/_events/utils.py +3 -3
- scenario/_utils/__init__.py +2 -2
- scenario/_utils/ids.py +12 -12
- scenario/config/model.py +4 -1
- scenario/config/scenario.py +8 -0
- scenario/judge_agent.py +12 -1
- scenario/py.typed +0 -0
- scenario/pytest_plugin.py +5 -0
- scenario/scenario_executor.py +2 -1
- scenario/user_simulator_agent.py +12 -0
- {langwatch_scenario-0.7.7.dist-info → langwatch_scenario-0.7.9.dist-info}/WHEEL +0 -0
- {langwatch_scenario-0.7.7.dist-info → langwatch_scenario-0.7.9.dist-info}/entry_points.txt +0 -0
- {langwatch_scenario-0.7.7.dist-info → langwatch_scenario-0.7.9.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: langwatch-scenario
|
3
|
-
Version: 0.7.7
|
3
|
+
Version: 0.7.9
|
4
4
|
Summary: The end-to-end agent testing library
|
5
5
|
Author-email: LangWatch Team <support@langwatch.ai>
|
6
6
|
License: MIT
|
@@ -457,7 +457,7 @@ This will cache any function call you decorate when running the tests and make t
|
|
457
457
|
While optional, we strongly recommend setting stable identifiers for your scenarios, sets, and batches for better organization and tracking in LangWatch.
|
458
458
|
|
459
459
|
- **set_id**: Groups related scenarios into a test suite. This corresponds to the "Simulation Set" in the UI.
|
460
|
-
- **
|
460
|
+
- **SCENARIO_BATCH_RUN_ID**: Env variable that groups all scenarios that were run together in a single execution (e.g., a single CI job). This is automatically generated but can be overridden.
|
461
461
|
|
462
462
|
```python
|
463
463
|
import os
|
@@ -2,20 +2,21 @@ scenario/__init__.py,sha256=4WO8TjY8Lc0NhYL7b9LvaB1xCBqwUkLuI0uIA6PQP6c,4223
|
|
2
2
|
scenario/_error_messages.py,sha256=QVFSbhzsVNGz2GOBOaoQFW6w6AOyZCWLTt0ySWPfnGw,3882
|
3
3
|
scenario/agent_adapter.py,sha256=PoY2KQqYuqzIIb3-nhIU-MPXwHJc1vmwdweMy7ut-hk,4255
|
4
4
|
scenario/cache.py,sha256=J6s6Sia_Ce6TrnsInlhfxm6SF8tygo3sH-_cQCRX1WA,6213
|
5
|
-
scenario/judge_agent.py,sha256=
|
6
|
-
scenario/
|
7
|
-
scenario/
|
5
|
+
scenario/judge_agent.py,sha256=TSwykEWhoBA9F__sUsSuUMpu7pOkT1lIJo8YlEj2eiA,16759
|
6
|
+
scenario/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
7
|
+
scenario/pytest_plugin.py,sha256=wRCuGD9uwrrLt2fY15zK6mnmY9W_dO_m0WalPJYE5II,11491
|
8
|
+
scenario/scenario_executor.py,sha256=_GRpFpw_WtgtaGpxWh0A0HNNf-aU78PdIiVdgEFm9MY,33136
|
8
9
|
scenario/scenario_state.py,sha256=LWGqEQN-Yz0DIiC-TyMRHd-9rEiuBVUHKllMmKv-qGg,7029
|
9
10
|
scenario/script.py,sha256=A0N5pP0l4FFn1xdKc78U_wkwWhEWH3EFeU_LRDtNyEI,12241
|
10
11
|
scenario/types.py,sha256=qH5KFzJBDG1fEJB_qFRVtL3EZulxq3G1mztYczIzIAY,9613
|
11
|
-
scenario/user_simulator_agent.py,sha256=
|
12
|
+
scenario/user_simulator_agent.py,sha256=kqnSd4_gytzEwtkc06r58UdE1EycZBzejRPzfORDjdo,9619
|
12
13
|
scenario/_events/__init__.py,sha256=4cj6H9zuXzvWhT2P2JNdjWzeF1PUepTjqIDw85Vid9s,1500
|
13
|
-
scenario/_events/event_alert_message_logger.py,sha256=
|
14
|
-
scenario/_events/event_bus.py,sha256=
|
15
|
-
scenario/_events/event_reporter.py,sha256
|
14
|
+
scenario/_events/event_alert_message_logger.py,sha256=n2W3uT8y4x6KKL3H9Ez6CfzJOFlvOfvjDKsdhHUJkxs,2787
|
15
|
+
scenario/_events/event_bus.py,sha256=IsKNsClF1JFYj728EcxX1hw_KbfDkfJq3Y2Kv4h94n4,9871
|
16
|
+
scenario/_events/event_reporter.py,sha256=-6NNbBMy_FYr1O-1FuZ6eIUnLuI8NGRMUr0pybLJrCI,3873
|
16
17
|
scenario/_events/events.py,sha256=UtEGY-_1B0LrwpgsNKgrvJBZhRtxuj3K_i6ZBfF7E4Q,6387
|
17
18
|
scenario/_events/messages.py,sha256=quwP2OkeaGasNOoaV8GUeosZVKc5XDsde08T0xx_YQo,2297
|
18
|
-
scenario/_events/utils.py,sha256=
|
19
|
+
scenario/_events/utils.py,sha256=KKqWFGkj4XtofKxM2yi-DBhBQp8wQOdls48iPHGCmUY,3473
|
19
20
|
scenario/_generated/langwatch_api_client/README.md,sha256=Az5f2L4ChOnG_ZtrdBagzRVgeTCtBkbD_S5cIeAry2o,5424
|
20
21
|
scenario/_generated/langwatch_api_client/pyproject.toml,sha256=Z8wxuGp4H9BJYVVJB8diW7rRU9XYxtPfw9mU4_wq4cA,560
|
21
22
|
scenario/_generated/langwatch_api_client/lang_watch_api_client/__init__.py,sha256=vVrn17y-3l3fOqeJk8aN3GlStRm2fo0f313l_0LtJNs,368
|
@@ -226,16 +227,16 @@ scenario/_generated/langwatch_api_client/lang_watch_api_client/models/search_req
|
|
226
227
|
scenario/_generated/langwatch_api_client/lang_watch_api_client/models/search_response.py,sha256=zDYmJ8bFBSJyF9D3cEn_ffrey-ITIfwr-_7eu72zLyk,2832
|
227
228
|
scenario/_generated/langwatch_api_client/lang_watch_api_client/models/timestamps.py,sha256=-nRKUPZTAJQNxiKz128xF7DKgZNbFo4G3mr5xNXrkaw,2173
|
228
229
|
scenario/_generated/langwatch_api_client/lang_watch_api_client/models/trace.py,sha256=K9Lc_EQOrJ2dqMXx9EpiUXReT1_uYF7WRfYyhlfbi3I,7537
|
229
|
-
scenario/_utils/__init__.py,sha256=
|
230
|
-
scenario/_utils/ids.py,sha256=
|
230
|
+
scenario/_utils/__init__.py,sha256=xPVjLXnHTTq9fuRFh5lsMvwtIpEeJ3jy1vf5yTUMPsc,1313
|
231
|
+
scenario/_utils/ids.py,sha256=W4tVMCf9ky0KLTDA_qOfErNhb4tCmxwa8zEuo1K1ZuY,2071
|
231
232
|
scenario/_utils/message_conversion.py,sha256=AWHn31E7J0mz9sBXWruVVAgtsrJz1R_xEf-dGbX6jjs,3636
|
232
233
|
scenario/_utils/utils.py,sha256=msQgUWaLh3U9jIIHmxkEbOaklga63AF0KJzsaKa_mZc,14008
|
233
234
|
scenario/config/__init__.py,sha256=b2X_bqkIrd7jZY9dRrXk2wOqoPe87Nl_SRGuZhlolxA,1123
|
234
235
|
scenario/config/langwatch.py,sha256=ijWchFbUsLbQooAZmwyTw4rxfRLQseZ1GoVSiPPbzpw,1677
|
235
|
-
scenario/config/model.py,sha256=
|
236
|
-
scenario/config/scenario.py,sha256=
|
237
|
-
langwatch_scenario-0.7.
|
238
|
-
langwatch_scenario-0.7.
|
239
|
-
langwatch_scenario-0.7.
|
240
|
-
langwatch_scenario-0.7.
|
241
|
-
langwatch_scenario-0.7.
|
236
|
+
scenario/config/model.py,sha256=T4HYA79CW1NxXDkFlyftYR6JzZcowbtIx0H-ijxRyfg,1297
|
237
|
+
scenario/config/scenario.py,sha256=6jrtcm0Fo7FpxQta7QIKdGMgl7cXrn374Inzx29hRuk,5406
|
238
|
+
langwatch_scenario-0.7.9.dist-info/METADATA,sha256=0s-yAn8iE1N-5dbqugYFpSl8btZrTyyDgWQDat8szxI,20030
|
239
|
+
langwatch_scenario-0.7.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
240
|
+
langwatch_scenario-0.7.9.dist-info/entry_points.txt,sha256=WlEnJ_gku0i18bIa3DSuGqXRX-QDQLe_s0YmRzK45TI,45
|
241
|
+
langwatch_scenario-0.7.9.dist-info/top_level.txt,sha256=45Mn28aedJsetnBMB5xSmrJ-yo701QLH89Zlz4r1clE,9
|
242
|
+
langwatch_scenario-0.7.9.dist-info/RECORD,,
|
scenario/_events/event_alert_message_logger.py
CHANGED
@@ -1,5 +1,8 @@
|
|
1
1
|
import os
|
2
|
+
import webbrowser
|
2
3
|
from typing import Set
|
4
|
+
|
5
|
+
from ..config.scenario import ScenarioConfig
|
3
6
|
from .._utils.ids import get_batch_run_id
|
4
7
|
|
5
8
|
|
@@ -49,35 +52,13 @@ class EventAlertMessageLogger:
|
|
49
52
|
|
50
53
|
if not os.getenv("LANGWATCH_API_KEY"):
|
51
54
|
print(f"\n{separator}")
|
52
|
-
print("
|
55
|
+
print("🎭 Running Scenario Tests")
|
53
56
|
print(f"{separator}")
|
54
|
-
print("➡️ API key not configured")
|
57
|
+
print("➡️ LangWatch API key not configured")
|
55
58
|
print(" Simulations will only output final results")
|
56
59
|
print("")
|
57
60
|
print("💡 To visualize conversations in real time:")
|
58
61
|
print(" • Set LANGWATCH_API_KEY environment variable")
|
59
|
-
print(" • Or configure apiKey in scenario.config.js")
|
60
|
-
print("")
|
61
|
-
print(f"📦 Batch Run ID: {batch_run_id}")
|
62
|
-
print("")
|
63
|
-
print("🔇 To disable these messages:")
|
64
|
-
print(" • Set SCENARIO_DISABLE_SIMULATION_REPORT_INFO=true")
|
65
|
-
print(f"{separator}\n")
|
66
|
-
else:
|
67
|
-
endpoint = os.getenv("LANGWATCH_ENDPOINT", "https://app.langwatch.ai")
|
68
|
-
api_key = os.getenv("LANGWATCH_API_KEY", "")
|
69
|
-
|
70
|
-
print(f"\n{separator}")
|
71
|
-
print("🚀 LangWatch Simulation Reporting")
|
72
|
-
print(f"{separator}")
|
73
|
-
print("✅ Simulation reporting enabled")
|
74
|
-
print(f" Endpoint: {endpoint}")
|
75
|
-
print(f" API Key: {'Configured' if api_key else 'Not configured'}")
|
76
|
-
print("")
|
77
|
-
print(f"📦 Batch Run ID: {batch_run_id}")
|
78
|
-
print("")
|
79
|
-
print("🔇 To disable these messages:")
|
80
|
-
print(" • Set SCENARIO_DISABLE_SIMULATION_REPORT_INFO=true")
|
81
62
|
print(f"{separator}\n")
|
82
63
|
|
83
64
|
def _display_watch_message(self, set_url: str) -> None:
|
@@ -86,10 +67,15 @@ class EventAlertMessageLogger:
|
|
86
67
|
batch_url = f"{set_url}/{get_batch_run_id()}"
|
87
68
|
|
88
69
|
print(f"\n{separator}")
|
89
|
-
print("
|
70
|
+
print("🎭 Running Scenario Tests")
|
90
71
|
print(f"{separator}")
|
91
|
-
print("
|
92
|
-
print(f" Scenario Set: {set_url}")
|
93
|
-
print(f" Batch Run: {batch_url}")
|
94
|
-
print("")
|
72
|
+
print(f"Follow it live: {batch_url}")
|
95
73
|
print(f"{separator}\n")
|
74
|
+
|
75
|
+
config = ScenarioConfig.default_config
|
76
|
+
if config and not config.headless:
|
77
|
+
# Open the URL in the default browser (cross-platform)
|
78
|
+
try:
|
79
|
+
webbrowser.open(batch_url)
|
80
|
+
except Exception:
|
81
|
+
pass
|
scenario/_events/event_bus.py
CHANGED
@@ -3,6 +3,7 @@ from typing import Optional, Any, Dict
|
|
3
3
|
from .events import ScenarioEvent
|
4
4
|
from .event_reporter import EventReporter
|
5
5
|
from .event_alert_message_logger import EventAlertMessageLogger
|
6
|
+
from ..config.scenario import ScenarioConfig
|
6
7
|
|
7
8
|
import asyncio
|
8
9
|
import queue
|
@@ -35,7 +36,9 @@ class ScenarioEventBus:
|
|
35
36
|
"""
|
36
37
|
|
37
38
|
def __init__(
|
38
|
-
self,
|
39
|
+
self,
|
40
|
+
event_reporter: Optional[EventReporter] = None,
|
41
|
+
max_retries: int = 3,
|
39
42
|
):
|
40
43
|
"""
|
41
44
|
Initialize the event bus with optional event reporter and retry configuration.
|
scenario/_events/event_reporter.py
CHANGED
@@ -3,7 +3,7 @@ import httpx
|
|
3
3
|
from typing import Optional, Dict, Any
|
4
4
|
from .events import ScenarioEvent
|
5
5
|
from .event_alert_message_logger import EventAlertMessageLogger
|
6
|
-
from scenario.config import LangWatchSettings
|
6
|
+
from scenario.config import LangWatchSettings, ScenarioConfig
|
7
7
|
|
8
8
|
|
9
9
|
class EventReporter:
|
@@ -26,7 +26,11 @@ class EventReporter:
|
|
26
26
|
reporter = EventReporter(api_key="your-api-key")
|
27
27
|
"""
|
28
28
|
|
29
|
-
def __init__(
|
29
|
+
def __init__(
|
30
|
+
self,
|
31
|
+
endpoint: Optional[str] = None,
|
32
|
+
api_key: Optional[str] = None,
|
33
|
+
):
|
30
34
|
# Load settings from environment variables
|
31
35
|
langwatch_settings = LangWatchSettings()
|
32
36
|
|
@@ -69,6 +73,7 @@ class EventReporter:
|
|
69
73
|
"Content-Type": "application/json",
|
70
74
|
"X-Auth-Token": self.api_key,
|
71
75
|
},
|
76
|
+
timeout=httpx.Timeout(30.0),
|
72
77
|
)
|
73
78
|
self.logger.info(
|
74
79
|
f"[{event_type}] POST response status: {response.status_code} ({event.scenario_run_id})"
|
@@ -92,7 +97,7 @@ class EventReporter:
|
|
92
97
|
)
|
93
98
|
except Exception as error:
|
94
99
|
self.logger.error(
|
95
|
-
f"[{event_type}] Event POST error: {error}, event={event}, endpoint={self.endpoint}"
|
100
|
+
f"[{event_type}] Event POST error: {repr(error)}, event={event}, endpoint={self.endpoint}"
|
96
101
|
)
|
97
102
|
|
98
103
|
return result
|
scenario/_events/utils.py
CHANGED
@@ -10,7 +10,7 @@ from .messages import (
|
|
10
10
|
FunctionCall,
|
11
11
|
)
|
12
12
|
from typing import List
|
13
|
-
import
|
13
|
+
from pksuid import PKSUID
|
14
14
|
|
15
15
|
def convert_messages_to_api_client_messages(messages: list[ChatCompletionMessageParam]) -> list[MessageType]:
|
16
16
|
"""
|
@@ -33,7 +33,7 @@ def convert_messages_to_api_client_messages(messages: list[ChatCompletionMessage
|
|
33
33
|
|
34
34
|
for i, message in enumerate(messages):
|
35
35
|
# Generate unique ID for each message
|
36
|
-
message_id = message.get("id") or str(
|
36
|
+
message_id = message.get("id") or str(PKSUID('scenariomsg'))
|
37
37
|
|
38
38
|
role = message.get("role")
|
39
39
|
content = message.get("content")
|
@@ -54,7 +54,7 @@ def convert_messages_to_api_client_messages(messages: list[ChatCompletionMessage
|
|
54
54
|
if tool_calls:
|
55
55
|
for tool_call in tool_calls:
|
56
56
|
api_tool_calls.append(ToolCall(
|
57
|
-
id=tool_call.get("id", str(
|
57
|
+
id=tool_call.get("id", str(PKSUID('scenariotoolcall'))),
|
58
58
|
type_="function",
|
59
59
|
function=FunctionCall(
|
60
60
|
name=tool_call["function"].get("name", "unknown"),
|
scenario/_utils/__init__.py
CHANGED
@@ -14,7 +14,7 @@ from .ids import (
|
|
14
14
|
generate_scenario_id,
|
15
15
|
generate_thread_id,
|
16
16
|
generate_message_id,
|
17
|
-
|
17
|
+
safe_parse_ksuid,
|
18
18
|
)
|
19
19
|
from .utils import (
|
20
20
|
SerializableAndPydanticEncoder,
|
@@ -34,7 +34,7 @@ __all__ = [
|
|
34
34
|
"generate_scenario_id",
|
35
35
|
"generate_thread_id",
|
36
36
|
"generate_message_id",
|
37
|
-
"
|
37
|
+
"safe_parse_ksuid",
|
38
38
|
"SerializableAndPydanticEncoder",
|
39
39
|
"SerializableWithStringFallback",
|
40
40
|
"print_openai_messages",
|
scenario/_utils/ids.py
CHANGED
@@ -7,7 +7,7 @@ and scenario tracking.
|
|
7
7
|
"""
|
8
8
|
|
9
9
|
import os
|
10
|
-
import
|
10
|
+
from pksuid import PKSUID
|
11
11
|
|
12
12
|
|
13
13
|
def generate_thread_id() -> str:
|
@@ -17,7 +17,7 @@ def generate_thread_id() -> str:
|
|
17
17
|
Returns:
|
18
18
|
str: A new thread ID.
|
19
19
|
"""
|
20
|
-
return f"
|
20
|
+
return f"{PKSUID('scenariothread')}"
|
21
21
|
|
22
22
|
|
23
23
|
def generate_scenario_run_id() -> str:
|
@@ -27,7 +27,7 @@ def generate_scenario_run_id() -> str:
|
|
27
27
|
Returns:
|
28
28
|
str: A new scenario run ID.
|
29
29
|
"""
|
30
|
-
return f"
|
30
|
+
return f"{PKSUID('scenariorun')}"
|
31
31
|
|
32
32
|
|
33
33
|
def generate_scenario_id() -> str:
|
@@ -37,7 +37,7 @@ def generate_scenario_id() -> str:
|
|
37
37
|
Returns:
|
38
38
|
str: A new scenario ID.
|
39
39
|
"""
|
40
|
-
return f"
|
40
|
+
return f"{PKSUID('scenario')}"
|
41
41
|
|
42
42
|
|
43
43
|
def get_batch_run_id() -> str:
|
@@ -52,7 +52,7 @@ def get_batch_run_id() -> str:
|
|
52
52
|
batch_run_id = os.environ.get("SCENARIO_BATCH_RUN_ID")
|
53
53
|
if not batch_run_id:
|
54
54
|
# Generate new batch ID if not set
|
55
|
-
batch_run_id = f"
|
55
|
+
batch_run_id = f"{PKSUID('scenariobatch')}"
|
56
56
|
os.environ["SCENARIO_BATCH_RUN_ID"] = batch_run_id
|
57
57
|
|
58
58
|
return batch_run_id
|
@@ -65,23 +65,23 @@ def generate_message_id() -> str:
|
|
65
65
|
Returns:
|
66
66
|
str: A new message ID.
|
67
67
|
"""
|
68
|
-
return f"
|
68
|
+
return f"{PKSUID('scenariomsg')}"
|
69
69
|
|
70
70
|
|
71
|
-
def
|
71
|
+
def safe_parse_ksuid(id_str: str) -> bool:
|
72
72
|
"""
|
73
|
-
Safely parses a
|
73
|
+
Safely parses a Ksuid string.
|
74
74
|
|
75
75
|
Args:
|
76
|
-
id_str: The
|
76
|
+
id_str: The Ksuid string to parse.
|
77
77
|
|
78
78
|
Returns:
|
79
|
-
bool: True if the
|
79
|
+
bool: True if the Ksuid string is valid, false otherwise.
|
80
80
|
"""
|
81
81
|
try:
|
82
|
-
|
82
|
+
PKSUID.parse(id_str)
|
83
83
|
return True
|
84
|
-
except
|
84
|
+
except Exception:
|
85
85
|
return False
|
86
86
|
|
87
87
|
|
scenario/config/model.py
CHANGED
@@ -17,7 +17,8 @@ class ModelConfig(BaseModel):
|
|
17
17
|
for use with user simulator and judge agents in the Scenario framework.
|
18
18
|
|
19
19
|
Attributes:
|
20
|
-
model: The model identifier (e.g., "openai/gpt-4.1
|
20
|
+
model: The model identifier (e.g., "openai/gpt-4.1", "anthropic/claude-3-sonnet")
|
21
|
+
api_base: Optional base URL where the model is hosted
|
21
22
|
api_key: Optional API key for the model provider
|
22
23
|
temperature: Sampling temperature for response generation (0.0 = deterministic, 1.0 = creative)
|
23
24
|
max_tokens: Maximum number of tokens to generate in responses
|
@@ -26,6 +27,7 @@ class ModelConfig(BaseModel):
|
|
26
27
|
```
|
27
28
|
model_config = ModelConfig(
|
28
29
|
model="openai/gpt-4.1",
|
30
|
+
api_base="https://api.openai.com/v1",
|
29
31
|
api_key="your-api-key",
|
30
32
|
temperature=0.1,
|
31
33
|
max_tokens=1000
|
@@ -34,6 +36,7 @@ class ModelConfig(BaseModel):
|
|
34
36
|
"""
|
35
37
|
|
36
38
|
model: str
|
39
|
+
api_base: Optional[str] = None
|
37
40
|
api_key: Optional[str] = None
|
38
41
|
temperature: float = 0.0
|
39
42
|
max_tokens: Optional[int] = None
|
scenario/config/scenario.py
CHANGED
@@ -5,6 +5,7 @@ This module provides the main configuration class for customizing the behavior
|
|
5
5
|
of the Scenario testing framework, including execution parameters and debugging options.
|
6
6
|
"""
|
7
7
|
|
8
|
+
import os
|
8
9
|
from typing import Optional, Union, ClassVar
|
9
10
|
from pydantic import BaseModel
|
10
11
|
|
@@ -53,6 +54,11 @@ class ScenarioConfig(BaseModel):
|
|
53
54
|
verbose: Optional[Union[bool, int]] = True
|
54
55
|
cache_key: Optional[str] = None
|
55
56
|
debug: Optional[bool] = False
|
57
|
+
headless: Optional[bool] = os.getenv("SCENARIO_HEADLESS", "false").lower() not in [
|
58
|
+
"false",
|
59
|
+
"0",
|
60
|
+
"",
|
61
|
+
]
|
56
62
|
|
57
63
|
default_config: ClassVar[Optional["ScenarioConfig"]] = None
|
58
64
|
|
@@ -64,6 +70,7 @@ class ScenarioConfig(BaseModel):
|
|
64
70
|
verbose: Optional[Union[bool, int]] = None,
|
65
71
|
cache_key: Optional[str] = None,
|
66
72
|
debug: Optional[bool] = None,
|
73
|
+
headless: Optional[bool] = None,
|
67
74
|
) -> None:
|
68
75
|
"""
|
69
76
|
Set global configuration settings for all scenario executions.
|
@@ -107,6 +114,7 @@ class ScenarioConfig(BaseModel):
|
|
107
114
|
verbose=verbose,
|
108
115
|
cache_key=cache_key,
|
109
116
|
debug=debug,
|
117
|
+
headless=headless,
|
110
118
|
)
|
111
119
|
)
|
112
120
|
|
scenario/judge_agent.py
CHANGED
@@ -41,6 +41,7 @@ class JudgeAgent(AgentAdapter):
|
|
41
41
|
Attributes:
|
42
42
|
role: Always AgentRole.JUDGE for judge agents
|
43
43
|
model: LLM model identifier to use for evaluation
|
44
|
+
api_base: Optional base URL where the model is hosted
|
44
45
|
api_key: Optional API key for the model provider
|
45
46
|
temperature: Sampling temperature for evaluation consistency
|
46
47
|
max_tokens: Maximum tokens for judge reasoning
|
@@ -97,6 +98,7 @@ class JudgeAgent(AgentAdapter):
|
|
97
98
|
role = AgentRole.JUDGE
|
98
99
|
|
99
100
|
model: str
|
101
|
+
api_base: Optional[str]
|
100
102
|
api_key: Optional[str]
|
101
103
|
temperature: float
|
102
104
|
max_tokens: Optional[int]
|
@@ -108,6 +110,7 @@ class JudgeAgent(AgentAdapter):
|
|
108
110
|
*,
|
109
111
|
criteria: Optional[List[str]] = None,
|
110
112
|
model: Optional[str] = None,
|
113
|
+
api_base: Optional[str] = None,
|
111
114
|
api_key: Optional[str] = None,
|
112
115
|
temperature: float = 0.0,
|
113
116
|
max_tokens: Optional[int] = None,
|
@@ -122,6 +125,8 @@ class JudgeAgent(AgentAdapter):
|
|
122
125
|
and negative constraints ("Agent should not provide personal information").
|
123
126
|
model: LLM model identifier (e.g., "openai/gpt-4.1").
|
124
127
|
If not provided, uses the default model from global configuration.
|
128
|
+
api_base: Optional base URL where the model is hosted. If not provided,
|
129
|
+
uses the base URL from global configuration.
|
125
130
|
api_key: API key for the model provider. If not provided,
|
126
131
|
uses the key from global configuration or environment.
|
127
132
|
temperature: Sampling temperature for evaluation (0.0-1.0).
|
@@ -156,6 +161,7 @@ class JudgeAgent(AgentAdapter):
|
|
156
161
|
"""
|
157
162
|
# Override the default system prompt for the judge agent
|
158
163
|
self.criteria = criteria or []
|
164
|
+
self.api_base = api_base
|
159
165
|
self.api_key = api_key
|
160
166
|
self.temperature = temperature
|
161
167
|
self.max_tokens = max_tokens
|
@@ -172,6 +178,9 @@ class JudgeAgent(AgentAdapter):
|
|
172
178
|
ScenarioConfig.default_config.default_model, ModelConfig
|
173
179
|
):
|
174
180
|
self.model = model or ScenarioConfig.default_config.default_model.model
|
181
|
+
self.api_base = (
|
182
|
+
api_base or ScenarioConfig.default_config.default_model.api_base
|
183
|
+
)
|
175
184
|
self.api_key = (
|
176
185
|
api_key or ScenarioConfig.default_config.default_model.api_key
|
177
186
|
)
|
@@ -351,6 +360,8 @@ if you don't have enough information to make a verdict, say inconclusive with ma
|
|
351
360
|
model=self.model,
|
352
361
|
messages=messages,
|
353
362
|
temperature=self.temperature,
|
363
|
+
api_key=self.api_key,
|
364
|
+
api_base=self.api_base,
|
354
365
|
max_tokens=self.max_tokens,
|
355
366
|
tools=tools,
|
356
367
|
tool_choice=(
|
@@ -387,7 +398,7 @@ if you don't have enough information to make a verdict, say inconclusive with ma
|
|
387
398
|
failed_criteria = [
|
388
399
|
self.criteria[idx]
|
389
400
|
for idx, criterion in enumerate(criteria.values())
|
390
|
-
if criterion == False
|
401
|
+
if criterion == False or criterion == "inconclusive"
|
391
402
|
]
|
392
403
|
|
393
404
|
# Return the appropriate ScenarioResult based on the verdict
|
scenario/py.typed
ADDED
File without changes
|
scenario/pytest_plugin.py
CHANGED
@@ -199,6 +199,8 @@ class ScenarioReporter:
|
|
199
199
|
# Store the original run method
|
200
200
|
original_run = ScenarioExecutor.run
|
201
201
|
|
202
|
+
def pytest_addoption(parser):
|
203
|
+
parser.addoption("--headless", action="store_true")
|
202
204
|
|
203
205
|
@pytest.hookimpl(trylast=True)
|
204
206
|
def pytest_configure(config):
|
@@ -240,6 +242,9 @@ def pytest_configure(config):
|
|
240
242
|
print(colored("\nScenario debug mode enabled (--debug).", "yellow"))
|
241
243
|
ScenarioConfig.configure(verbose=True, debug=True)
|
242
244
|
|
245
|
+
if config.getoption("--headless"):
|
246
|
+
ScenarioConfig.configure(headless=True)
|
247
|
+
|
243
248
|
# Create a global reporter instance
|
244
249
|
config._scenario_reporter = ScenarioReporter()
|
245
250
|
|
scenario/scenario_executor.py
CHANGED
@@ -153,6 +153,7 @@ class ScenarioExecutor:
|
|
153
153
|
verbose=verbose,
|
154
154
|
cache_key=cache_key,
|
155
155
|
debug=debug,
|
156
|
+
headless=None,
|
156
157
|
)
|
157
158
|
self.config = (ScenarioConfig.default_config or ScenarioConfig()).merge(config)
|
158
159
|
|
@@ -198,7 +199,7 @@ class ScenarioExecutor:
|
|
198
199
|
self._state = ScenarioState(
|
199
200
|
description=self.description,
|
200
201
|
messages=[],
|
201
|
-
thread_id=str(PKSUID("
|
202
|
+
thread_id=str(PKSUID("scenariothread")),
|
202
203
|
current_turn=0,
|
203
204
|
config=self.config,
|
204
205
|
_executor=self,
|
scenario/user_simulator_agent.py
CHANGED
@@ -37,6 +37,7 @@ class UserSimulatorAgent(AgentAdapter):
|
|
37
37
|
Attributes:
|
38
38
|
role: Always AgentRole.USER for user simulator agents
|
39
39
|
model: LLM model identifier to use for generating user messages
|
40
|
+
api_base: Optional base URL where the model is hosted
|
40
41
|
api_key: Optional API key for the model provider
|
41
42
|
temperature: Sampling temperature for response generation
|
42
43
|
max_tokens: Maximum tokens to generate in user messages
|
@@ -76,9 +77,11 @@ class UserSimulatorAgent(AgentAdapter):
|
|
76
77
|
- Messages are generated in a casual, human-like style (lowercase, brief, etc.)
|
77
78
|
- The simulator will not act as an assistant - it only generates user inputs
|
78
79
|
"""
|
80
|
+
|
79
81
|
role = AgentRole.USER
|
80
82
|
|
81
83
|
model: str
|
84
|
+
api_base: Optional[str]
|
82
85
|
api_key: Optional[str]
|
83
86
|
temperature: float
|
84
87
|
max_tokens: Optional[int]
|
@@ -88,6 +91,7 @@ class UserSimulatorAgent(AgentAdapter):
|
|
88
91
|
self,
|
89
92
|
*,
|
90
93
|
model: Optional[str] = None,
|
94
|
+
api_base: Optional[str] = None,
|
91
95
|
api_key: Optional[str] = None,
|
92
96
|
temperature: float = 0.0,
|
93
97
|
max_tokens: Optional[int] = None,
|
@@ -99,6 +103,8 @@ class UserSimulatorAgent(AgentAdapter):
|
|
99
103
|
Args:
|
100
104
|
model: LLM model identifier (e.g., "openai/gpt-4.1").
|
101
105
|
If not provided, uses the default model from global configuration.
|
106
|
+
api_base: Optional base URL where the model is hosted. If not provided,
|
107
|
+
uses the base URL from global configuration.
|
102
108
|
api_key: API key for the model provider. If not provided,
|
103
109
|
uses the key from global configuration or environment.
|
104
110
|
temperature: Sampling temperature for message generation (0.0-1.0).
|
@@ -128,6 +134,7 @@ class UserSimulatorAgent(AgentAdapter):
|
|
128
134
|
```
|
129
135
|
"""
|
130
136
|
# Override the default system prompt for the user simulator agent
|
137
|
+
self.api_base = api_base
|
131
138
|
self.api_key = api_key
|
132
139
|
self.temperature = temperature
|
133
140
|
self.max_tokens = max_tokens
|
@@ -144,6 +151,9 @@ class UserSimulatorAgent(AgentAdapter):
|
|
144
151
|
ScenarioConfig.default_config.default_model, ModelConfig
|
145
152
|
):
|
146
153
|
self.model = model or ScenarioConfig.default_config.default_model.model
|
154
|
+
self.api_base = (
|
155
|
+
api_base or ScenarioConfig.default_config.default_model.api_base
|
156
|
+
)
|
147
157
|
self.api_key = (
|
148
158
|
api_key or ScenarioConfig.default_config.default_model.api_key
|
149
159
|
)
|
@@ -222,6 +232,8 @@ Your goal (assistant) is to interact with the Agent Under Test (user) as if you
|
|
222
232
|
model=self.model,
|
223
233
|
messages=messages,
|
224
234
|
temperature=self.temperature,
|
235
|
+
api_key=self.api_key,
|
236
|
+
api_base=self.api_base,
|
225
237
|
max_tokens=self.max_tokens,
|
226
238
|
tools=[],
|
227
239
|
),
|
File without changes
|
File without changes
|
File without changes
|