langwatch-scenario 0.7.2__py3-none-any.whl → 0.7.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {langwatch_scenario-0.7.2.dist-info → langwatch_scenario-0.7.7.dist-info}/METADATA +56 -12
- {langwatch_scenario-0.7.2.dist-info → langwatch_scenario-0.7.7.dist-info}/RECORD +21 -17
- scenario/__init__.py +1 -1
- scenario/_error_messages.py +2 -2
- scenario/_events/event_alert_message_logger.py +95 -0
- scenario/_events/event_bus.py +90 -30
- scenario/_events/event_reporter.py +43 -28
- scenario/_generated/langwatch_api_client/README.md +27 -17
- scenario/_utils/__init__.py +16 -3
- scenario/_utils/ids.py +76 -38
- scenario/config/__init__.py +43 -0
- scenario/config/langwatch.py +51 -0
- scenario/config/model.py +39 -0
- scenario/{config.py → config/scenario.py} +5 -34
- scenario/judge_agent.py +2 -2
- scenario/scenario_executor.py +16 -4
- scenario/scenario_state.py +2 -1
- scenario/user_simulator_agent.py +6 -6
- {langwatch_scenario-0.7.2.dist-info → langwatch_scenario-0.7.7.dist-info}/WHEEL +0 -0
- {langwatch_scenario-0.7.2.dist-info → langwatch_scenario-0.7.7.dist-info}/entry_points.txt +0 -0
- {langwatch_scenario-0.7.2.dist-info → langwatch_scenario-0.7.7.dist-info}/top_level.txt +0 -0
scenario/_events/event_reporter.py
CHANGED
@@ -1,8 +1,9 @@
 import logging
-import os
 import httpx
-from typing import Optional
+from typing import Optional, Dict, Any
 from .events import ScenarioEvent
+from .event_alert_message_logger import EventAlertMessageLogger
+from scenario.config import LangWatchSettings
 
 
 class EventReporter:
@@ -13,51 +14,54 @@ class EventReporter:
     with proper authentication and error handling.
 
     Args:
-        endpoint (str, optional):
-        api_key (str, optional):
+        endpoint (str, optional): Override endpoint URL. If not provided, uses LANGWATCH_ENDPOINT env var.
+        api_key (str, optional): Override API key. If not provided, uses LANGWATCH_API_KEY env var.
 
     Example:
-
-
-
-
-
-
-                "name": "test",
-                "description": "test scenario"
-            }
-        }
-
-        reporter = EventReporter(endpoint="https://api.langwatch.ai", api_key="test-api-key")
-        await reporter.post_event(event)
+        # Using environment variables (LANGWATCH_ENDPOINT, LANGWATCH_API_KEY)
+        reporter = EventReporter()
+
+        # Override specific values
+        reporter = EventReporter(endpoint="https://langwatch.yourdomain.com")
+        reporter = EventReporter(api_key="your-api-key")
     """
 
     def __init__(self, endpoint: Optional[str] = None, api_key: Optional[str] = None):
-
-
+        # Load settings from environment variables
+        langwatch_settings = LangWatchSettings()
+
+        # Allow constructor parameters to override settings
+        self.endpoint = endpoint or langwatch_settings.endpoint
+        self.api_key = api_key or langwatch_settings.api_key
         self.logger = logging.getLogger(__name__)
+        self.event_alert_message_logger = EventAlertMessageLogger()
+
+        # Show greeting message when reporter is initialized
+        self.event_alert_message_logger.handle_greeting()
 
-    async def post_event(self, event: ScenarioEvent):
+    async def post_event(self, event: ScenarioEvent) -> Dict[str, Any]:
         """
         Posts an event to the configured endpoint.
 
         Args:
-            event: A
+            event: A ScenarioEvent containing the event data
 
         Returns:
-
+            Dict containing response data, including setUrl if available
         """
         event_type = event.type_
         self.logger.info(f"[{event_type}] Publishing event ({event.scenario_run_id})")
 
+        result: Dict[str, Any] = {}
+
         if not self.endpoint:
             self.logger.warning(
                 "No LANGWATCH_ENDPOINT configured, skipping event posting"
             )
-            return
+            return result
 
         try:
-            async with httpx.AsyncClient() as client:
+            async with httpx.AsyncClient(follow_redirects=True) as client:
                 response = await client.post(
                     f"{self.endpoint}/api/scenario-events",
                     json=event.to_dict(),
@@ -66,11 +70,19 @@
                         "X-Auth-Token": self.api_key,
                     },
                 )
-                self.logger.info(
-
+                self.logger.info(
+                    f"[{event_type}] POST response status: {response.status_code} ({event.scenario_run_id})"
+                )
+
                 if response.is_success:
                     data = response.json()
-                    self.logger.info(
+                    self.logger.info(
+                        f"[{event_type}] POST response: {data} ({event.scenario_run_id})"
+                    )
+
+                    # Extract setUrl from response if available
+                    if isinstance(data, dict) and "url" in data:
+                        result["setUrl"] = data["url"]
                 else:
                     error_text = response.text
                     self.logger.error(
@@ -80,4 +92,7 @@
                     )
         except Exception as error:
             self.logger.error(
-                f"[{event_type}] Event POST error: {error}, event={event}, endpoint={self.endpoint}"
+                f"[{event_type}] Event POST error: {error}, event={event}, endpoint={self.endpoint}"
+            )
+
+        return result
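Taken together, these changes mean `EventReporter` now configures itself through `LangWatchSettings` and `post_event` returns a `Dict[str, Any]` (possibly carrying `setUrl`) instead of `None`. A minimal sketch of consuming the new return value; the `publish` wrapper and its untyped `event` argument are illustrative, not part of the package:

```python
from typing import Any, Dict

from scenario._events.event_reporter import EventReporter


async def publish(event) -> Dict[str, Any]:
    # With no arguments, endpoint/api_key resolve from LANGWATCH_ENDPOINT /
    # LANGWATCH_API_KEY via LangWatchSettings, and a greeting is logged once
    reporter = EventReporter()

    # As of 0.7.7 this returns a dict instead of None
    result = await reporter.post_event(event)

    # "setUrl" is populated from the API response's "url" field when present
    if "setUrl" in result:
        print(f"Scenario run URL: {result['setUrl']}")
    return result
```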
scenario/_generated/langwatch_api_client/README.md
CHANGED
@@ -1,15 +1,19 @@
 # lang-watch-api-client
+
 **⚠️ AUTO-GENERATED CODE - DO NOT EDIT MANUALLY ⚠️**
 
 This is an auto-generated client library for accessing LangWatch API, created using `openapi-python-client`.
 
 ## Regeneration
+
 To regenerate this client:
+
 ```bash
 make generate-openapi-client
 ```
 
 ## Source
+
 Generated from: `../langwatch-saas/langwatch/langwatch/src/app/api/openapiLangWatch.json`
 
 ---
@@ -17,12 +21,13 @@ Generated from: `../langwatch-saas/langwatch/langwatch/src/app/api/openapiLangWatch.json`
 A client library for accessing LangWatch API
 
 ## Usage
+
 First, create a client:
 
 ```python
 from lang_watch_api_client import Client
 
-client = Client(base_url="https://
+client = Client(base_url="https://app.langwatch.ai")
 ```
 
 If the endpoints you're going to hit require authentication, use `AuthenticatedClient` instead:
@@ -30,7 +35,7 @@ If the endpoints you're going to hit require authentication, use `AuthenticatedClient` instead:
 ```python
 from lang_watch_api_client import AuthenticatedClient
 
-client = AuthenticatedClient(base_url="https://
+client = AuthenticatedClient(base_url="https://app.langwatch.ai", token="SuperSecretToken")
 ```
 
 Now call your endpoint and use your models:
@@ -62,7 +67,7 @@ By default, when you're calling an HTTPS API it will attempt to verify that SSL
 
 ```python
 client = AuthenticatedClient(
-    base_url="https://
+    base_url="https://app.langwatch.ai",
     token="SuperSecretToken",
     verify_ssl="/path/to/certificate_bundle.pem",
 )
@@ -72,18 +77,20 @@ You can also disable certificate validation altogether, but beware that **this i
 
 ```python
 client = AuthenticatedClient(
-    base_url="https://
-    token="SuperSecretToken",
+    base_url="https://app.langwatch.ai",
+    token="SuperSecretToken",
     verify_ssl=False
 )
 ```
 
 Things to know:
+
 1. Every path/method combo becomes a Python module with four functions:
-
-
-
-
+
+    1. `sync`: Blocking request that returns parsed data (if successful) or `None`
+    1. `sync_detailed`: Blocking request that always returns a `Request`, optionally with `parsed` set if the request was successful.
+    1. `asyncio`: Like `sync` but async instead of blocking
+    1. `asyncio_detailed`: Like `sync_detailed` but async instead of blocking
 
 1. All path/query params, and bodies become method arguments.
 1. If your endpoint had any tags on it, the first tag will be used as a module name for the function (my_tag above)
@@ -104,7 +111,7 @@ def log_response(response):
     print(f"Response event hook: {request.method} {request.url} - Status {response.status_code}")
 
 client = Client(
-    base_url="https://
+    base_url="https://app.langwatch.ai",
     httpx_args={"event_hooks": {"request": [log_request], "response": [log_response]}},
 )
 
@@ -118,22 +125,25 @@ import httpx
 from lang_watch_api_client import Client
 
 client = Client(
-    base_url="https://
+    base_url="https://app.langwatch.ai",
 )
 # Note that base_url needs to be re-set, as would any shared cookies, headers, etc.
-client.set_httpx_client(httpx.Client(base_url="https://
+client.set_httpx_client(httpx.Client(base_url="https://app.langwatch.ai", proxies="http://localhost:8030"))
 ```
 
 ## Building / publishing this package
-
+
+This project uses [Poetry](https://python-poetry.org/) to manage dependencies and packaging. Here are the basics:
+
 1. Update the metadata in pyproject.toml (e.g. authors, version)
 1. If you're using a private repository, configure it with Poetry
-
-
+    1. `poetry config repositories.<your-repository-name> <url-to-your-repository>`
+    1. `poetry config http-basic.<your-repository-name> <username> <password>`
 1. Publish the client with `poetry publish --build -r <your-repository-name>` or, if for public PyPI, just `poetry publish --build`
 
 If you want to install this client into another project without publishing it (e.g. for development) then:
+
 1. If that project **is using Poetry**, you can simply do `poetry add <path-to-this-client>` from that project
 1. If that project is not using Poetry:
-
-
+    1. Build a wheel with `poetry build -f wheel`
+    1. Install that wheel from the other project `pip install <path-to-wheel>`
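The four per-endpoint functions described under "Things to know" follow the standard `openapi-python-client` layout; a hedged sketch of calling one (the `my_tag`/`get_my_data_model` module and `MyDataModel` type are the generator's placeholder names, not actual LangWatch endpoints):

```python
from lang_watch_api_client import Client
from lang_watch_api_client.api.my_tag import get_my_data_model  # placeholder endpoint module
from lang_watch_api_client.models import MyDataModel            # placeholder model
from lang_watch_api_client.types import Response

client = Client(base_url="https://app.langwatch.ai")

# sync: blocking call that returns the parsed model, or None on failure
my_data: MyDataModel = get_my_data_model.sync(client=client)

# sync_detailed: blocking call that always returns a response wrapper,
# with .parsed set when the request succeeded
detailed: Response[MyDataModel] = get_my_data_model.sync_detailed(client=client)
```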
scenario/_utils/__init__.py
CHANGED
@@ -7,7 +7,15 @@ for better user experience during scenario execution.
 """
 
 from .message_conversion import convert_agent_return_types_to_openai_messages
-from .ids import
+from .ids import (
+    get_batch_run_id,
+    get_or_create_batch_run_id,  # Backward compatibility
+    generate_scenario_run_id,
+    generate_scenario_id,
+    generate_thread_id,
+    generate_message_id,
+    safe_parse_uuid,
+)
 from .utils import (
     SerializableAndPydanticEncoder,
     SerializableWithStringFallback,
@@ -20,8 +28,13 @@ from .utils import (
 
 __all__ = [
     "convert_agent_return_types_to_openai_messages",
-    "
+    "get_batch_run_id",
+    "get_or_create_batch_run_id",  # Backward compatibility
     "generate_scenario_run_id",
+    "generate_scenario_id",
+    "generate_thread_id",
+    "generate_message_id",
+    "safe_parse_uuid",
     "SerializableAndPydanticEncoder",
     "SerializableWithStringFallback",
     "print_openai_messages",
@@ -29,4 +42,4 @@ __all__ = [
     "check_valid_return_type",
     "reverse_roles",
     "await_if_awaitable",
-]
+]
scenario/_utils/ids.py
CHANGED
@@ -10,49 +10,87 @@ import os
 import uuid
 
 
-def
+def generate_thread_id() -> str:
+    """
+    Generates a new thread ID.
+
+    Returns:
+        str: A new thread ID.
+    """
+    return f"thread_{uuid.uuid4()}"
+
+
+def generate_scenario_run_id() -> str:
+    """
+    Generates a new scenario run ID.
+
+    Returns:
+        str: A new scenario run ID.
+    """
+    return f"scenariorun_{uuid.uuid4()}"
+
+
+def generate_scenario_id() -> str:
+    """
+    Generates a new scenario ID.
+
+    Returns:
+        str: A new scenario ID.
+    """
+    return f"scenario_{uuid.uuid4()}"
+
+
+def get_batch_run_id() -> str:
     """
-    Gets
-
-
-    execution, allowing grouping of related scenario runs. This is useful
-    for tracking and reporting on batches of scenarios run together.
-
+    Gets the batch run ID. If it's not set, it will be generated.
+    It can be set via the SCENARIO_BATCH_RUN_ID environment variable.
+
     Returns:
-        str:
-
-    Example:
-        ```python
-        # All scenarios in same process will share this ID
-        batch_id = get_or_create_batch_run_id()
-        print(f"Running scenario in batch: {batch_id}")
-        ```
-    """
-
+        str: The batch run ID.
+    """
     # Check if batch ID already exists in environment
-
+    batch_run_id = os.environ.get("SCENARIO_BATCH_RUN_ID")
+    if not batch_run_id:
         # Generate new batch ID if not set
-
-
-    return os.environ["SCENARIO_BATCH_ID"]
+        batch_run_id = f"scenariobatchrun_{uuid.uuid4()}"
+        os.environ["SCENARIO_BATCH_RUN_ID"] = batch_run_id
 
+    return batch_run_id
 
-
+
+
+def generate_message_id() -> str:
+    """
+    Generates a new message ID.
+
+    Returns:
+        str: A new message ID.
+    """
+    return f"scenariomsg_{uuid.uuid4()}"
+
+
+def safe_parse_uuid(id_str: str) -> bool:
     """
-
-
-
-
-
-
+    Safely parses a UUID string.
+
+    Args:
+        id_str: The UUID string to parse.
+
+    Returns:
+        bool: True if the UUID string is valid, false otherwise.
+    """
+    try:
+        uuid.UUID(id_str)
+        return True
+    except (ValueError, TypeError):
+        return False
+
+
+# Backward compatibility aliases
+def get_or_create_batch_run_id() -> str:
+    """
+    Backward compatibility alias for get_batch_run_id().
+
     Returns:
-        str:
-
-        ```python
-        # Each scenario gets its own unique ID
-        scenario_id = generate_scenario_run_id()
-        print(f"Running scenario with ID: {scenario_id}")
-        ```
-    """
-    return f"scenario-run-{uuid.uuid4()}"
+        str: The batch run ID.
+    """
+    return get_batch_run_id()
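In short: the batch ID is now memoized in the `SCENARIO_BATCH_RUN_ID` environment variable (the old code read `SCENARIO_BATCH_ID`), scenario run IDs switch from a `scenario-run-` to a `scenariorun_` prefix, and `safe_parse_uuid` is a boolean validity check. A small sketch exercising the new helpers:

```python
from scenario._utils.ids import (
    generate_scenario_run_id,
    get_batch_run_id,
    get_or_create_batch_run_id,
    safe_parse_uuid,
)

# The first call generates an ID and stores it in SCENARIO_BATCH_RUN_ID,
# so every scenario in the same process shares it
assert get_batch_run_id() == get_batch_run_id()

# The pre-0.7.7 name keeps working as a thin alias
assert get_or_create_batch_run_id() == get_batch_run_id()

print(generate_scenario_run_id())     # e.g. scenariorun_7f3a...
print(safe_parse_uuid("not-a-uuid"))  # False: returns a bool instead of raising
```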
scenario/config/__init__.py
ADDED
@@ -0,0 +1,43 @@
+"""
+Configuration module for Scenario.
+
+This module provides all configuration classes for customizing the behavior
+of the Scenario testing framework, including model settings, scenario execution
+parameters, and LangWatch integration.
+
+Classes:
+    ModelConfig: Configuration for LLM model settings
+    ScenarioConfig: Main configuration for scenario execution
+    LangWatchSettings: Configuration for LangWatch API integration
+
+Example:
+    ```
+    from scenario.config import ModelConfig, ScenarioConfig, LangWatchSettings
+
+    # Configure LLM model
+    model_config = ModelConfig(
+        model="openai/gpt-4.1-mini",
+        temperature=0.1
+    )
+
+    # Configure scenario execution
+    scenario_config = ScenarioConfig(
+        default_model=model_config,
+        max_turns=15,
+        verbose=True
+    )
+
+    # Configure LangWatch integration
+    langwatch_settings = LangWatchSettings()  # Reads from environment
+    ```
+"""
+
+from .model import ModelConfig
+from .scenario import ScenarioConfig
+from .langwatch import LangWatchSettings
+
+__all__ = [
+    "ModelConfig",
+    "ScenarioConfig",
+    "LangWatchSettings",
+]
scenario/config/langwatch.py
ADDED
@@ -0,0 +1,51 @@
+"""
+LangWatch configuration for Scenario.
+
+This module provides configuration for LangWatch API integration,
+including endpoint URLs and authentication credentials.
+"""
+
+from pydantic import Field, HttpUrl
+from pydantic_settings import BaseSettings, SettingsConfigDict
+
+
+class LangWatchSettings(BaseSettings):
+    """
+    Configuration for LangWatch API integration.
+
+    This class handles configuration for connecting to LangWatch services,
+    automatically reading from environment variables with the LANGWATCH_ prefix.
+
+    Attributes:
+        endpoint: LangWatch API endpoint URL
+        api_key: API key for LangWatch authentication
+
+    Environment Variables:
+        LANGWATCH_ENDPOINT: LangWatch API endpoint (defaults to https://app.langwatch.ai)
+        LANGWATCH_API_KEY: API key for authentication (defaults to empty string)
+
+    Example:
+        ```
+        # Using environment variables
+        # export LANGWATCH_ENDPOINT="https://app.langwatch.ai"
+        # export LANGWATCH_API_KEY="your-api-key"
+
+        settings = LangWatchSettings()
+        print(settings.endpoint)  # https://app.langwatch.ai
+        print(settings.api_key)   # your-api-key
+
+        # Or override programmatically
+        settings = LangWatchSettings(
+            endpoint="https://custom.langwatch.ai",
+            api_key="your-api-key"
+        )
+        ```
+    """
+
+    model_config = SettingsConfigDict(env_prefix="LANGWATCH_", case_sensitive=False)
+
+    endpoint: HttpUrl = Field(
+        default=HttpUrl("https://app.langwatch.ai"),
+        description="LangWatch API endpoint URL",
+    )
+    api_key: str = Field(default="", description="API key for LangWatch authentication")
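Because `LangWatchSettings` is a pydantic-settings model with the `LANGWATCH_` prefix, values resolve from the environment at construction time, and `endpoint` is a pydantic `HttpUrl` rather than a plain string. A sketch under those assumptions (the host is made up):

```python
import os

from scenario.config import LangWatchSettings

os.environ["LANGWATCH_ENDPOINT"] = "https://langwatch.example.internal"
os.environ["LANGWATCH_API_KEY"] = "sk-example"

settings = LangWatchSettings()

# HttpUrl normalizes the value, so cast to str before building request URLs
base = str(settings.endpoint).rstrip("/")
print(f"{base}/api/scenario-events")  # https://langwatch.example.internal/api/scenario-events
print(settings.api_key)               # sk-example
```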
scenario/config/model.py
ADDED
@@ -0,0 +1,39 @@
+"""
+Model configuration for Scenario.
+
+This module provides configuration classes for LLM model settings used by
+user simulator and judge agents in the Scenario framework.
+"""
+
+from typing import Optional
+from pydantic import BaseModel
+
+
+class ModelConfig(BaseModel):
+    """
+    Configuration for LLM model settings.
+
+    This class encapsulates all the parameters needed to configure an LLM model
+    for use with user simulator and judge agents in the Scenario framework.
+
+    Attributes:
+        model: The model identifier (e.g., "openai/gpt-4.1-mini", "anthropic/claude-3-sonnet")
+        api_key: Optional API key for the model provider
+        temperature: Sampling temperature for response generation (0.0 = deterministic, 1.0 = creative)
+        max_tokens: Maximum number of tokens to generate in responses
+
+    Example:
+        ```
+        model_config = ModelConfig(
+            model="openai/gpt-4.1",
+            api_key="your-api-key",
+            temperature=0.1,
+            max_tokens=1000
+        )
+        ```
+    """
+
+    model: str
+    api_key: Optional[str] = None
+    temperature: float = 0.0
+    max_tokens: Optional[int] = None
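Only `model` is required; the remaining fields fall back to the defaults declared above, which is what makes partial configs cheap to construct:

```python
from scenario.config import ModelConfig

cfg = ModelConfig(model="openai/gpt-4.1-mini")
print(cfg.temperature)  # 0.0 (deterministic by default)
print(cfg.api_key)      # None (provider resolves its own credentials)
print(cfg.max_tokens)   # None (no explicit generation cap)
```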
scenario/{config.py → config/scenario.py}
CHANGED
@@ -1,43 +1,14 @@
 """
-
+Scenario configuration for Scenario.
 
-This module provides configuration
-Scenario testing framework, including
-and debugging options.
+This module provides the main configuration class for customizing the behavior
+of the Scenario testing framework, including execution parameters and debugging options.
 """
 
 from typing import Optional, Union, ClassVar
 from pydantic import BaseModel
 
-
-class ModelConfig(BaseModel):
-    """
-    Configuration for LLM model settings.
-
-    This class encapsulates all the parameters needed to configure an LLM model
-    for use with user simulator and judge agents in the Scenario framework.
-
-    Attributes:
-        model: The model identifier (e.g., "openai/gpt-4.1-mini", "anthropic/claude-3-sonnet")
-        api_key: Optional API key for the model provider
-        temperature: Sampling temperature for response generation (0.0 = deterministic, 1.0 = creative)
-        max_tokens: Maximum number of tokens to generate in responses
-
-    Example:
-        ```
-        model_config = ModelConfig(
-            model="openai/gpt-4.1-mini",
-            api_key="your-api-key",
-            temperature=0.1,
-            max_tokens=1000
-        )
-        ```
-    """
-
-    model: str
-    api_key: Optional[str] = None
-    temperature: float = 0.0
-    max_tokens: Optional[int] = None
+from .model import ModelConfig
 
 
 class ScenarioConfig(BaseModel):
@@ -69,7 +40,7 @@ class ScenarioConfig(BaseModel):
     # Or create a specific config instance
     config = ScenarioConfig(
         default_model=ModelConfig(
-            model="openai/gpt-4.1
+            model="openai/gpt-4.1",
             temperature=0.2
         ),
         max_turns=20
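Since `ModelConfig` moved out of the old `scenario/config.py` into `scenario/config/model.py` but is re-imported here and re-exported by the package `__init__`, pre-0.7.7 import paths should keep resolving. A quick compatibility check under that assumption:

```python
from scenario.config import ModelConfig, ScenarioConfig

config = ScenarioConfig(
    default_model=ModelConfig(model="openai/gpt-4.1", temperature=0.2),
    max_turns=20,
)
print(config.default_model.model)  # openai/gpt-4.1
```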
scenario/judge_agent.py
CHANGED
@@ -62,7 +62,7 @@ class JudgeAgent(AgentAdapter):
 
     # Customized judge with specific model and behavior
     strict_judge = scenario.JudgeAgent(
-        model="openai/gpt-4.1
+        model="openai/gpt-4.1",
        criteria=[
             "Code examples are syntactically correct",
             "Explanations are technically accurate",
@@ -120,7 +120,7 @@ class JudgeAgent(AgentAdapter):
     criteria: List of success criteria to evaluate the conversation against.
         Can include both positive requirements ("Agent provides helpful responses")
         and negative constraints ("Agent should not provide personal information").
-    model: LLM model identifier (e.g., "openai/gpt-4.1
+    model: LLM model identifier (e.g., "openai/gpt-4.1").
         If not provided, uses the default model from global configuration.
     api_key: API key for the model provider. If not provided,
         uses the key from global configuration or environment.
|