langwatch-scenario 0.4.0__py3-none-any.whl → 0.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {langwatch_scenario-0.4.0.dist-info → langwatch_scenario-0.6.0.dist-info}/METADATA +93 -71
- langwatch_scenario-0.6.0.dist-info/RECORD +27 -0
- scenario/__init__.py +11 -114
- scenario/_utils/__init__.py +32 -0
- scenario/_utils/ids.py +58 -0
- scenario/_utils/message_conversion.py +103 -0
- scenario/{utils.py → _utils/utils.py} +21 -110
- scenario/agent_adapter.py +8 -4
- scenario/cache.py +4 -3
- scenario/config.py +7 -5
- scenario/events/__init__.py +66 -0
- scenario/events/event_bus.py +175 -0
- scenario/events/event_reporter.py +83 -0
- scenario/events/events.py +169 -0
- scenario/events/messages.py +84 -0
- scenario/events/utils.py +86 -0
- scenario/judge_agent.py +7 -28
- scenario/pytest_plugin.py +2 -47
- scenario/scenario_executor.py +268 -84
- scenario/scenario_state.py +6 -6
- scenario/script.py +9 -9
- scenario/types.py +10 -6
- scenario/user_simulator_agent.py +4 -11
- langwatch_scenario-0.4.0.dist-info/RECORD +0 -18
- {langwatch_scenario-0.4.0.dist-info → langwatch_scenario-0.6.0.dist-info}/WHEEL +0 -0
- {langwatch_scenario-0.4.0.dist-info → langwatch_scenario-0.6.0.dist-info}/entry_points.txt +0 -0
- {langwatch_scenario-0.4.0.dist-info → langwatch_scenario-0.6.0.dist-info}/top_level.txt +0 -0
- /scenario/{error_messages.py → _error_messages.py} +0 -0
@@ -0,0 +1,175 @@
|
|
1
|
+
from rx.subject.subject import Subject
|
2
|
+
from rx import operators as ops
|
3
|
+
from typing import Optional
|
4
|
+
from datetime import datetime, UTC
|
5
|
+
from .events import ScenarioEvent, ScenarioRunFinishedEvent
|
6
|
+
from .event_reporter import EventReporter
|
7
|
+
from typing import Any
|
8
|
+
|
9
|
+
import asyncio
|
10
|
+
|
11
|
+
|
12
|
+
class ScenarioEventBus:
    """
    Manages scenario event publishing, subscription, and processing using RxPY.

    Events pushed through `publish()` flow into an RxPY `Subject`. Once
    `listen()` has subscribed to that subject, every event is wrapped in its
    own asyncio task that hands the event to the `EventReporter`. A failing
    `post_event` call is retried with exponential backoff (0.1s, 0.2s, 0.4s,
    ...) until `max_retries` attempts have been made.

    The stream is completed when a `ScenarioRunFinishedEvent` is published;
    `drain()` can then be awaited to wait for the outstanding tasks.

    NOTE(review): retries only trigger when `post_event` raises. A reporter
    that logs and swallows its own errors will never be retried here.

    Attributes:
        _events: RxPY Subject carrying the event stream
        _event_reporter: EventReporter instance used to post each event
        _processing_complete: asyncio.Event set once the stream has terminated
        _processing_task: Reserved handle for a background task (never
            assigned by this class; kept for backward compatibility)
        _subscription: Handle returned by subscribing to the stream; its
            presence marks the pipeline as active
        _max_retries: Maximum number of attempts per event

    Example:
        ```python
        reporter = EventReporter(endpoint="https://api.langwatch.ai")
        event_bus = ScenarioEventBus(event_reporter=reporter, max_retries=5)

        # Subscribe first, otherwise published events are not observed
        await event_bus.listen()

        event_bus.publish(scenario_started_event)
        event_bus.publish(message_snapshot_event)
        event_bus.publish(scenario_finished_event)  # This completes the stream

        # Wait for all events to be processed
        await event_bus.drain()
        ```
    """

    def __init__(
        self, event_reporter: Optional[EventReporter] = None, max_retries: int = 3
    ):
        """
        Initialize the event bus with optional event reporter and retry configuration.

        Args:
            event_reporter: Optional EventReporter used to post events.
                           If not provided (or falsy), a default EventReporter
                           is created.
            max_retries: Maximum number of attempts per event. At least one
                        attempt is always made. Defaults to 3.
        """
        self._events = Subject()
        # Use default EventReporter if none provided
        self._event_reporter: EventReporter = event_reporter or EventReporter()
        self._processing_complete = asyncio.Event()
        self._processing_task: Optional[asyncio.Task[Any]] = None
        # Set by listen(); guards against subscribing to the stream twice.
        self._subscription: Optional[Any] = None
        self._max_retries = max_retries

    def publish(self, event: ScenarioEvent) -> None:
        """
        Publishes an event into the processing pipeline.

        The event's `timestamp` is always overwritten with the current UTC
        time in milliseconds at the moment of publishing, then the event is
        pushed onto the stream. Publishing a ScenarioRunFinishedEvent also
        completes the stream.

        Args:
            event: The scenario event to publish.

        Note:
            Events are processed asynchronously in the background. Use `drain()`
            to wait for all events to be processed after publishing.
        """
        # Convert to Unix timestamp in milliseconds
        event.timestamp = int(datetime.now(UTC).timestamp() * 1000)
        self._events.on_next(event)

        if isinstance(event, ScenarioRunFinishedEvent):
            self._events.on_completed()

    async def listen(self) -> None:
        """
        Begins listening for and processing events.

        Subscribes to the event stream so that each published event is posted
        through the event reporter in its own asyncio task, with retries on
        failure. It should be called before publishing any events.

        The processing pipeline:
        1. Receives events from the publish() method
        2. Processes each event concurrently using asyncio tasks
        3. Retries failed events with exponential backoff
        4. Completes when a ScenarioRunFinishedEvent is published

        Note:
            This method is idempotent - calling it again while a subscription
            already exists has no effect.
        """
        if self._processing_task is not None or self._subscription is not None:
            return

        async def process_single_event(event: ScenarioEvent) -> bool:
            """
            Post a single event, retrying on failure.

            Returns:
                True if posting succeeded, False if every attempt failed.
            """
            attempt = 1
            while True:
                try:
                    if self._event_reporter:
                        await self._event_reporter.post_event(event)
                    return True
                except Exception as e:
                    if attempt >= self._max_retries:
                        print(f"Failed to process event after {attempt} attempts: {e}")
                        return False
                    print(
                        f"Error processing event (attempt {attempt}/{self._max_retries}): {e}"
                    )
                    # Exponential backoff: 0.1s, 0.2s, 0.4s, ...
                    await asyncio.sleep(0.1 * (2 ** (attempt - 1)))
                    attempt += 1

        def process_event(event: ScenarioEvent) -> asyncio.Task[bool]:
            """Create an asyncio task to process an event concurrently."""
            loop = asyncio.get_event_loop()
            return loop.create_task(process_single_event(event))

        def on_stream_error(error: Exception) -> None:
            """Report a stream failure and release anyone blocked in drain()."""
            print(f"Unexpected error in event stream: {error}")
            # No further events will be processed after an error, so waiting
            # callers must not block forever.
            self._processing_complete.set()

        # Set up the event processing pipeline with concurrent processing.
        # Keeping the returned handle is what makes listen() idempotent.
        self._subscription = self._events.pipe(
            ops.flat_map(lambda event: process_event(event))
        ).subscribe(
            on_next=lambda success: None,
            on_completed=lambda: self._processing_complete.set(),
            on_error=on_stream_error,
        )

    async def drain(self) -> None:
        """
        Waits until the event stream has terminated.

        Blocks until `_processing_complete` is set, which happens when the
        processing pipeline completes (or fails). Call it after publishing
        the final event so that no pending event is lost.

        Note:
            This method will wait indefinitely if the event stream has not been
            completed (i.e., if no ScenarioRunFinishedEvent has been published)
            or if `listen()` was never called.
        """
        await self._processing_complete.wait()

    def is_completed(self) -> bool:
        """
        Returns whether the event bus has finished processing.

        Non-blocking counterpart of `drain()`.

        Returns:
            True once the processing pipeline has terminated, False otherwise
        """
        return self._processing_complete.is_set()
|
@@ -0,0 +1,83 @@
|
|
1
|
+
import logging
|
2
|
+
import os
|
3
|
+
import httpx
|
4
|
+
from typing import Optional
|
5
|
+
from .events import ScenarioEvent
|
6
|
+
|
7
|
+
|
8
|
+
class EventReporter:
    """
    Handles HTTP posting of scenario events to external endpoints.

    Single responsibility: send events via HTTP to the configured endpoint
    with an auth token header, logging the outcome instead of raising.

    Args:
        endpoint (str, optional): The base URL to post events to. Defaults to LANGWATCH_ENDPOINT env var.
        api_key (str, optional): The API key for authentication. Defaults to LANGWATCH_API_KEY env var.

    Example:
        event = ...  # a ScenarioEvent instance exposing type_, scenario_run_id and to_dict()

        reporter = EventReporter(endpoint="https://api.langwatch.ai", api_key="test-api-key")
        await reporter.post_event(event)
    """

    def __init__(self, endpoint: Optional[str] = None, api_key: Optional[str] = None):
        self.endpoint = endpoint or os.getenv("LANGWATCH_ENDPOINT")
        self.api_key = api_key or os.getenv("LANGWATCH_API_KEY", "")
        self.logger = logging.getLogger("EventReporter")

    def _events_url(self) -> str:
        """Return the scenario-events URL, tolerating a trailing slash on the endpoint."""
        base = (self.endpoint or "").rstrip("/")
        return f"{base}/api/scenario-events"

    async def post_event(self, event: "ScenarioEvent"):
        """
        Posts an event to the configured endpoint.

        The event is serialized with `event.to_dict()` and sent as JSON with
        an `X-Auth-Token` header. If no endpoint is configured the event is
        skipped with a warning. HTTP failures and transport errors are logged
        and not re-raised.

        Args:
            event: The scenario event to post

        Returns:
            None - logs success/failure internally
        """
        event_type = event.type_
        self.logger.info(f"[{event_type}] Publishing event ({event.scenario_run_id})")

        if not self.endpoint:
            self.logger.warning(
                "No LANGWATCH_ENDPOINT configured, skipping event posting"
            )
            return

        try:
            async with httpx.AsyncClient() as client:
                response = await client.post(
                    self._events_url(),
                    json=event.to_dict(),
                    headers={
                        "Content-Type": "application/json",
                        "X-Auth-Token": self.api_key,
                    },
                )
                self.logger.info(f"[{event_type}] POST response status: {response.status_code} ({event.scenario_run_id})")

                if response.is_success:
                    try:
                        data = response.json()
                    except ValueError:
                        # The POST itself succeeded; an empty or non-JSON body
                        # must not be reported as a failed delivery.
                        data = response.text
                    self.logger.info(f"[{event_type}] POST response: {data} ({event.scenario_run_id})")
                else:
                    error_text = response.text
                    self.logger.error(
                        f"[{event_type}] Event POST failed: status={response.status_code}, "
                        f"reason={response.reason_phrase}, error={error_text}, "
                        f"event={event}"
                    )
        except Exception as error:
            self.logger.error(
                f"[{event_type}] Event POST error: {error}, event={event}, endpoint={self.endpoint}")
|
@@ -0,0 +1,169 @@
|
|
1
|
+
"""
|
2
|
+
Exports scenario event models from the generated LangWatch API client,
|
3
|
+
renaming the auto-generated types to clean, meaningful names.
|
4
|
+
|
5
|
+
This ensures all event types are always in sync with the OpenAPI spec and
|
6
|
+
the backend, and provides a single import location for event models.
|
7
|
+
|
8
|
+
If you need to add custom logic or helpers, you can extend or wrap these models here.
|
9
|
+
"""
|
10
|
+
|
11
|
+
from typing import Union, Any, Optional
|
12
|
+
from scenario.generated.langwatch_api_client.lang_watch_api_client.models import (
|
13
|
+
PostApiScenarioEventsBodyType0,
|
14
|
+
PostApiScenarioEventsBodyType0Metadata as ScenarioRunStartedEventMetadata,
|
15
|
+
PostApiScenarioEventsBodyType1,
|
16
|
+
PostApiScenarioEventsBodyType1ResultsType0 as ScenarioRunFinishedEventResults,
|
17
|
+
PostApiScenarioEventsBodyType1ResultsType0Verdict as ScenarioRunFinishedEventVerdict,
|
18
|
+
PostApiScenarioEventsBodyType1Status as ScenarioRunFinishedEventStatus,
|
19
|
+
PostApiScenarioEventsBodyType2,
|
20
|
+
# Message types for the snapshot event
|
21
|
+
PostApiScenarioEventsBodyType2MessagesItemType0,
|
22
|
+
PostApiScenarioEventsBodyType2MessagesItemType1,
|
23
|
+
PostApiScenarioEventsBodyType2MessagesItemType2,
|
24
|
+
PostApiScenarioEventsBodyType2MessagesItemType3,
|
25
|
+
PostApiScenarioEventsBodyType2MessagesItemType4,
|
26
|
+
)
|
27
|
+
|
28
|
+
# Type alias for message types
|
29
|
+
# Any of the generated message item models accepted by the snapshot event.
MessageType = Union[
    PostApiScenarioEventsBodyType2MessagesItemType0,
    PostApiScenarioEventsBodyType2MessagesItemType1,
    PostApiScenarioEventsBodyType2MessagesItemType2,
    PostApiScenarioEventsBodyType2MessagesItemType3,
    PostApiScenarioEventsBodyType2MessagesItemType4,
]
|
36
|
+
|
37
|
+
class ScenarioRunStartedEvent(PostApiScenarioEventsBodyType0):
    """
    Event published when a scenario run begins execution.

    Automatically sets type_ to "SCENARIO_RUN_STARTED" and carries the
    metadata describing the scenario.

    Args:
        batch_run_id (str): Unique identifier for the batch of scenario runs
        scenario_id (str): Unique identifier for the scenario definition
        scenario_run_id (str): Unique identifier for this specific run
        metadata (ScenarioRunStartedEventMetadata): Scenario details like name and description
        timestamp (Optional[int], optional): Unix timestamp in milliseconds, auto-generated if not provided
        raw_event (Optional[Any], optional): Raw event data
        scenario_set_id (Optional[str], optional): Set identifier, defaults to "default"
    """

    def __init__(
        self,
        batch_run_id: str,
        scenario_id: str,
        scenario_run_id: str,
        metadata: ScenarioRunStartedEventMetadata,
        timestamp: Optional[int] = None,
        raw_event: Optional[Any] = None,
        scenario_set_id: Optional[str] = "default",
    ):
        if timestamp is None:
            # Honour the documented contract: default to "now" in milliseconds.
            import time

            timestamp = int(time.time() * 1000)

        super().__init__(
            type_="SCENARIO_RUN_STARTED",
            batch_run_id=batch_run_id,
            scenario_id=scenario_id,
            scenario_run_id=scenario_run_id,
            metadata=metadata,
            timestamp=timestamp,
            raw_event=raw_event,
            # A falsy set id (None or "") falls back to the default set.
            scenario_set_id=scenario_set_id or "default",
        )
|
73
|
+
|
74
|
+
class ScenarioRunFinishedEvent(PostApiScenarioEventsBodyType1):
    """
    Event published when a scenario run completes execution.

    Automatically sets type_ to "SCENARIO_RUN_FINISHED" and optionally
    carries the results (verdict and reasoning) of the run.

    Args:
        batch_run_id (str): Unique identifier for the batch of scenario runs
        scenario_id (str): Unique identifier for the scenario definition
        scenario_run_id (str): Unique identifier for this specific run
        status (ScenarioRunFinishedEventStatus): Overall execution status
        timestamp (Optional[int], optional): Unix timestamp in milliseconds, auto-generated if not provided
        results (Optional[ScenarioRunFinishedEventResults], optional): Verdict and reasoning for the outcome
        raw_event (Optional[Any], optional): Raw event data
        scenario_set_id (Optional[str], optional): Set identifier, defaults to "default"
    """

    def __init__(
        self,
        batch_run_id: str,
        scenario_id: str,
        scenario_run_id: str,
        status: ScenarioRunFinishedEventStatus,
        timestamp: Optional[int] = None,
        results: Optional[ScenarioRunFinishedEventResults] = None,
        raw_event: Optional[Any] = None,
        scenario_set_id: Optional[str] = "default",
    ):
        if timestamp is None:
            # Honour the documented contract: default to "now" in milliseconds.
            import time

            timestamp = int(time.time() * 1000)

        super().__init__(
            type_="SCENARIO_RUN_FINISHED",
            batch_run_id=batch_run_id,
            scenario_id=scenario_id,
            scenario_run_id=scenario_run_id,
            status=status,
            timestamp=timestamp,
            raw_event=raw_event,
            # A falsy set id (None or "") falls back to the default set.
            scenario_set_id=scenario_set_id or "default",
            results=results,
        )
|
113
|
+
|
114
|
+
class ScenarioMessageSnapshotEvent(PostApiScenarioEventsBodyType2):
    """
    Event published to capture intermediate state during scenario execution.

    Automatically sets type_ to "SCENARIO_MESSAGE_SNAPSHOT" and carries the
    list of messages exchanged so far.

    Args:
        batch_run_id (str): Unique identifier for the batch of scenario runs
        scenario_id (str): Unique identifier for the scenario definition
        scenario_run_id (str): Unique identifier for this specific run
        messages (list[MessageType]): List of message objects in the conversation
        timestamp (Optional[int], optional): Unix timestamp in milliseconds, auto-generated if not provided
        raw_event (Optional[Any], optional): Raw event data
        scenario_set_id (Optional[str], optional): Set identifier, defaults to "default"
    """

    def __init__(
        self,
        batch_run_id: str,
        scenario_id: str,
        scenario_run_id: str,
        messages: list[MessageType],
        timestamp: Optional[int] = None,
        raw_event: Optional[Any] = None,
        scenario_set_id: Optional[str] = "default",
    ):
        if timestamp is None:
            # Honour the documented contract: default to "now" in milliseconds.
            import time

            timestamp = int(time.time() * 1000)

        super().__init__(
            type_="SCENARIO_MESSAGE_SNAPSHOT",
            batch_run_id=batch_run_id,
            scenario_id=scenario_id,
            scenario_run_id=scenario_run_id,
            messages=messages,
            timestamp=timestamp,
            raw_event=raw_event,
            # A falsy set id (None or "") falls back to the default set.
            scenario_set_id=scenario_set_id or "default",
        )
|
150
|
+
|
151
|
+
# Union type for all supported event types
|
152
|
+
# Every event class that can travel through the event pipeline.
ScenarioEvent = Union[
    ScenarioRunStartedEvent,
    ScenarioRunFinishedEvent,
    ScenarioMessageSnapshotEvent,
]


# Public names re-exported by this module.
__all__ = [
    "ScenarioEvent",
    "ScenarioRunStartedEvent",
    "ScenarioRunStartedEventMetadata",
    "ScenarioRunFinishedEvent",
    "ScenarioRunFinishedEventResults",
    "ScenarioRunFinishedEventVerdict",
    "ScenarioRunFinishedEventStatus",
    "ScenarioMessageSnapshotEvent",
    "MessageType",
]
|
@@ -0,0 +1,84 @@
|
|
1
|
+
from typing import Union, Optional, List
|
2
|
+
from ag_ui.core import (
|
3
|
+
UserMessage as AgUiUserMessage,
|
4
|
+
AssistantMessage as AgUiAssistantMessage,
|
5
|
+
SystemMessage as AgUiSystemMessage,
|
6
|
+
ToolMessage as AgUiToolMessage,
|
7
|
+
ToolCall as AgUiToolCall,
|
8
|
+
FunctionCall as AgUiFunctionCall,
|
9
|
+
)
|
10
|
+
|
11
|
+
class UserMessage(AgUiUserMessage):
    """
    AG-UI user message with a fixed role of "user" and a `to_dict` helper.

    Content is a required constructor argument; `name` is optional.
    """

    def __init__(self, id: str, content: str, name: Optional[str] = None):
        super().__init__(
            id=id,
            role="user",
            content=content,
            name=name,
        )

    def to_dict(self):
        """Return the message as a dict, leaving out fields that are None."""
        serialized = self.model_dump(exclude_none=True)
        return serialized
|
22
|
+
|
23
|
+
class AssistantMessage(AgUiAssistantMessage):
    """
    AG-UI assistant message with a fixed role of "assistant" and a `to_dict` helper.

    Both `content` and `tool_calls` are optional.
    """

    def __init__(
        self,
        id: str,
        content: Optional[str] = None,
        tool_calls: Optional[List['ToolCall']] = None,
        name: Optional[str] = None,
    ):
        super().__init__(
            id=id,
            role="assistant",
            content=content,
            tool_calls=tool_calls,
            name=name,
        )

    def to_dict(self):
        """Return the message as a dict, leaving out fields that are None."""
        serialized = self.model_dump(exclude_none=True)
        return serialized
|
34
|
+
|
35
|
+
class SystemMessage(AgUiSystemMessage):
    """
    AG-UI system message with a fixed role of "system" and a `to_dict` helper.

    Content is a required constructor argument; `name` is optional.
    """

    def __init__(self, id: str, content: str, name: Optional[str] = None):
        super().__init__(
            id=id,
            role="system",
            content=content,
            name=name,
        )

    def to_dict(self):
        """Return the message as a dict, leaving out fields that are None."""
        serialized = self.model_dump(exclude_none=True)
        return serialized
|
46
|
+
|
47
|
+
class ToolMessage(AgUiToolMessage):
    """
    AG-UI tool message with a fixed role of "tool" and a `to_dict` helper.

    Both `content` and `tool_call_id` are required constructor arguments.
    """

    def __init__(self, id: str, content: str, tool_call_id: str):
        super().__init__(
            id=id,
            role="tool",
            content=content,
            tool_call_id=tool_call_id,
        )

    def to_dict(self):
        """Return the message as a dict, leaving out fields that are None."""
        serialized = self.model_dump(exclude_none=True)
        return serialized
|
58
|
+
|
59
|
+
class ToolCall(AgUiToolCall):
    """
    AG-UI tool call with a fixed type of "function" and a `to_dict` helper.

    Both `id` and `function` are required constructor arguments.
    """

    def __init__(self, id: str, function: 'FunctionCall'):
        super().__init__(
            id=id,
            type="function",
            function=function,
        )

    def to_dict(self):
        """Return the tool call as a dict, leaving out fields that are None."""
        serialized = self.model_dump(exclude_none=True)
        return serialized
|
70
|
+
|
71
|
+
class FunctionCall(AgUiFunctionCall):
    """
    AG-UI function call with a `to_dict` helper.

    Both `name` and `arguments` are required constructor arguments.
    """

    def __init__(self, name: str, arguments: str):
        super().__init__(
            name=name,
            arguments=arguments,
        )

    def to_dict(self):
        """Return the function call as a dict, leaving out fields that are None."""
        serialized = self.model_dump(exclude_none=True)
        return serialized
|
82
|
+
|
83
|
+
# Union type alias for all message types
|
84
|
+
Message = Union[
    UserMessage,
    AssistantMessage,
    SystemMessage,
    ToolMessage,
    ToolCall,
    FunctionCall,
]
|
scenario/events/utils.py
ADDED
@@ -0,0 +1,86 @@
|
|
1
|
+
from openai.types.chat.chat_completion_message_param import ChatCompletionMessageParam
|
2
|
+
from .messages import UserMessage, AssistantMessage, SystemMessage, ToolMessage, ToolCall, FunctionCall
|
3
|
+
from typing import List, Union
|
4
|
+
|
5
|
+
import uuid
|
6
|
+
|
7
|
+
# Define the correct Message type for the return value
|
8
|
+
Message = Union[UserMessage, AssistantMessage, SystemMessage, ToolMessage]


def _require_content(content, label: str, index: int) -> str:
    """Return `content` as a string, raising ValueError when it is missing or empty."""
    if not content:
        raise ValueError(f"{label} message at index {index} missing required content")
    return str(content)


def _convert_tool_calls(tool_calls) -> List[ToolCall]:
    """Convert OpenAI-style tool call dicts into ag_ui ToolCall objects."""
    return [
        ToolCall(
            # Same fallback rule as message ids: a missing OR falsy id gets a
            # fresh UUID instead of being passed through as None / "".
            id=tool_call.get("id") or str(uuid.uuid4()),
            function=FunctionCall(
                name=tool_call["function"]["name"],
                arguments=tool_call["function"]["arguments"],
            ),
        )
        for tool_call in tool_calls
    ]


def convert_messages_to_ag_ui_messages(messages: list[ChatCompletionMessageParam]) -> list[Message]:
    """
    Converts OpenAI ChatCompletionMessageParam messages to ag_ui Message format.

    Each input message is mapped by its "role" to the matching ag_ui message
    class. A message without an "id" gets a freshly generated UUID. Content is
    converted with `str()`, so non-string content ends up as its string
    representation.

    Args:
        messages: List of OpenAI ChatCompletionMessageParam messages

    Returns:
        List of ag_ui Message objects, in the same order as the input

    Raises:
        ValueError: If a user, system or tool message has no content, a tool
            message has no tool_call_id, or the role is not one of
            user / assistant / system / tool
    """
    converted_messages: list[Message] = []

    for i, message in enumerate(messages):
        # Reuse the caller-supplied id when present, otherwise generate one
        message_id = message.get("id") or str(uuid.uuid4())

        role = message.get("role")
        content = message.get("content")

        if role == "user":
            converted_messages.append(UserMessage(
                id=message_id,
                content=_require_content(content, "User", i),
            ))
        elif role == "assistant":
            # Assistant messages may carry tool calls instead of (or next to) content
            tool_calls = message.get("tool_calls")
            ag_ui_tool_calls = _convert_tool_calls(tool_calls) if tool_calls else None

            converted_messages.append(AssistantMessage(
                id=message_id,
                content=str(content) if content else None,
                tool_calls=ag_ui_tool_calls,
            ))
        elif role == "system":
            converted_messages.append(SystemMessage(
                id=message_id,
                content=_require_content(content, "System", i),
            ))
        elif role == "tool":
            tool_call_id = message.get("tool_call_id")
            if not tool_call_id:
                raise ValueError(f"Tool message at index {i} missing required tool_call_id")

            converted_messages.append(ToolMessage(
                id=message_id,
                content=_require_content(content, "Tool", i),
                tool_call_id=tool_call_id,
            ))
        else:
            raise ValueError(f"Unsupported message role '{role}' at index {i}")

    return converted_messages
|
scenario/judge_agent.py
CHANGED
@@ -19,7 +19,7 @@ from scenario.cache import scenario_cache
|
|
19
19
|
from scenario.agent_adapter import AgentAdapter
|
20
20
|
from scenario.config import ModelConfig, ScenarioConfig
|
21
21
|
|
22
|
-
from .
|
22
|
+
from ._error_messages import agent_not_configured_error_message
|
23
23
|
from .types import AgentInput, AgentReturnTypes, AgentRole, ScenarioResult
|
24
24
|
|
25
25
|
|
@@ -48,7 +48,7 @@ class JudgeAgent(AgentAdapter):
|
|
48
48
|
system_prompt: Custom system prompt to override default judge behavior
|
49
49
|
|
50
50
|
Example:
|
51
|
-
```
|
51
|
+
```
|
52
52
|
import scenario
|
53
53
|
|
54
54
|
# Basic judge agent with criteria
|
@@ -133,14 +133,12 @@ class JudgeAgent(AgentAdapter):
|
|
133
133
|
Exception: If no model is configured either in parameters or global config
|
134
134
|
|
135
135
|
Example:
|
136
|
-
```
|
136
|
+
```
|
137
137
|
# Customer service judge
|
138
138
|
cs_judge = JudgeAgent(
|
139
139
|
criteria=[
|
140
|
-
"Agent
|
141
|
-
"Agent
|
142
|
-
"Agent offers appropriate solutions or next steps",
|
143
|
-
"Agent does not make promises the company cannot keep"
|
140
|
+
"Agent replies with the refund policy",
|
141
|
+
"Agent offers next steps for the customer",
|
144
142
|
],
|
145
143
|
temperature=0.1
|
146
144
|
)
|
@@ -148,9 +146,8 @@ class JudgeAgent(AgentAdapter):
|
|
148
146
|
# Technical accuracy judge
|
149
147
|
tech_judge = JudgeAgent(
|
150
148
|
criteria=[
|
151
|
-
"
|
152
|
-
"
|
153
|
-
"Best practices are recommended"
|
149
|
+
"Agent adds a code review pointing out the code compilation errors",
|
150
|
+
"Agent adds a code review about the missing security headers"
|
154
151
|
],
|
155
152
|
system_prompt="You are a senior software engineer reviewing code for production use."
|
156
153
|
)
|
@@ -210,24 +207,6 @@ class JudgeAgent(AgentAdapter):
|
|
210
207
|
Exception: If the judge cannot make a valid decision or if there's an
|
211
208
|
error in the evaluation process
|
212
209
|
|
213
|
-
Example:
|
214
|
-
The judge evaluates conversations like this:
|
215
|
-
|
216
|
-
```
|
217
|
-
Conversation so far:
|
218
|
-
User: "I need help with authentication"
|
219
|
-
Agent: "I can help! What authentication method are you using?"
|
220
|
-
User: "JWT tokens"
|
221
|
-
Agent: "Here's how to implement JWT securely: [detailed code example]"
|
222
|
-
|
223
|
-
Judge evaluation:
|
224
|
-
- ✓ Agent provides helpful responses
|
225
|
-
- ✓ Agent asks relevant follow-up questions
|
226
|
-
- ✓ Security best practices are mentioned
|
227
|
-
|
228
|
-
Decision: CONTINUE (all criteria being met so far)
|
229
|
-
```
|
230
|
-
|
231
210
|
Note:
|
232
211
|
- Returns empty list [] to continue the scenario
|
233
212
|
- Returns ScenarioResult to end with success/failure
|