langwatch-scenario 0.3.0__py3-none-any.whl → 0.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langwatch_scenario-0.6.0.dist-info/METADATA +385 -0
- langwatch_scenario-0.6.0.dist-info/RECORD +27 -0
- scenario/__init__.py +128 -17
- scenario/{error_messages.py → _error_messages.py} +8 -38
- scenario/_utils/__init__.py +32 -0
- scenario/_utils/ids.py +58 -0
- scenario/_utils/message_conversion.py +103 -0
- scenario/_utils/utils.py +425 -0
- scenario/agent_adapter.py +115 -0
- scenario/cache.py +134 -9
- scenario/config.py +156 -10
- scenario/events/__init__.py +66 -0
- scenario/events/event_bus.py +175 -0
- scenario/events/event_reporter.py +83 -0
- scenario/events/events.py +169 -0
- scenario/events/messages.py +84 -0
- scenario/events/utils.py +86 -0
- scenario/judge_agent.py +414 -0
- scenario/pytest_plugin.py +177 -14
- scenario/scenario_executor.py +630 -154
- scenario/scenario_state.py +205 -0
- scenario/script.py +361 -0
- scenario/types.py +197 -20
- scenario/user_simulator_agent.py +242 -0
- langwatch_scenario-0.3.0.dist-info/METADATA +0 -302
- langwatch_scenario-0.3.0.dist-info/RECORD +0 -16
- scenario/scenario.py +0 -238
- scenario/scenario_agent_adapter.py +0 -16
- scenario/testing_agent.py +0 -279
- scenario/utils.py +0 -264
- {langwatch_scenario-0.3.0.dist-info → langwatch_scenario-0.6.0.dist-info}/WHEEL +0 -0
- {langwatch_scenario-0.3.0.dist-info → langwatch_scenario-0.6.0.dist-info}/entry_points.txt +0 -0
- {langwatch_scenario-0.3.0.dist-info → langwatch_scenario-0.6.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,83 @@
|
|
1
|
+
import logging
|
2
|
+
import os
|
3
|
+
import httpx
|
4
|
+
from typing import Optional
|
5
|
+
from .events import ScenarioEvent
|
6
|
+
|
7
|
+
|
8
|
+
class EventReporter:
|
9
|
+
"""
|
10
|
+
Handles HTTP posting of scenario events to external endpoints.
|
11
|
+
|
12
|
+
Single responsibility: Send events via HTTP to configured endpoints
|
13
|
+
with proper authentication and error handling.
|
14
|
+
|
15
|
+
Args:
|
16
|
+
endpoint (str, optional): The base URL to post events to. Defaults to LANGWATCH_ENDPOINT env var.
|
17
|
+
api_key (str, optional): The API key for authentication. Defaults to LANGWATCH_API_KEY env var.
|
18
|
+
|
19
|
+
Example:
|
20
|
+
event = {
|
21
|
+
"type": "SCENARIO_RUN_STARTED",
|
22
|
+
"batch_run_id": "batch-1",
|
23
|
+
"scenario_id": "scenario-1",
|
24
|
+
"scenario_run_id": "run-1",
|
25
|
+
"metadata": {
|
26
|
+
"name": "test",
|
27
|
+
"description": "test scenario"
|
28
|
+
}
|
29
|
+
}
|
30
|
+
|
31
|
+
reporter = EventReporter(endpoint="https://api.langwatch.ai", api_key="test-api-key")
|
32
|
+
await reporter.post_event(event)
|
33
|
+
"""
|
34
|
+
|
35
|
+
def __init__(self, endpoint: Optional[str] = None, api_key: Optional[str] = None):
|
36
|
+
self.endpoint = endpoint or os.getenv("LANGWATCH_ENDPOINT")
|
37
|
+
self.api_key = api_key or os.getenv("LANGWATCH_API_KEY", "")
|
38
|
+
self.logger = logging.getLogger("EventReporter")
|
39
|
+
|
40
|
+
async def post_event(self, event: ScenarioEvent):
|
41
|
+
"""
|
42
|
+
Posts an event to the configured endpoint.
|
43
|
+
|
44
|
+
Args:
|
45
|
+
event: A dictionary containing the event data
|
46
|
+
|
47
|
+
Returns:
|
48
|
+
None - logs success/failure internally
|
49
|
+
"""
|
50
|
+
event_type = event.type_
|
51
|
+
self.logger.info(f"[{event_type}] Publishing event ({event.scenario_run_id})")
|
52
|
+
|
53
|
+
if not self.endpoint:
|
54
|
+
self.logger.warning(
|
55
|
+
"No LANGWATCH_ENDPOINT configured, skipping event posting"
|
56
|
+
)
|
57
|
+
return
|
58
|
+
|
59
|
+
try:
|
60
|
+
async with httpx.AsyncClient() as client:
|
61
|
+
response = await client.post(
|
62
|
+
f"{self.endpoint}/api/scenario-events",
|
63
|
+
json=event.to_dict(),
|
64
|
+
headers={
|
65
|
+
"Content-Type": "application/json",
|
66
|
+
"X-Auth-Token": self.api_key,
|
67
|
+
},
|
68
|
+
)
|
69
|
+
self.logger.info(f"[{event_type}] POST response status: {response.status_code} ({event.scenario_run_id})")
|
70
|
+
|
71
|
+
if response.is_success:
|
72
|
+
data = response.json()
|
73
|
+
self.logger.info(f"[{event_type}] POST response: {data} ({event.scenario_run_id})")
|
74
|
+
else:
|
75
|
+
error_text = response.text
|
76
|
+
self.logger.error(
|
77
|
+
f"[{event_type}] Event POST failed: status={response.status_code}, "
|
78
|
+
f"reason={response.reason_phrase}, error={error_text}, "
|
79
|
+
f"event={event}"
|
80
|
+
)
|
81
|
+
except Exception as error:
|
82
|
+
self.logger.error(
|
83
|
+
f"[{event_type}] Event POST error: {error}, event={event}, endpoint={self.endpoint}")
|
@@ -0,0 +1,169 @@
|
|
1
|
+
"""
|
2
|
+
Exports scenario event models from the generated LangWatch API client,
|
3
|
+
renaming the auto-generated types to clean, meaningful names.
|
4
|
+
|
5
|
+
This ensures all event types are always in sync with the OpenAPI spec and
|
6
|
+
the backend, and provides a single import location for event models.
|
7
|
+
|
8
|
+
If you need to add custom logic or helpers, you can extend or wrap these models here.
|
9
|
+
"""
|
10
|
+
|
11
|
+
from typing import Union, Any, Optional
|
12
|
+
from scenario.generated.langwatch_api_client.lang_watch_api_client.models import (
|
13
|
+
PostApiScenarioEventsBodyType0,
|
14
|
+
PostApiScenarioEventsBodyType0Metadata as ScenarioRunStartedEventMetadata,
|
15
|
+
PostApiScenarioEventsBodyType1,
|
16
|
+
PostApiScenarioEventsBodyType1ResultsType0 as ScenarioRunFinishedEventResults,
|
17
|
+
PostApiScenarioEventsBodyType1ResultsType0Verdict as ScenarioRunFinishedEventVerdict,
|
18
|
+
PostApiScenarioEventsBodyType1Status as ScenarioRunFinishedEventStatus,
|
19
|
+
PostApiScenarioEventsBodyType2,
|
20
|
+
# Message types for the snapshot event
|
21
|
+
PostApiScenarioEventsBodyType2MessagesItemType0,
|
22
|
+
PostApiScenarioEventsBodyType2MessagesItemType1,
|
23
|
+
PostApiScenarioEventsBodyType2MessagesItemType2,
|
24
|
+
PostApiScenarioEventsBodyType2MessagesItemType3,
|
25
|
+
PostApiScenarioEventsBodyType2MessagesItemType4,
|
26
|
+
)
|
27
|
+
|
28
|
+
# Type alias for message types
|
29
|
+
MessageType = Union[
|
30
|
+
PostApiScenarioEventsBodyType2MessagesItemType0,
|
31
|
+
PostApiScenarioEventsBodyType2MessagesItemType1,
|
32
|
+
PostApiScenarioEventsBodyType2MessagesItemType2,
|
33
|
+
PostApiScenarioEventsBodyType2MessagesItemType3,
|
34
|
+
PostApiScenarioEventsBodyType2MessagesItemType4,
|
35
|
+
]
|
36
|
+
|
37
|
+
class ScenarioRunStartedEvent(PostApiScenarioEventsBodyType0):
|
38
|
+
"""
|
39
|
+
Event published when a scenario run begins execution.
|
40
|
+
|
41
|
+
Automatically sets type_ to "SCENARIO_RUN_STARTED" and includes metadata
|
42
|
+
about the scenario (name, description, etc.).
|
43
|
+
|
44
|
+
Args:
|
45
|
+
batch_run_id (str): Unique identifier for the batch of scenario runs
|
46
|
+
scenario_id (str): Unique identifier for the scenario definition
|
47
|
+
scenario_run_id (str): Unique identifier for this specific run
|
48
|
+
metadata (ScenarioRunStartedEventMetadata): Scenario details like name and description
|
49
|
+
timestamp (Optional[int], optional): Unix timestamp in milliseconds, auto-generated if not provided
|
50
|
+
raw_event (Optional[Any], optional): Raw event data
|
51
|
+
scenario_set_id (Optional[str], optional): Set identifier, defaults to "default"
|
52
|
+
"""
|
53
|
+
def __init__(
|
54
|
+
self,
|
55
|
+
batch_run_id: str,
|
56
|
+
scenario_id: str,
|
57
|
+
scenario_run_id: str,
|
58
|
+
metadata: ScenarioRunStartedEventMetadata,
|
59
|
+
timestamp: int,
|
60
|
+
raw_event: Optional[Any] = None,
|
61
|
+
scenario_set_id: Optional[str] = "default"
|
62
|
+
):
|
63
|
+
super().__init__(
|
64
|
+
type_="SCENARIO_RUN_STARTED",
|
65
|
+
batch_run_id=batch_run_id,
|
66
|
+
scenario_id=scenario_id,
|
67
|
+
scenario_run_id=scenario_run_id,
|
68
|
+
metadata=metadata,
|
69
|
+
timestamp=timestamp,
|
70
|
+
raw_event=raw_event,
|
71
|
+
scenario_set_id=scenario_set_id or "default"
|
72
|
+
)
|
73
|
+
|
74
|
+
class ScenarioRunFinishedEvent(PostApiScenarioEventsBodyType1):
|
75
|
+
"""
|
76
|
+
Event published when a scenario run completes execution.
|
77
|
+
|
78
|
+
Automatically sets type_ to "SCENARIO_RUN_FINISHED" and includes results
|
79
|
+
with verdict (PASS/FAIL/SUCCESS) and reasoning.
|
80
|
+
|
81
|
+
Args:
|
82
|
+
batch_run_id (str): Unique identifier for the batch of scenario runs
|
83
|
+
scenario_id (str): Unique identifier for the scenario definition
|
84
|
+
scenario_run_id (str): Unique identifier for this specific run
|
85
|
+
status (ScenarioRunFinishedEventStatus): Overall execution status
|
86
|
+
timestamp (Optional[int], optional): Unix timestamp in milliseconds, auto-generated if not provided
|
87
|
+
raw_event (Optional[Any], optional): Raw event data
|
88
|
+
scenario_set_id (Optional[str], optional): Set identifier, defaults to "default"
|
89
|
+
results (Optional[ScenarioRunFinishedEventResults], optional): Verdict and reasoning for the outcome
|
90
|
+
"""
|
91
|
+
def __init__(
|
92
|
+
self,
|
93
|
+
batch_run_id: str,
|
94
|
+
scenario_id: str,
|
95
|
+
scenario_run_id: str,
|
96
|
+
status: ScenarioRunFinishedEventStatus,
|
97
|
+
timestamp: int,
|
98
|
+
results: Optional[ScenarioRunFinishedEventResults] = None,
|
99
|
+
raw_event: Optional[Any] = None,
|
100
|
+
scenario_set_id: Optional[str] = "default",
|
101
|
+
):
|
102
|
+
super().__init__(
|
103
|
+
type_="SCENARIO_RUN_FINISHED",
|
104
|
+
batch_run_id=batch_run_id,
|
105
|
+
scenario_id=scenario_id,
|
106
|
+
scenario_run_id=scenario_run_id,
|
107
|
+
status=status,
|
108
|
+
timestamp=timestamp,
|
109
|
+
raw_event=raw_event,
|
110
|
+
scenario_set_id=scenario_set_id or "default",
|
111
|
+
results=results
|
112
|
+
)
|
113
|
+
|
114
|
+
class ScenarioMessageSnapshotEvent(PostApiScenarioEventsBodyType2):
|
115
|
+
"""
|
116
|
+
Event published to capture intermediate state during scenario execution.
|
117
|
+
|
118
|
+
Automatically sets type_ to "SCENARIO_MESSAGE_SNAPSHOT" and allows tracking
|
119
|
+
of messages, context, or other runtime data during scenario processing.
|
120
|
+
|
121
|
+
Args:
|
122
|
+
batch_run_id (str): Unique identifier for the batch of scenario runs
|
123
|
+
scenario_id (str): Unique identifier for the scenario definition
|
124
|
+
scenario_run_id (str): Unique identifier for this specific run
|
125
|
+
messages (list[MessageType]): List of message objects in the conversation
|
126
|
+
timestamp (Optional[int], optional): Unix timestamp in milliseconds, auto-generated if not provided
|
127
|
+
raw_event (Optional[Any], optional): Raw event data
|
128
|
+
scenario_set_id (Optional[str], optional): Set identifier, defaults to "default"
|
129
|
+
"""
|
130
|
+
def __init__(
|
131
|
+
self,
|
132
|
+
batch_run_id: str,
|
133
|
+
scenario_id: str,
|
134
|
+
scenario_run_id: str,
|
135
|
+
messages: list[MessageType],
|
136
|
+
timestamp: int,
|
137
|
+
raw_event: Optional[Any] = None,
|
138
|
+
scenario_set_id: Optional[str] = "default"
|
139
|
+
):
|
140
|
+
super().__init__(
|
141
|
+
type_="SCENARIO_MESSAGE_SNAPSHOT",
|
142
|
+
batch_run_id=batch_run_id,
|
143
|
+
scenario_id=scenario_id,
|
144
|
+
scenario_run_id=scenario_run_id,
|
145
|
+
messages=messages,
|
146
|
+
timestamp=timestamp,
|
147
|
+
raw_event=raw_event,
|
148
|
+
scenario_set_id=scenario_set_id or "default"
|
149
|
+
)
|
150
|
+
|
151
|
+
# Union type for all supported event types
|
152
|
+
ScenarioEvent = Union[
|
153
|
+
ScenarioRunStartedEvent,
|
154
|
+
ScenarioRunFinishedEvent,
|
155
|
+
ScenarioMessageSnapshotEvent
|
156
|
+
]
|
157
|
+
|
158
|
+
|
159
|
+
__all__ = [
|
160
|
+
"ScenarioEvent",
|
161
|
+
"ScenarioRunStartedEvent",
|
162
|
+
"ScenarioRunStartedEventMetadata",
|
163
|
+
"ScenarioRunFinishedEvent",
|
164
|
+
"ScenarioRunFinishedEventResults",
|
165
|
+
"ScenarioRunFinishedEventVerdict",
|
166
|
+
"ScenarioRunFinishedEventStatus",
|
167
|
+
"ScenarioMessageSnapshotEvent",
|
168
|
+
"MessageType",
|
169
|
+
]
|
@@ -0,0 +1,84 @@
|
|
1
|
+
from typing import Union, Optional, List
|
2
|
+
from ag_ui.core import (
|
3
|
+
UserMessage as AgUiUserMessage,
|
4
|
+
AssistantMessage as AgUiAssistantMessage,
|
5
|
+
SystemMessage as AgUiSystemMessage,
|
6
|
+
ToolMessage as AgUiToolMessage,
|
7
|
+
ToolCall as AgUiToolCall,
|
8
|
+
FunctionCall as AgUiFunctionCall,
|
9
|
+
)
|
10
|
+
|
11
|
+
class UserMessage(AgUiUserMessage):
|
12
|
+
"""
|
13
|
+
An AG-UI user message extended with the to_dict method.
|
14
|
+
Enforces role='user' and requires content.
|
15
|
+
"""
|
16
|
+
def __init__(self, id: str, content: str, name: Optional[str] = None):
|
17
|
+
super().__init__(id=id, role="user", content=content, name=name)
|
18
|
+
|
19
|
+
def to_dict(self):
|
20
|
+
"""Convert the UserMessage to a dictionary representation."""
|
21
|
+
return self.model_dump(exclude_none=True)
|
22
|
+
|
23
|
+
class AssistantMessage(AgUiAssistantMessage):
|
24
|
+
"""
|
25
|
+
An AG-UI assistant message extended with the to_dict method.
|
26
|
+
Enforces role='assistant' and allows optional content and tool_calls.
|
27
|
+
"""
|
28
|
+
def __init__(self, id: str, content: Optional[str] = None, tool_calls: Optional[List['ToolCall']] = None, name: Optional[str] = None):
|
29
|
+
super().__init__(id=id, role="assistant", content=content, tool_calls=tool_calls, name=name)
|
30
|
+
|
31
|
+
def to_dict(self):
|
32
|
+
"""Convert the AssistantMessage to a dictionary representation."""
|
33
|
+
return self.model_dump(exclude_none=True)
|
34
|
+
|
35
|
+
class SystemMessage(AgUiSystemMessage):
|
36
|
+
"""
|
37
|
+
An AG-UI system message extended with the to_dict method.
|
38
|
+
Enforces role='system' and requires content.
|
39
|
+
"""
|
40
|
+
def __init__(self, id: str, content: str, name: Optional[str] = None):
|
41
|
+
super().__init__(id=id, role="system", content=content, name=name)
|
42
|
+
|
43
|
+
def to_dict(self):
|
44
|
+
"""Convert the SystemMessage to a dictionary representation."""
|
45
|
+
return self.model_dump(exclude_none=True)
|
46
|
+
|
47
|
+
class ToolMessage(AgUiToolMessage):
|
48
|
+
"""
|
49
|
+
An AG-UI tool message extended with the to_dict method.
|
50
|
+
Enforces role='tool' and requires content and tool_call_id.
|
51
|
+
"""
|
52
|
+
def __init__(self, id: str, content: str, tool_call_id: str):
|
53
|
+
super().__init__(id=id, role="tool", content=content, tool_call_id=tool_call_id)
|
54
|
+
|
55
|
+
def to_dict(self):
|
56
|
+
"""Convert the ToolMessage to a dictionary representation."""
|
57
|
+
return self.model_dump(exclude_none=True)
|
58
|
+
|
59
|
+
class ToolCall(AgUiToolCall):
|
60
|
+
"""
|
61
|
+
An AG-UI tool call extended with the to_dict method.
|
62
|
+
Enforces type='function' and requires id and function.
|
63
|
+
"""
|
64
|
+
def __init__(self, id: str, function: 'FunctionCall'):
|
65
|
+
super().__init__(id=id, type="function", function=function)
|
66
|
+
|
67
|
+
def to_dict(self):
|
68
|
+
"""Convert the ToolCall to a dictionary representation."""
|
69
|
+
return self.model_dump(exclude_none=True)
|
70
|
+
|
71
|
+
class FunctionCall(AgUiFunctionCall):
|
72
|
+
"""
|
73
|
+
An AG-UI function call extended with the to_dict method.
|
74
|
+
Requires name and arguments.
|
75
|
+
"""
|
76
|
+
def __init__(self, name: str, arguments: str):
|
77
|
+
super().__init__(name=name, arguments=arguments)
|
78
|
+
|
79
|
+
def to_dict(self):
|
80
|
+
"""Convert the FunctionCall to a dictionary representation."""
|
81
|
+
return self.model_dump(exclude_none=True)
|
82
|
+
|
83
|
+
# Union type alias for all message types
|
84
|
+
Message = Union[UserMessage, AssistantMessage, SystemMessage, ToolMessage, ToolCall, FunctionCall]
|
scenario/events/utils.py
ADDED
@@ -0,0 +1,86 @@
|
|
1
|
+
from openai.types.chat.chat_completion_message_param import ChatCompletionMessageParam
|
2
|
+
from .messages import UserMessage, AssistantMessage, SystemMessage, ToolMessage, ToolCall, FunctionCall
|
3
|
+
from typing import List, Union
|
4
|
+
|
5
|
+
import uuid
|
6
|
+
|
7
|
+
# Define the correct Message type for the return value
|
8
|
+
Message = Union[UserMessage, AssistantMessage, SystemMessage, ToolMessage]
|
9
|
+
|
10
|
+
def convert_messages_to_ag_ui_messages(messages: list[ChatCompletionMessageParam]) -> list[Message]:
|
11
|
+
"""
|
12
|
+
Converts OpenAI ChatCompletionMessageParam messages to ag_ui Message format.
|
13
|
+
|
14
|
+
This function transforms messages from OpenAI's format to the ag_ui protocol
|
15
|
+
format for consistent message handling across the scenario framework.
|
16
|
+
|
17
|
+
Args:
|
18
|
+
messages: List of OpenAI ChatCompletionMessageParam messages
|
19
|
+
|
20
|
+
Returns:
|
21
|
+
List of ag_ui Message objects
|
22
|
+
|
23
|
+
Raises:
|
24
|
+
ValueError: If message role is not supported or message format is invalid
|
25
|
+
"""
|
26
|
+
|
27
|
+
converted_messages: list[Message] = []
|
28
|
+
|
29
|
+
for i, message in enumerate(messages):
|
30
|
+
# Generate unique ID for each message
|
31
|
+
message_id = message.get("id") or str(uuid.uuid4())
|
32
|
+
|
33
|
+
role = message.get("role")
|
34
|
+
content = message.get("content")
|
35
|
+
|
36
|
+
if role == "user":
|
37
|
+
if not content:
|
38
|
+
raise ValueError(f"User message at index {i} missing required content")
|
39
|
+
converted_messages.append(UserMessage(
|
40
|
+
id=message_id,
|
41
|
+
content=str(content)
|
42
|
+
))
|
43
|
+
elif role == "assistant":
|
44
|
+
# Handle tool calls if present
|
45
|
+
tool_calls = message.get("tool_calls")
|
46
|
+
ag_ui_tool_calls: List[ToolCall] | None = None
|
47
|
+
|
48
|
+
if tool_calls:
|
49
|
+
ag_ui_tool_calls = []
|
50
|
+
for tool_call in tool_calls:
|
51
|
+
ag_ui_tool_calls.append(ToolCall(
|
52
|
+
id=tool_call.get("id", str(uuid.uuid4())),
|
53
|
+
function=FunctionCall(
|
54
|
+
name=tool_call["function"]["name"],
|
55
|
+
arguments=tool_call["function"]["arguments"]
|
56
|
+
)
|
57
|
+
))
|
58
|
+
|
59
|
+
converted_messages.append(AssistantMessage(
|
60
|
+
id=message_id,
|
61
|
+
content=str(content) if content else None,
|
62
|
+
tool_calls=ag_ui_tool_calls
|
63
|
+
))
|
64
|
+
elif role == "system":
|
65
|
+
if not content:
|
66
|
+
raise ValueError(f"System message at index {i} missing required content")
|
67
|
+
converted_messages.append(SystemMessage(
|
68
|
+
id=message_id,
|
69
|
+
content=str(content)
|
70
|
+
))
|
71
|
+
elif role == "tool":
|
72
|
+
tool_call_id = message.get("tool_call_id")
|
73
|
+
if not tool_call_id:
|
74
|
+
raise ValueError(f"Tool message at index {i} missing required tool_call_id")
|
75
|
+
if not content:
|
76
|
+
raise ValueError(f"Tool message at index {i} missing required content")
|
77
|
+
|
78
|
+
converted_messages.append(ToolMessage(
|
79
|
+
id=message_id,
|
80
|
+
content=str(content),
|
81
|
+
tool_call_id=tool_call_id
|
82
|
+
))
|
83
|
+
else:
|
84
|
+
raise ValueError(f"Unsupported message role '{role}' at index {i}")
|
85
|
+
|
86
|
+
return converted_messages
|