langwatch-scenario 0.7.9__py3-none-any.whl → 0.7.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {langwatch_scenario-0.7.9.dist-info → langwatch_scenario-0.7.10.dist-info}/METADATA +3 -2
- {langwatch_scenario-0.7.9.dist-info → langwatch_scenario-0.7.10.dist-info}/RECORD +12 -12
- scenario/_events/event_alert_message_logger.py +5 -0
- scenario/_events/utils.py +43 -27
- scenario/judge_agent.py +3 -2
- scenario/scenario_executor.py +116 -59
- scenario/scenario_state.py +2 -1
- scenario/types.py +54 -2
- scenario/user_simulator_agent.py +3 -2
- {langwatch_scenario-0.7.9.dist-info → langwatch_scenario-0.7.10.dist-info}/WHEEL +0 -0
- {langwatch_scenario-0.7.9.dist-info → langwatch_scenario-0.7.10.dist-info}/entry_points.txt +0 -0
- {langwatch_scenario-0.7.9.dist-info → langwatch_scenario-0.7.10.dist-info}/top_level.txt +0 -0
{langwatch_scenario-0.7.9.dist-info → langwatch_scenario-0.7.10.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: langwatch-scenario
-Version: 0.7.9
+Version: 0.7.10
 Summary: The end-to-end agent testing library
 Author-email: LangWatch Team <support@langwatch.ai>
 License: MIT
@@ -14,7 +14,7 @@ Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
-Requires-Python: >=3.
+Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 Requires-Dist: pytest>=8.1.1
 Requires-Dist: litellm>=1.49.0
@@ -31,6 +31,7 @@ Requires-Dist: httpx>=0.27.0
 Requires-Dist: rx>=3.2.0
 Requires-Dist: python-dateutil>=2.9.0.post0
 Requires-Dist: pydantic-settings>=2.9.1
+Requires-Dist: langwatch>=0.2.19
 Provides-Extra: dev
 Requires-Dist: black; extra == "dev"
 Requires-Dist: isort; extra == "dev"
{langwatch_scenario-0.7.9.dist-info → langwatch_scenario-0.7.10.dist-info}/RECORD
CHANGED

@@ -2,21 +2,21 @@ scenario/__init__.py,sha256=4WO8TjY8Lc0NhYL7b9LvaB1xCBqwUkLuI0uIA6PQP6c,4223
 scenario/_error_messages.py,sha256=QVFSbhzsVNGz2GOBOaoQFW6w6AOyZCWLTt0ySWPfnGw,3882
 scenario/agent_adapter.py,sha256=PoY2KQqYuqzIIb3-nhIU-MPXwHJc1vmwdweMy7ut-hk,4255
 scenario/cache.py,sha256=J6s6Sia_Ce6TrnsInlhfxm6SF8tygo3sH-_cQCRX1WA,6213
-scenario/judge_agent.py,sha256=
+scenario/judge_agent.py,sha256=hHQ2nKsOgSyTtN0LdE6xIF0wZnnlYLN6RcxTPecFHDU,16770
 scenario/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 scenario/pytest_plugin.py,sha256=wRCuGD9uwrrLt2fY15zK6mnmY9W_dO_m0WalPJYE5II,11491
-scenario/scenario_executor.py,sha256=
-scenario/scenario_state.py,sha256=
+scenario/scenario_executor.py,sha256=v41UgSHebosXf95FfYIeVUm6s4IbMP_U58FdGoZ_kZU,35653
+scenario/scenario_state.py,sha256=R8PhPHW3obYo3DCjBH5XDdZ6bp4uol7wCXO8K2Tz30I,7101
 scenario/script.py,sha256=A0N5pP0l4FFn1xdKc78U_wkwWhEWH3EFeU_LRDtNyEI,12241
-scenario/types.py,sha256=
-scenario/user_simulator_agent.py,sha256=
+scenario/types.py,sha256=CRSCHUplXEXhj6EYQsncwJBzbd2128YTGlFxlk-rrG8,11193
+scenario/user_simulator_agent.py,sha256=gXRaeoivEAcenIEqMDU6bWzv8cOrJaaooNrTdpC9TE4,9630
 scenario/_events/__init__.py,sha256=4cj6H9zuXzvWhT2P2JNdjWzeF1PUepTjqIDw85Vid9s,1500
-scenario/_events/event_alert_message_logger.py,sha256=
+scenario/_events/event_alert_message_logger.py,sha256=XcofGgXjeiTC75NPYheBpHxqA6R4pYAuHZa7-kH9Grg,2975
 scenario/_events/event_bus.py,sha256=IsKNsClF1JFYj728EcxX1hw_KbfDkfJq3Y2Kv4h94n4,9871
 scenario/_events/event_reporter.py,sha256=-6NNbBMy_FYr1O-1FuZ6eIUnLuI8NGRMUr0pybLJrCI,3873
 scenario/_events/events.py,sha256=UtEGY-_1B0LrwpgsNKgrvJBZhRtxuj3K_i6ZBfF7E4Q,6387
 scenario/_events/messages.py,sha256=quwP2OkeaGasNOoaV8GUeosZVKc5XDsde08T0xx_YQo,2297
-scenario/_events/utils.py,sha256=
+scenario/_events/utils.py,sha256=CRrdDHBD2ptcNIjzW0eEG1V5-Vw1gFnp_UTz5zMQ_Ak,4051
 scenario/_generated/langwatch_api_client/README.md,sha256=Az5f2L4ChOnG_ZtrdBagzRVgeTCtBkbD_S5cIeAry2o,5424
 scenario/_generated/langwatch_api_client/pyproject.toml,sha256=Z8wxuGp4H9BJYVVJB8diW7rRU9XYxtPfw9mU4_wq4cA,560
 scenario/_generated/langwatch_api_client/lang_watch_api_client/__init__.py,sha256=vVrn17y-3l3fOqeJk8aN3GlStRm2fo0f313l_0LtJNs,368
@@ -235,8 +235,8 @@ scenario/config/__init__.py,sha256=b2X_bqkIrd7jZY9dRrXk2wOqoPe87Nl_SRGuZhlolxA,1
 scenario/config/langwatch.py,sha256=ijWchFbUsLbQooAZmwyTw4rxfRLQseZ1GoVSiPPbzpw,1677
 scenario/config/model.py,sha256=T4HYA79CW1NxXDkFlyftYR6JzZcowbtIx0H-ijxRyfg,1297
 scenario/config/scenario.py,sha256=6jrtcm0Fo7FpxQta7QIKdGMgl7cXrn374Inzx29hRuk,5406
-langwatch_scenario-0.7.9.dist-info/METADATA,sha256=
-langwatch_scenario-0.7.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-langwatch_scenario-0.7.9.dist-info/entry_points.txt,sha256=WlEnJ_gku0i18bIa3DSuGqXRX-QDQLe_s0YmRzK45TI,45
-langwatch_scenario-0.7.9.dist-info/top_level.txt,sha256=45Mn28aedJsetnBMB5xSmrJ-yo701QLH89Zlz4r1clE,9
-langwatch_scenario-0.7.9.dist-info/RECORD,,
+langwatch_scenario-0.7.10.dist-info/METADATA,sha256=pbLZM8UXj1_1TWHjheHP6QREOvRWfX7nHEdfY2ZX4aA,20065
+langwatch_scenario-0.7.10.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+langwatch_scenario-0.7.10.dist-info/entry_points.txt,sha256=WlEnJ_gku0i18bIa3DSuGqXRX-QDQLe_s0YmRzK45TI,45
+langwatch_scenario-0.7.10.dist-info/top_level.txt,sha256=45Mn28aedJsetnBMB5xSmrJ-yo701QLH89Zlz4r1clE,9
+langwatch_scenario-0.7.10.dist-info/RECORD,,
scenario/_events/event_alert_message_logger.py
CHANGED

@@ -15,6 +15,7 @@ class EventAlertMessageLogger:
     """
 
     _shown_batch_ids: Set[str] = set()
+    _shown_watch_urls: Set[str] = set()
 
     def handle_greeting(self) -> None:
         """
@@ -40,6 +41,10 @@ class EventAlertMessageLogger:
         if self._is_greeting_disabled():
             return
 
+        if set_url in EventAlertMessageLogger._shown_watch_urls:
+            return
+
+        EventAlertMessageLogger._shown_watch_urls.add(set_url)
         self._display_watch_message(set_url)
 
     def _is_greeting_disabled(self) -> bool:
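
The new `_shown_watch_urls` set mirrors the existing `_shown_batch_ids` pattern: a class-level set deduplicates the "watch" banner so each URL is announced at most once per process. A minimal self-contained sketch of the pattern (class, method, and URL here are illustrative, not the library's API):

```python
from typing import Set


class BannerLogger:
    # Class-level set: shared across all instances, so the banner prints
    # at most once per URL for the lifetime of the process.
    _shown_watch_urls: Set[str] = set()

    def handle_watch_message(self, set_url: str) -> None:
        if set_url in BannerLogger._shown_watch_urls:
            return  # already shown for this URL, stay quiet
        BannerLogger._shown_watch_urls.add(set_url)
        print(f"Watch live: {set_url}")


logger_a, logger_b = BannerLogger(), BannerLogger()
logger_a.handle_watch_message("https://example.com/simulations/abc")
logger_b.handle_watch_message("https://example.com/simulations/abc")  # no output
```
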
scenario/_events/utils.py
CHANGED

@@ -1,5 +1,6 @@
 import warnings
 
+from ..types import ChatCompletionMessageParamWithTrace
 from .events import MessageType
 from .messages import (
     SystemMessage,
@@ -12,7 +13,10 @@ from .messages import (
 from typing import List
 from pksuid import PKSUID
 
-def convert_messages_to_api_client_messages(messages: list[ChatCompletionMessageParam]) -> list[MessageType]:
+
+def convert_messages_to_api_client_messages(
+    messages: list[ChatCompletionMessageParamWithTrace],
+) -> list[MessageType]:
     """
     Converts OpenAI ChatCompletionMessageParam messages to API client Message format.
 
@@ -33,7 +37,7 @@ def convert_messages_to_api_client_messages(
 
     for i, message in enumerate(messages):
         # Generate unique ID for each message
-        message_id = message.get("id") or str(PKSUID(
+        message_id = message.get("id") or str(PKSUID("scenariomsg"))
 
         role = message.get("role")
         content = message.get("content")
@@ -41,11 +45,13 @@ def convert_messages_to_api_client_messages(
         if role == "user":
             if not content:
                 raise ValueError(f"User message at index {i} missing required content")
-            converted_messages.append(UserMessage(
+            message_ = UserMessage(
                 id=message_id,
                 role="user",
-                content=str(content)
-            ))
+                content=str(content),
+            )
+            message_.additional_properties = {"trace_id": message.get("trace_id")}
+            converted_messages.append(message_)
         elif role == "assistant":
             # Handle tool calls if present
             tool_calls = message.get("tool_calls")
@@ -53,44 +59,54 @@ def convert_messages_to_api_client_messages(
 
             if tool_calls:
                 for tool_call in tool_calls:
-                    api_tool_calls.append(ToolCall(
-                        id=tool_call.get("id", str(PKSUID("scenariotoolcall"))),
-                        type_="function",
-                        function=FunctionCall(
-                            name=tool_call["function"].get("name", "unknown"),
-                            arguments=tool_call["function"].get("arguments", "{}")
+                    api_tool_calls.append(
+                        ToolCall(
+                            id=tool_call.get("id", str(PKSUID("scenariotoolcall"))),
+                            type_="function",
+                            function=FunctionCall(
+                                name=tool_call["function"].get("name", "unknown"),
+                                arguments=tool_call["function"].get("arguments", "{}"),
+                            ),
                         )
-                    ))
+                    )
 
-            converted_messages.append(AssistantMessage(
+            message_ = AssistantMessage(
                 id=message_id,
                 role="assistant",
                 content=str(content),
-                tool_calls=api_tool_calls
-            ))
+                tool_calls=api_tool_calls,
+            )
+            message_.additional_properties = {"trace_id": message.get("trace_id")}
+            converted_messages.append(message_)
         elif role == "system":
             if not content:
-                raise ValueError(f"System message at index {i} missing required content")
-            converted_messages.append(SystemMessage(
-                id=message_id,
-                role="system",
-                content=str(content)
-            ))
+                raise ValueError(
+                    f"System message at index {i} missing required content"
+                )
+            message_ = SystemMessage(id=message_id, role="system", content=str(content))
+            message_.additional_properties = {"trace_id": message.get("trace_id")}
+            converted_messages.append(message_)
         elif role == "tool":
             tool_call_id = message.get("tool_call_id")
            if not tool_call_id:
-                warnings.warn(f"Tool message at index {i} missing required tool_call_id, skipping tool message")
+                warnings.warn(
+                    f"Tool message at index {i} missing required tool_call_id, skipping tool message"
+                )
                 continue
             if not content:
-                warnings.warn(f"Tool message at index {i} missing required content, skipping tool message")
+                warnings.warn(
+                    f"Tool message at index {i} missing required content, skipping tool message"
+                )
                 continue
 
-            converted_messages.append(ToolMessage(
+            message_ = ToolMessage(
                 id=message_id,
                 role="tool",
                 content=str(content),
-                tool_call_id=tool_call_id
-            ))
+                tool_call_id=tool_call_id,
+            )
+            message_.additional_properties = {"trace_id": message.get("trace_id")}
+            converted_messages.append(message_)
         else:
             raise ValueError(f"Unsupported message role '{role}' at index {i}")
 
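
The net effect of the `utils.py` change: every converted message now carries its `trace_id` in the API client's `additional_properties` bag. A simplified sketch of the pattern, using a stand-in dataclass instead of the generated `UserMessage` client class:

```python
from dataclasses import dataclass, field
from typing import Any, Dict, Optional


@dataclass
class UserMessage:
    # Stand-in for the generated API client class of the same name.
    id: str
    role: str
    content: str
    additional_properties: Dict[str, Optional[str]] = field(default_factory=dict)


def convert_user_message(message: Dict[str, Any]) -> UserMessage:
    if not message.get("content"):
        raise ValueError("User message missing required content")
    converted = UserMessage(
        id=message.get("id") or "msg_01",  # the real code generates a PKSUID here
        role="user",
        content=str(message["content"]),
    )
    # The trace_id travels in additional_properties, tying the message back to
    # the LangWatch trace of the turn that produced it.
    converted.additional_properties = {"trace_id": message.get("trace_id")}
    return converted


print(convert_user_message({"role": "user", "content": "Hi", "trace_id": "trace_123"}))
```
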
scenario/judge_agent.py
CHANGED

@@ -12,7 +12,8 @@ import logging
 import re
 from typing import List, Optional, cast
 
-from litellm import Choices, completion
+import litellm
+from litellm import Choices
 from litellm.files.main import ModelResponse
 
 from scenario.cache import scenario_cache
@@ -356,7 +357,7 @@ if you don't have enough information to make a verdict, say inconclusive with ma
 
         response = cast(
             ModelResponse,
-            completion(
+            litellm.completion(
                 model=self.model,
                 messages=messages,
                 temperature=self.temperature,
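
A plausible reading of this import change: `scenario_executor.py` (below) calls `self._trace.autotrack_litellm_calls(litellm)`, and auto-tracking of this kind typically works by replacing attributes on the `litellm` module. Calling `litellm.completion(...)` resolves the attribute at call time and therefore sees the patch, while a name bound earlier via `from litellm import completion` would keep pointing at the unpatched function. The same change appears in `user_simulator_agent.py` below. A self-contained sketch of that mechanism (using a fake module object, not the real `litellm`):

```python
import types

# Build a stand-in module with a `completion` function.
fake_litellm = types.ModuleType("fake_litellm")
fake_litellm.completion = lambda **kwargs: "response"

# Alias bound early, like `from litellm import completion`.
completion = fake_litellm.completion

# A tracker patches the module attribute, as autotrack-style helpers do.
_original = fake_litellm.completion
fake_litellm.completion = lambda **kwargs: f"tracked:{_original(**kwargs)}"

print(fake_litellm.completion(model="x"))  # tracked:response  (patch seen)
print(completion(model="x"))               # response          (patch missed)
```
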
scenario/scenario_executor.py
CHANGED

@@ -6,6 +6,7 @@ of scenario tests, managing the interaction between user simulators, agents unde
 and judge agents to determine test success or failure.
 """
 
+import json
 import sys
 from typing import (
     Awaitable,
@@ -17,6 +18,7 @@ from typing import (
     Tuple,
     Union,
     TypedDict,
+    cast,
 )
 import time
 import warnings
@@ -33,6 +35,7 @@ from scenario._utils import (
     await_if_awaitable,
     get_batch_run_id,
     generate_scenario_run_id,
+    SerializableWithStringFallback,
 )
 from openai.types.chat import (
     ChatCompletionMessageParam,
@@ -40,7 +43,7 @@ from openai.types.chat import (
     ChatCompletionAssistantMessageParam,
 )
 
-from .types import AgentInput, AgentRole, ScenarioResult, ScriptStep
+from .types import AgentInput, AgentRole, ChatCompletionMessageParamWithTrace, ScenarioResult, ScriptStep
 from ._error_messages import agent_response_not_awaitable
 from .cache import context_scenario
 from .agent_adapter import AgentAdapter
@@ -62,6 +65,11 @@ from ._events import (
 from rx.subject.subject import Subject
 from rx.core.observable.observable import Observable
 
+import litellm
+import langwatch
+import langwatch.telemetry.context
+from langwatch.telemetry.tracing import LangWatchTrace
+
 
 class ScenarioExecutor:
     """
@@ -101,6 +109,7 @@ class ScenarioExecutor:
     _pending_agents_on_turn: Set[AgentAdapter] = set()
     _agent_times: Dict[int, float] = {}
     _events: Subject
+    _trace: LangWatchTrace
 
     event_bus: ScenarioEventBus
 
@@ -157,7 +166,8 @@ class ScenarioExecutor:
         )
         self.config = (ScenarioConfig.default_config or ScenarioConfig()).merge(config)
 
-        self.
+        self.batch_run_id = get_batch_run_id()
+        self.scenario_set_id = set_id or "default"
 
         # Create executor's own event stream
         self._events = Subject()
@@ -166,9 +176,6 @@ class ScenarioExecutor:
         self.event_bus = event_bus or ScenarioEventBus()
         self.event_bus.subscribe_to_events(self._events)
 
-        self.batch_run_id = get_batch_run_id()
-        self.scenario_set_id = set_id or "default"
-
     @property
     def events(self) -> Observable:
         """Expose event stream for subscribers like the event bus."""
@@ -253,6 +260,8 @@ class ScenarioExecutor:
         )
         ```
         """
+        message = cast(ChatCompletionMessageParamWithTrace, message)
+        message["trace_id"] = self._trace.trace_id
         self._state.messages.append(message)
 
         # Broadcast the message to other agents
@@ -263,6 +272,21 @@ class ScenarioExecutor:
             self._pending_messages[idx] = []
         self._pending_messages[idx].append(message)
 
+        # Update trace with input/output
+        if message["role"] == "user":
+            self._trace.update(input={"type": "text", "value": str(message["content"])})
+        elif message["role"] == "assistant":
+            self._trace.update(
+                output={
+                    "type": "text",
+                    "value": str(
+                        message["content"]
+                        if "content" in message
+                        else json.dumps(message, cls=SerializableWithStringFallback)
+                    ),
+                }
+            )
+
     def add_messages(
         self,
         messages: List[ChatCompletionMessageParam],
@@ -292,6 +316,21 @@ class ScenarioExecutor:
         self.add_message(message, from_agent_idx)
 
     def _new_turn(self):
+        if hasattr(self, "_trace") and self._trace is not None:
+            self._trace.__exit__(None, None, None)
+
+        self._trace = langwatch.trace(
+            name="Scenario Turn",
+            metadata={
+                "labels": ["scenario"],
+                "thread_id": self._state.thread_id,
+                "scenario.name": self.name,
+                "scenario.batch_id": self.batch_run_id,
+                "scenario.set_id": self.scenario_set_id,
+                "scenario.turn": self._state.current_turn,
+            },
+        ).__enter__()
+
         self._pending_agents_on_turn = set(self.agents)
         self._pending_roles_on_turn = [
             AgentRole.USER,
@@ -460,7 +499,7 @@ class ScenarioExecutor:
 
     async def _call_agent(
         self, idx: int, role: AgentRole, request_judgment: bool = False
-    ) -> Union[List[ChatCompletionMessageParam], ScenarioResult]:
+    ) -> Union[List[ChatCompletionMessageParam], ScenarioResult, None]:
         agent = self.agents[idx]
 
         if role == AgentRole.USER and self.config.debug:
@@ -482,67 +521,84 @@ class ScenarioExecutor:
             ChatCompletionUserMessageParam(role="user", content=input_message)
         ]
 
-        with show_spinner(
-            text=(
-                "Judging..."
-                if role == AgentRole.JUDGE
-                else f"{role.value if isinstance(role, AgentRole) else role}:"
-            ),
-            color=(
-                "blue"
-                if role == AgentRole.AGENT
-                else "green" if role == AgentRole.USER else "yellow"
-            ),
-            enabled=self.config.verbose,
-        ):
-            start_time = time.time()
-
-            # Prevent pydantic validation warnings which should already be disabled
-            with warnings.catch_warnings():
-                warnings.simplefilter("ignore")
-
-                agent_response = agent.call(
-                    AgentInput(
-                        # TODO: test thread_id
-                        thread_id=self._state.thread_id,
-                        messages=self._state.messages,
-                        new_messages=self._pending_messages.get(idx, []),
-                        judgment_request=request_judgment,
-                        scenario_state=self._state,
-                    )
-                )
-                if not isinstance(agent_response, Awaitable):
-                    raise Exception(
-                        agent_response_not_awaitable(agent.__class__.__name__),
-                    )
-
-                agent_response = await agent_response
-
-            if idx not in self._agent_times:
-                self._agent_times[idx] = 0
-            self._agent_times[idx] += time.time() - start_time
-
-            self._pending_messages[idx] = []
-            check_valid_return_type(agent_response, agent.__class__.__name__)
-
-            messages = []
-            if isinstance(agent_response, ScenarioResult):
-                return agent_response
-            else:
-                messages = convert_agent_return_types_to_openai_messages(
-                    agent_response,
-                    role="user" if role == AgentRole.USER else "assistant",
-                )
-
-            self.add_messages(messages, from_agent_idx=idx)
-
-            if messages and self.config.verbose:
-                print_openai_messages(
-                    self._scenario_name(),
-                    [m for m in messages if m["role"] != "system"],
-                )
-
-            return messages
+        with self._trace.span(type="agent", name=f"{agent.__class__.__name__}.call") as span:
+            with show_spinner(
+                text=(
+                    "Judging..."
+                    if role == AgentRole.JUDGE
+                    else f"{role.value if isinstance(role, AgentRole) else role}:"
+                ),
+                color=(
+                    "blue"
+                    if role == AgentRole.AGENT
+                    else "green" if role == AgentRole.USER else "yellow"
+                ),
+                enabled=self.config.verbose,
+            ):
+                start_time = time.time()
+
+                # Prevent pydantic validation warnings which should already be disabled
+                with warnings.catch_warnings():
+                    warnings.simplefilter("ignore")
+
+                    self._trace.autotrack_litellm_calls(litellm)
+
+                    agent_response = agent.call(
+                        AgentInput(
+                            # TODO: test thread_id
+                            thread_id=self._state.thread_id,
+                            messages=cast(List[ChatCompletionMessageParam], self._state.messages),
+                            new_messages=self._pending_messages.get(idx, []),
+                            judgment_request=request_judgment,
+                            scenario_state=self._state,
+                        )
+                    )
+                    if not isinstance(agent_response, Awaitable):
+                        raise Exception(
+                            agent_response_not_awaitable(agent.__class__.__name__),
+                        )
+
+                    agent_response = await agent_response
+
+                if idx not in self._agent_times:
+                    self._agent_times[idx] = 0
+                self._agent_times[idx] += time.time() - start_time
+
+                self._pending_messages[idx] = []
+                check_valid_return_type(agent_response, agent.__class__.__name__)
+
+                messages = []
+                if isinstance(agent_response, ScenarioResult):
+                    # TODO: should be an event
+                    span.add_evaluation(
+                        name=f"{agent.__class__.__name__} Judgment",
+                        status="processed",
+                        passed=agent_response.success,
+                        details=agent_response.reasoning,
+                        score=(
+                            len(agent_response.passed_criteria)
+                            / len(agent_response.failed_criteria)
+                            if agent_response.failed_criteria
+                            else 1.0
+                        ),
+                    )
+
+                    return agent_response
+                else:
+                    messages = convert_agent_return_types_to_openai_messages(
+                        agent_response,
+                        role="user" if role == AgentRole.USER else "assistant",
+                    )
+
+                self.add_messages(messages, from_agent_idx=idx)
+
+                if messages and self.config.verbose:
+                    print_openai_messages(
+                        self._scenario_name(),
+                        [m for m in messages if m["role"] != "system"],
+                    )
+
+                return messages
 
     def _scenario_name(self):
         if self.config.verbose == 2:
@@ -817,6 +873,7 @@ class ScenarioExecutor:
 
         # Signal end of event stream
         self._events.on_completed()
+        self._trace.__exit__(None, None, None)
 
 
     async def run(
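
The executor now manages one LangWatch trace per turn, driving the context manager by hand: `_new_turn` closes the previous turn's trace and `__enter__`s a new one, and the run's teardown `__exit__`s the last open trace. A minimal sketch of this manual lifecycle (with a stand-in trace class, not the `langwatch` SDK):

```python
from typing import Optional


class FakeTrace:
    # Stand-in for the object returned by langwatch.trace(...).
    def __init__(self, name: str) -> None:
        self.name = name

    def __enter__(self) -> "FakeTrace":
        print(f"open  {self.name}")
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        print(f"close {self.name}")


class TurnTracer:
    def __init__(self) -> None:
        self._trace: Optional[FakeTrace] = None

    def new_turn(self, turn: int) -> None:
        # Close the previous turn's trace before opening the next one,
        # mirroring _new_turn above.
        if self._trace is not None:
            self._trace.__exit__(None, None, None)
        self._trace = FakeTrace(f"Scenario Turn {turn}").__enter__()

    def finish(self) -> None:
        # Mirrors the run teardown, which exits the last open trace.
        if self._trace is not None:
            self._trace.__exit__(None, None, None)
            self._trace = None


tracer = TurnTracer()
tracer.new_turn(1)
tracer.new_turn(2)  # closes turn 1, opens turn 2
tracer.finish()     # closes turn 2
```
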
scenario/scenario_state.py
CHANGED

@@ -14,6 +14,7 @@ from openai.types.chat import (
 )
 from pydantic import BaseModel
 
+from scenario.types import ChatCompletionMessageParamWithTrace
 from scenario.config import ScenarioConfig
 
 if TYPE_CHECKING:
@@ -70,7 +71,7 @@ class ScenarioState(BaseModel):
     """
 
     description: str
-    messages: List[ChatCompletionMessageParam]
+    messages: List[ChatCompletionMessageParamWithTrace]
     thread_id: str
     current_turn: int
     config: ScenarioConfig
scenario/types.py
CHANGED

@@ -8,10 +8,20 @@ from typing import (
     Callable,
     List,
     Optional,
+    TypeAlias,
     Union,
 )
 
-from openai.types.chat import ChatCompletionMessageParam, ChatCompletionUserMessageParam
+from openai.types.chat import (
+    ChatCompletionMessageParam,
+    ChatCompletionUserMessageParam,
+    ChatCompletionToolMessageParam,
+    ChatCompletionUserMessageParam,
+    ChatCompletionSystemMessageParam,
+    ChatCompletionFunctionMessageParam,
+    ChatCompletionAssistantMessageParam,
+    ChatCompletionDeveloperMessageParam,
+)
 
 # Prevent circular imports + Pydantic breaking
 if TYPE_CHECKING:
@@ -22,6 +32,48 @@ else:
     ScenarioStateType = Any
 
 
+# Since Python types do not support intersection, we need to wrap ALL the chat completion
+# message types with the trace_id field
+
+
+class ChatCompletionDeveloperMessageParamWithTrace(ChatCompletionDeveloperMessageParam):
+    trace_id: Optional[str]
+
+
+class ChatCompletionSystemMessageParamWithTrace(ChatCompletionSystemMessageParam):
+    trace_id: Optional[str]
+
+
+class ChatCompletionUserMessageParamWithTrace(ChatCompletionUserMessageParam):
+    trace_id: Optional[str]
+
+
+class ChatCompletionAssistantMessageParamWithTrace(ChatCompletionAssistantMessageParam):
+    trace_id: Optional[str]
+
+
+class ChatCompletionToolMessageParamWithTrace(ChatCompletionToolMessageParam):
+    trace_id: Optional[str]
+
+
+class ChatCompletionFunctionMessageParamWithTrace(ChatCompletionFunctionMessageParam):
+    trace_id: Optional[str]
+
+
+"""
+A wrapper around ChatCompletionMessageParam that adds a trace_id field to be able to
+tie back each message of the scenario run to a trace.
+"""
+ChatCompletionMessageParamWithTrace: TypeAlias = Union[
+    ChatCompletionDeveloperMessageParamWithTrace,
+    ChatCompletionSystemMessageParamWithTrace,
+    ChatCompletionUserMessageParamWithTrace,
+    ChatCompletionAssistantMessageParamWithTrace,
+    ChatCompletionToolMessageParamWithTrace,
+    ChatCompletionFunctionMessageParamWithTrace,
+]
+
+
 class AgentRole(Enum):
     """
     Defines the different roles that agents can play in a scenario.
@@ -171,7 +223,7 @@ class ScenarioResult(BaseModel):
 
     success: bool
     # Prevent issues with slightly inconsistent message types for example when comming from Gemini right at the result level
-    messages: Annotated[List[ChatCompletionMessageParam], SkipValidation]
+    messages: Annotated[List[ChatCompletionMessageParamWithTrace], SkipValidation]
     reasoning: Optional[str] = None
     passed_criteria: List[str] = []
     failed_criteria: List[str] = []
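
Because `TypedDict`s cannot be intersected, `types.py` subclasses each OpenAI message shape to add `trace_id` and then unions the wrappers back into a single alias. Note that `typing.TypeAlias` only exists on Python 3.10+, which lines up with the `Requires-Python: >=3.10` bump in the METADATA above. A reduced sketch of the same pattern with two stand-in message shapes:

```python
from typing import Optional, TypedDict, Union


class UserMessageParam(TypedDict):
    role: str
    content: str


class SystemMessageParam(TypedDict):
    role: str
    content: str


# Subclassing is the only way to add the extra key; there is no way to say
# "UserMessageParam AND {trace_id}" directly.
class UserMessageParamWithTrace(UserMessageParam):
    trace_id: Optional[str]


class SystemMessageParamWithTrace(SystemMessageParam):
    trace_id: Optional[str]


# The real code unions six wrapped variants, one per OpenAI message role.
MessageParamWithTrace = Union[UserMessageParamWithTrace, SystemMessageParamWithTrace]

msg: MessageParamWithTrace = {"role": "user", "content": "Hi", "trace_id": "trace_123"}
```
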
scenario/user_simulator_agent.py
CHANGED

@@ -10,7 +10,8 @@ conversation history.
 import logging
 from typing import Optional, cast
 
-from litellm import Choices, completion
+import litellm
+from litellm import Choices
 from litellm.files.main import ModelResponse
 
 from scenario.cache import scenario_cache
@@ -228,7 +229,7 @@ Your goal (assistant) is to interact with the Agent Under Test (user) as if you
 
         response = cast(
             ModelResponse,
-            completion(
+            litellm.completion(
                 model=self.model,
                 messages=messages,
                 temperature=self.temperature,
{langwatch_scenario-0.7.9.dist-info → langwatch_scenario-0.7.10.dist-info}/WHEEL
File without changes

{langwatch_scenario-0.7.9.dist-info → langwatch_scenario-0.7.10.dist-info}/entry_points.txt
File without changes

{langwatch_scenario-0.7.9.dist-info → langwatch_scenario-0.7.10.dist-info}/top_level.txt
File without changes