langwatch-scenario 0.7.9__tar.gz → 0.7.10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/PKG-INFO +3 -2
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/langwatch_scenario.egg-info/PKG-INFO +3 -2
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/langwatch_scenario.egg-info/requires.txt +1 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/pyproject.toml +4 -6
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_events/event_alert_message_logger.py +5 -0
- langwatch_scenario-0.7.10/scenario/_events/utils.py +113 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/judge_agent.py +3 -2
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/scenario_executor.py +116 -59
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/scenario_state.py +2 -1
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/types.py +54 -2
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/user_simulator_agent.py +3 -2
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/tests/test_scenario.py +8 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/tests/test_scenario_agent.py +3 -4
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/tests/test_scenario_executor.py +24 -1
- langwatch_scenario-0.7.9/scenario/_events/utils.py +0 -97
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/README.md +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/langwatch_scenario.egg-info/SOURCES.txt +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/langwatch_scenario.egg-info/dependency_links.txt +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/langwatch_scenario.egg-info/entry_points.txt +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/langwatch_scenario.egg-info/top_level.txt +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/__init__.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_error_messages.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_events/__init__.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_events/event_bus.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_events/event_reporter.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_events/events.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_events/messages.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/README.md +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/__init__.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/__init__.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/__init__.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/delete_api_annotations_id.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/delete_api_prompts_by_id.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/delete_api_scenario_events.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/get_api_annotations.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/get_api_annotations_id.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/get_api_annotations_trace_id.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/get_api_dataset_by_slug_or_id.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/get_api_prompts.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/get_api_prompts_by_id.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/get_api_prompts_by_id_versions.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/get_api_trace_id.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/patch_api_annotations_id.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/post_api_annotations_trace_id.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/post_api_dataset_by_slug_entries.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/post_api_prompts.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/post_api_prompts_by_id_versions.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/post_api_scenario_events.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/post_api_trace_id_share.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/post_api_trace_id_unshare.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/default/put_api_prompts_by_id.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/traces/__init__.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/api/traces/post_api_trace_search.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/client.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/errors.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/__init__.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/annotation.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/dataset_post_entries.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/dataset_post_entries_entries_item.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_annotations_id_response_200.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_prompts_by_id_response_200.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_prompts_by_id_response_400.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_prompts_by_id_response_400_error.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_prompts_by_id_response_401.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_prompts_by_id_response_401_error.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_prompts_by_id_response_404.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_prompts_by_id_response_500.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_scenario_events_response_200.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_scenario_events_response_400.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_scenario_events_response_401.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/delete_api_scenario_events_response_500.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/error.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/evaluation.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/evaluation_timestamps.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_dataset_by_slug_or_id_response_200.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_dataset_by_slug_or_id_response_200_data_item.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_dataset_by_slug_or_id_response_200_data_item_entry.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_dataset_by_slug_or_id_response_400.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_dataset_by_slug_or_id_response_401.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_dataset_by_slug_or_id_response_404.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_dataset_by_slug_or_id_response_422.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_dataset_by_slug_or_id_response_500.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_200.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_200_messages_item.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_200_messages_item_role.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_200_response_format_type_0.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_200_response_format_type_0_json_schema.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_200_response_format_type_0_json_schema_schema.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_200_response_format_type_0_type.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_400.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_400_error.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_401.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_401_error.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_404.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_response_500.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_demonstrations.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_demonstrations_columns_item.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_demonstrations_columns_item_type.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_demonstrations_rows_item.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_inputs_item.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_inputs_item_type.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_messages_item.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_messages_item_role.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_outputs_item.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_outputs_item_json_schema.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_outputs_item_type.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_200_config_data_prompting_technique.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_400.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_400_error.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_401.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_401_error.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_404.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_by_id_versions_response_500.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_200_item.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_200_item_messages_item.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_200_item_messages_item_role.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_200_item_response_format_type_0.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_200_item_response_format_type_0_json_schema.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_200_item_response_format_type_0_json_schema_schema.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_200_item_response_format_type_0_type.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_400.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_400_error.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_401.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_401_error.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_prompts_response_500.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_error_type_0.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_evaluations_item.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_evaluations_item_error.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_evaluations_item_timestamps.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_input.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_metadata.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_metrics.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_output.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_spans_item.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_spans_item_error_type_0.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_spans_item_input.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_spans_item_input_value_item.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_spans_item_metrics.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_spans_item_output.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_spans_item_output_value_item.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_spans_item_params.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_spans_item_timestamps.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/get_api_trace_id_response_200_timestamps.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/input_.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/metadata.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/metrics.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/output.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/pagination.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/patch_api_annotations_id_body.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/patch_api_annotations_id_response_200.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_annotations_trace_id_body.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_body.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_demonstrations.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_demonstrations_columns_item.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_demonstrations_columns_item_type.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_demonstrations_rows_item.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_inputs_item.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_inputs_item_type.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_messages_item.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_messages_item_role.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_outputs_item.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_outputs_item_json_schema.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_outputs_item_type.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_body_config_data_prompting_technique.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_demonstrations.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_demonstrations_columns_item.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_demonstrations_columns_item_type.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_demonstrations_rows_item.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_inputs_item.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_inputs_item_type.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_messages_item.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_messages_item_role.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_outputs_item.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_outputs_item_json_schema.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_outputs_item_type.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_200_config_data_prompting_technique.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_400.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_400_error.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_401.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_401_error.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_404.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_by_id_versions_response_500.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_200.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_200_messages_item.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_200_messages_item_role.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_200_response_format_type_0.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_200_response_format_type_0_json_schema.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_200_response_format_type_0_json_schema_schema.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_200_response_format_type_0_type.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_400.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_400_error.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_401.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_401_error.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_prompts_response_500.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_0.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_0_metadata.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_1.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_1_results_type_0.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_1_results_type_0_verdict.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_1_status.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_2.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_2_messages_item_type_0.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_2_messages_item_type_1.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_2_messages_item_type_2.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_2_messages_item_type_2_tool_calls_item.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_2_messages_item_type_2_tool_calls_item_function.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_2_messages_item_type_3.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_body_type_2_messages_item_type_4.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_response_201.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_response_400.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_response_401.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_scenario_events_response_500.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_trace_id_share_response_200.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/post_api_trace_id_unshare_response_200.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/put_api_prompts_by_id_body.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/put_api_prompts_by_id_response_200.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/put_api_prompts_by_id_response_400.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/put_api_prompts_by_id_response_400_error.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/put_api_prompts_by_id_response_401.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/put_api_prompts_by_id_response_401_error.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/put_api_prompts_by_id_response_404.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/put_api_prompts_by_id_response_500.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/search_request.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/search_request_filters.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/search_response.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/timestamps.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/models/trace.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/py.typed +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/lang_watch_api_client/types.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_generated/langwatch_api_client/pyproject.toml +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_utils/__init__.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_utils/ids.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_utils/message_conversion.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_utils/utils.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/agent_adapter.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/cache.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/config/__init__.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/config/langwatch.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/config/model.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/config/scenario.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/py.typed +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/pytest_plugin.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/script.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/setup.cfg +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/setup.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/tests/test_event_reporter.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/tests/test_model_config.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/tests/test_scenario_event_bus.py +0 -0
- {langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/tests/test_scenario_executor_events.py +0 -0
{langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: langwatch-scenario
-Version: 0.7.9
+Version: 0.7.10
 Summary: The end-to-end agent testing library
 Author-email: LangWatch Team <support@langwatch.ai>
 License: MIT
@@ -14,7 +14,7 @@ Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
-Requires-Python: >=3.
+Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 Requires-Dist: pytest>=8.1.1
 Requires-Dist: litellm>=1.49.0
@@ -31,6 +31,7 @@ Requires-Dist: httpx>=0.27.0
 Requires-Dist: rx>=3.2.0
 Requires-Dist: python-dateutil>=2.9.0.post0
 Requires-Dist: pydantic-settings>=2.9.1
+Requires-Dist: langwatch>=0.2.19
 Provides-Extra: dev
 Requires-Dist: black; extra == "dev"
 Requires-Dist: isort; extra == "dev"
{langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/langwatch_scenario.egg-info/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: langwatch-scenario
-Version: 0.7.9
+Version: 0.7.10
 Summary: The end-to-end agent testing library
 Author-email: LangWatch Team <support@langwatch.ai>
 License: MIT
@@ -14,7 +14,7 @@ Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
-Requires-Python: >=3.
+Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 Requires-Dist: pytest>=8.1.1
 Requires-Dist: litellm>=1.49.0
@@ -31,6 +31,7 @@ Requires-Dist: httpx>=0.27.0
 Requires-Dist: rx>=3.2.0
 Requires-Dist: python-dateutil>=2.9.0.post0
 Requires-Dist: pydantic-settings>=2.9.1
+Requires-Dist: langwatch>=0.2.19
 Provides-Extra: dev
 Requires-Dist: black; extra == "dev"
 Requires-Dist: isort; extra == "dev"
{langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/pyproject.toml

@@ -4,12 +4,12 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "langwatch-scenario"
-version = "0.7.9"
+version = "0.7.10"
 description = "The end-to-end agent testing library"
 readme = "README.md"
 authors = [{ name = "LangWatch Team", email = "support@langwatch.ai" }]
 license = { text = "MIT" }
-requires-python = ">=3.
+requires-python = ">=3.10"
 classifiers = [
     "Development Status :: 4 - Beta",
     "Intended Audience :: Developers",
@@ -36,6 +36,7 @@ dependencies = [
     "rx>=3.2.0",
     "python-dateutil>=2.9.0.post0",
     "pydantic-settings>=2.9.1",
+    "langwatch>=0.2.19",
 ]
 
 [project.optional-dependencies]
@@ -67,14 +68,11 @@ markers = ["agent_test: marks tests as agent scenario tests"]
 
 [dependency-groups]
 dev = [
-
     "function-schema>=0.4.5",
     "pre-commit>=4.2.0",
     "pydantic-ai>=0.0.52",
-    "pyright>=1.1.
+    "pyright>=1.1.405",
     "pytest-asyncio-concurrent>=0.4.1",
     "pdoc3>=0.11.6",
     "respx>=0.22.0",
 ]
-
-
{langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/_events/event_alert_message_logger.py

@@ -15,6 +15,7 @@ class EventAlertMessageLogger:
     """
 
     _shown_batch_ids: Set[str] = set()
+    _shown_watch_urls: Set[str] = set()
 
     def handle_greeting(self) -> None:
         """
@@ -40,6 +41,10 @@ class EventAlertMessageLogger:
         if self._is_greeting_disabled():
             return
 
+        if set_url in EventAlertMessageLogger._shown_watch_urls:
+            return
+
+        EventAlertMessageLogger._shown_watch_urls.add(set_url)
         self._display_watch_message(set_url)
 
     def _is_greeting_disabled(self) -> bool:
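
The new class-level _shown_watch_urls set makes the "watch" URL announcement print at most once per URL for the lifetime of the process, mirroring the existing _shown_batch_ids deduplication. A minimal, self-contained sketch of that pattern follows; the class name, method name, and URL below are illustrative stand-ins, not the package's API:

from typing import Set


class WatchUrlLogger:
    """Illustrates the class-level dedup pattern added in 0.7.10."""

    _shown_watch_urls: Set[str] = set()  # shared across all instances of the class

    def handle_watch_message(self, set_url: str) -> None:
        # Announce each URL at most once per process
        if set_url in WatchUrlLogger._shown_watch_urls:
            return
        WatchUrlLogger._shown_watch_urls.add(set_url)
        print(f"Watch the scenario set at: {set_url}")


logger = WatchUrlLogger()
logger.handle_watch_message("https://example.test/sets/default")
logger.handle_watch_message("https://example.test/sets/default")  # deduplicated, prints nothing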
langwatch_scenario-0.7.10/scenario/_events/utils.py

@@ -0,0 +1,113 @@
+import warnings
+
+from ..types import ChatCompletionMessageParamWithTrace
+from .events import MessageType
+from .messages import (
+    SystemMessage,
+    AssistantMessage,
+    UserMessage,
+    ToolMessage,
+    ToolCall,
+    FunctionCall,
+)
+from typing import List
+from pksuid import PKSUID
+
+
+def convert_messages_to_api_client_messages(
+    messages: list[ChatCompletionMessageParamWithTrace],
+) -> list[MessageType]:
+    """
+    Converts OpenAI ChatCompletionMessageParam messages to API client Message format.
+
+    This function transforms messages from OpenAI's format to the API client format
+    that matches the expected schema for ScenarioMessageSnapshotEvent.
+
+    Args:
+        messages: List of OpenAI ChatCompletionMessageParam messages
+
+    Returns:
+        List of API client Message objects
+
+    Raises:
+        ValueError: If message role is not supported or message format is invalid
+    """
+
+    converted_messages: list[MessageType] = []
+
+    for i, message in enumerate(messages):
+        # Generate unique ID for each message
+        message_id = message.get("id") or str(PKSUID("scenariomsg"))
+
+        role = message.get("role")
+        content = message.get("content")
+
+        if role == "user":
+            if not content:
+                raise ValueError(f"User message at index {i} missing required content")
+            message_ = UserMessage(
+                id=message_id,
+                role="user",
+                content=str(content),
+            )
+            message_.additional_properties = {"trace_id": message.get("trace_id")}
+            converted_messages.append(message_)
+        elif role == "assistant":
+            # Handle tool calls if present
+            tool_calls = message.get("tool_calls")
+            api_tool_calls: List[ToolCall] = []
+
+            if tool_calls:
+                for tool_call in tool_calls:
+                    api_tool_calls.append(
+                        ToolCall(
+                            id=tool_call.get("id", str(PKSUID("scenariotoolcall"))),
+                            type_="function",
+                            function=FunctionCall(
+                                name=tool_call["function"].get("name", "unknown"),
+                                arguments=tool_call["function"].get("arguments", "{}"),
+                            ),
+                        )
+                    )
+
+            message_ = AssistantMessage(
+                id=message_id,
+                role="assistant",
+                content=str(content),
+                tool_calls=api_tool_calls,
+            )
+            message_.additional_properties = {"trace_id": message.get("trace_id")}
+            converted_messages.append(message_)
+        elif role == "system":
+            if not content:
+                raise ValueError(
+                    f"System message at index {i} missing required content"
+                )
+            message_ = SystemMessage(id=message_id, role="system", content=str(content))
+            message_.additional_properties = {"trace_id": message.get("trace_id")}
+            converted_messages.append(message_)
+        elif role == "tool":
+            tool_call_id = message.get("tool_call_id")
+            if not tool_call_id:
+                warnings.warn(
+                    f"Tool message at index {i} missing required tool_call_id, skipping tool message"
+                )
+                continue
+            if not content:
+                warnings.warn(
+                    f"Tool message at index {i} missing required content, skipping tool message"
+                )
+                continue
+
+            message_ = ToolMessage(
+                id=message_id,
+                role="tool",
+                content=str(content),
+                tool_call_id=tool_call_id,
+            )
+            message_.additional_properties = {"trace_id": message.get("trace_id")}
+            converted_messages.append(message_)
+        else:
+            raise ValueError(f"Unsupported message role '{role}' at index {i}")
+
+    return converted_messages
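
A hedged usage sketch of the new converter: the import path, function name, and the extra id/trace_id keys follow the file above, while the sample messages and the surrounding script are assumptions for illustration. It requires langwatch-scenario 0.7.10 to be installed:

from scenario._events.utils import convert_messages_to_api_client_messages

# OpenAI-style chat messages, optionally carrying the trace_id the executor stamps on them
chat_history = [
    {"role": "system", "content": "You are a helpful assistant.", "trace_id": "trace_abc"},
    {"role": "user", "content": "Hi!", "trace_id": "trace_abc"},
    {"role": "assistant", "content": "Hello! How can I help?", "trace_id": "trace_abc"},
]

api_messages = convert_messages_to_api_client_messages(chat_history)  # type: ignore[arg-type]
for msg in api_messages:
    # Each converted message keeps its trace_id in additional_properties
    print(type(msg).__name__, msg.additional_properties)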
{langwatch_scenario-0.7.9 → langwatch_scenario-0.7.10}/scenario/judge_agent.py

@@ -12,7 +12,8 @@ import logging
 import re
 from typing import List, Optional, cast
 
-from litellm import Choices, completion
+import litellm
+from litellm import Choices
 from litellm.files.main import ModelResponse
 
 from scenario.cache import scenario_cache
@@ -356,7 +357,7 @@ if you don't have enough information to make a verdict, say inconclusive with ma
 
         response = cast(
             ModelResponse,
-            completion(
+            litellm.completion(
                 model=self.model,
                 messages=messages,
                 temperature=self.temperature,
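
The judge now resolves completion through the litellm module at call time rather than through a name bound at import time. A plausible motivation, consistent with the executor change below, is that attribute patching (such as the trace's autotrack_litellm_calls(litellm) call) only intercepts calls that go through the module object. A small stand-alone sketch of that difference, using a hypothetical module stand-in rather than litellm itself:

import types

# Hypothetical module with a completion() function, standing in for litellm
fake_llm = types.SimpleNamespace(completion=lambda **kwargs: "original result")

bound_at_import = fake_llm.completion  # like `from litellm import completion`

def tracked_completion(**kwargs):
    # Stand-in for a tracing wrapper patched onto the module attribute
    print("tracked call:", kwargs)
    return "wrapped result"

fake_llm.completion = tracked_completion  # like a tracer patching litellm.completion

print(bound_at_import(model="gpt-4.1-mini"))      # bypasses the wrapper: "original result"
print(fake_llm.completion(model="gpt-4.1-mini"))  # goes through the wrapper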
@@ -6,6 +6,7 @@ of scenario tests, managing the interaction between user simulators, agents unde
|
|
6
6
|
and judge agents to determine test success or failure.
|
7
7
|
"""
|
8
8
|
|
9
|
+
import json
|
9
10
|
import sys
|
10
11
|
from typing import (
|
11
12
|
Awaitable,
|
@@ -17,6 +18,7 @@ from typing import (
|
|
17
18
|
Tuple,
|
18
19
|
Union,
|
19
20
|
TypedDict,
|
21
|
+
cast,
|
20
22
|
)
|
21
23
|
import time
|
22
24
|
import warnings
|
@@ -33,6 +35,7 @@ from scenario._utils import (
|
|
33
35
|
await_if_awaitable,
|
34
36
|
get_batch_run_id,
|
35
37
|
generate_scenario_run_id,
|
38
|
+
SerializableWithStringFallback,
|
36
39
|
)
|
37
40
|
from openai.types.chat import (
|
38
41
|
ChatCompletionMessageParam,
|
@@ -40,7 +43,7 @@ from openai.types.chat import (
|
|
40
43
|
ChatCompletionAssistantMessageParam,
|
41
44
|
)
|
42
45
|
|
43
|
-
from .types import AgentInput, AgentRole, ScenarioResult, ScriptStep
|
46
|
+
from .types import AgentInput, AgentRole, ChatCompletionMessageParamWithTrace, ScenarioResult, ScriptStep
|
44
47
|
from ._error_messages import agent_response_not_awaitable
|
45
48
|
from .cache import context_scenario
|
46
49
|
from .agent_adapter import AgentAdapter
|
@@ -62,6 +65,11 @@ from ._events import (
|
|
62
65
|
from rx.subject.subject import Subject
|
63
66
|
from rx.core.observable.observable import Observable
|
64
67
|
|
68
|
+
import litellm
|
69
|
+
import langwatch
|
70
|
+
import langwatch.telemetry.context
|
71
|
+
from langwatch.telemetry.tracing import LangWatchTrace
|
72
|
+
|
65
73
|
|
66
74
|
class ScenarioExecutor:
|
67
75
|
"""
|
@@ -101,6 +109,7 @@ class ScenarioExecutor:
|
|
101
109
|
_pending_agents_on_turn: Set[AgentAdapter] = set()
|
102
110
|
_agent_times: Dict[int, float] = {}
|
103
111
|
_events: Subject
|
112
|
+
_trace: LangWatchTrace
|
104
113
|
|
105
114
|
event_bus: ScenarioEventBus
|
106
115
|
|
@@ -157,7 +166,8 @@ class ScenarioExecutor:
|
|
157
166
|
)
|
158
167
|
self.config = (ScenarioConfig.default_config or ScenarioConfig()).merge(config)
|
159
168
|
|
160
|
-
self.
|
169
|
+
self.batch_run_id = get_batch_run_id()
|
170
|
+
self.scenario_set_id = set_id or "default"
|
161
171
|
|
162
172
|
# Create executor's own event stream
|
163
173
|
self._events = Subject()
|
@@ -166,9 +176,6 @@ class ScenarioExecutor:
|
|
166
176
|
self.event_bus = event_bus or ScenarioEventBus()
|
167
177
|
self.event_bus.subscribe_to_events(self._events)
|
168
178
|
|
169
|
-
self.batch_run_id = get_batch_run_id()
|
170
|
-
self.scenario_set_id = set_id or "default"
|
171
|
-
|
172
179
|
@property
|
173
180
|
def events(self) -> Observable:
|
174
181
|
"""Expose event stream for subscribers like the event bus."""
|
@@ -253,6 +260,8 @@ class ScenarioExecutor:
|
|
253
260
|
)
|
254
261
|
```
|
255
262
|
"""
|
263
|
+
message = cast(ChatCompletionMessageParamWithTrace, message)
|
264
|
+
message["trace_id"] = self._trace.trace_id
|
256
265
|
self._state.messages.append(message)
|
257
266
|
|
258
267
|
# Broadcast the message to other agents
|
@@ -263,6 +272,21 @@ class ScenarioExecutor:
|
|
263
272
|
self._pending_messages[idx] = []
|
264
273
|
self._pending_messages[idx].append(message)
|
265
274
|
|
275
|
+
# Update trace with input/output
|
276
|
+
if message["role"] == "user":
|
277
|
+
self._trace.update(input={"type": "text", "value": str(message["content"])})
|
278
|
+
elif message["role"] == "assistant":
|
279
|
+
self._trace.update(
|
280
|
+
output={
|
281
|
+
"type": "text",
|
282
|
+
"value": str(
|
283
|
+
message["content"]
|
284
|
+
if "content" in message
|
285
|
+
else json.dumps(message, cls=SerializableWithStringFallback)
|
286
|
+
),
|
287
|
+
}
|
288
|
+
)
|
289
|
+
|
266
290
|
def add_messages(
|
267
291
|
self,
|
268
292
|
messages: List[ChatCompletionMessageParam],
|
@@ -292,6 +316,21 @@ class ScenarioExecutor:
|
|
292
316
|
self.add_message(message, from_agent_idx)
|
293
317
|
|
294
318
|
def _new_turn(self):
|
319
|
+
if hasattr(self, "_trace") and self._trace is not None:
|
320
|
+
self._trace.__exit__(None, None, None)
|
321
|
+
|
322
|
+
self._trace = langwatch.trace(
|
323
|
+
name="Scenario Turn",
|
324
|
+
metadata={
|
325
|
+
"labels": ["scenario"],
|
326
|
+
"thread_id": self._state.thread_id,
|
327
|
+
"scenario.name": self.name,
|
328
|
+
"scenario.batch_id": self.batch_run_id,
|
329
|
+
"scenario.set_id": self.scenario_set_id,
|
330
|
+
"scenario.turn": self._state.current_turn,
|
331
|
+
},
|
332
|
+
).__enter__()
|
333
|
+
|
295
334
|
self._pending_agents_on_turn = set(self.agents)
|
296
335
|
self._pending_roles_on_turn = [
|
297
336
|
AgentRole.USER,
|
@@ -460,7 +499,7 @@ class ScenarioExecutor:
|
|
460
499
|
|
461
500
|
async def _call_agent(
|
462
501
|
self, idx: int, role: AgentRole, request_judgment: bool = False
|
463
|
-
) -> Union[List[ChatCompletionMessageParam], ScenarioResult]:
|
502
|
+
) -> Union[List[ChatCompletionMessageParam], ScenarioResult, None]:
|
464
503
|
agent = self.agents[idx]
|
465
504
|
|
466
505
|
if role == AgentRole.USER and self.config.debug:
|
@@ -482,67 +521,84 @@ class ScenarioExecutor:
                 ChatCompletionUserMessageParam(role="user", content=input_message)
             ]

-        with
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            warnings.
-
-
-
-
-
-
-
-
+        with self._trace.span(type="agent", name=f"{agent.__class__.__name__}.call") as span:
+            with show_spinner(
+                text=(
+                    "Judging..."
+                    if role == AgentRole.JUDGE
+                    else f"{role.value if isinstance(role, AgentRole) else role}:"
+                ),
+                color=(
+                    "blue"
+                    if role == AgentRole.AGENT
+                    else "green" if role == AgentRole.USER else "yellow"
+                ),
+                enabled=self.config.verbose,
+            ):
+                start_time = time.time()
+
+                # Prevent pydantic validation warnings which should already be disabled
+                with warnings.catch_warnings():
+                    warnings.simplefilter("ignore")
+
+                    self._trace.autotrack_litellm_calls(litellm)
+
+                    agent_response = agent.call(
+                        AgentInput(
+                            # TODO: test thread_id
+                            thread_id=self._state.thread_id,
+                            messages=cast(List[ChatCompletionMessageParam], self._state.messages),
+                            new_messages=self._pending_messages.get(idx, []),
+                            judgment_request=request_judgment,
+                            scenario_state=self._state,
+                        )
+                    )
+                    if not isinstance(agent_response, Awaitable):
+                        raise Exception(
+                            agent_response_not_awaitable(agent.__class__.__name__),
                         )
-            )
-            if not isinstance(agent_response, Awaitable):
-                raise Exception(
-                    agent_response_not_awaitable(agent.__class__.__name__),
-                )

-
+                    agent_response = await agent_response

-
-
-
+                    if idx not in self._agent_times:
+                        self._agent_times[idx] = 0
+                    self._agent_times[idx] += time.time() - start_time

-
-
+                    self._pending_messages[idx] = []
+                    check_valid_return_type(agent_response, agent.__class__.__name__)
+
+                    messages = []
+                    if isinstance(agent_response, ScenarioResult):
+                        # TODO: should be an event
+                        span.add_evaluation(
+                            name=f"{agent.__class__.__name__} Judgment",
+                            status="processed",
+                            passed=agent_response.success,
+                            details=agent_response.reasoning,
+                            score=(
+                                len(agent_response.passed_criteria)
+                                / len(agent_response.failed_criteria)
+                                if agent_response.failed_criteria
+                                else 1.0
+                            ),
+                        )

-
-
-
-
-
-
-                agent_response,
-                role="user" if role == AgentRole.USER else "assistant",
-            )
+                        return agent_response
+                    else:
+                        messages = convert_agent_return_types_to_openai_messages(
+                            agent_response,
+                            role="user" if role == AgentRole.USER else "assistant",
+                        )

-
+                    self.add_messages(messages, from_agent_idx=idx)

-
-
-
-
-
+                    if messages and self.config.verbose:
+                        print_openai_messages(
+                            self._scenario_name(),
+                            [m for m in messages if m["role"] != "system"],
+                        )

-
+                    return messages

     def _scenario_name(self):
         if self.config.verbose == 2:
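When the judge returns a `ScenarioResult`, the agent span now records an evaluation whose score is the ratio of passed to failed criteria, defaulting to 1.0 when nothing failed, as the hunk above shows. A small, self-contained restatement of that score expression:

```python
from typing import List


def judgment_score(passed_criteria: List[str], failed_criteria: List[str]) -> float:
    # Same expression as the span.add_evaluation(score=...) argument above.
    return (
        len(passed_criteria) / len(failed_criteria)
        if failed_criteria
        else 1.0
    )


print(judgment_score(["tone", "accuracy"], []))           # 1.0 – no failed criteria
print(judgment_score(["tone"], ["accuracy", "latency"]))  # 0.5 – passed/failed ratio
```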
@@ -817,6 +873,7 @@ class ScenarioExecutor:

         # Signal end of event stream
         self._events.on_completed()
+        self._trace.__exit__(None, None, None)


     async def run(
scenario/scenario_state.py
@@ -14,6 +14,7 @@ from openai.types.chat import (
 )
 from pydantic import BaseModel

+from scenario.types import ChatCompletionMessageParamWithTrace
 from scenario.config import ScenarioConfig

 if TYPE_CHECKING:
@@ -70,7 +71,7 @@ class ScenarioState(BaseModel):
     """

     description: str
-    messages: List[
+    messages: List[ChatCompletionMessageParamWithTrace]
     thread_id: str
     current_turn: int
     config: ScenarioConfig
scenario/types.py
@@ -8,10 +8,20 @@ from typing import (
     Callable,
     List,
     Optional,
+    TypeAlias,
     Union,
 )

-from openai.types.chat import
+from openai.types.chat import (
+    ChatCompletionMessageParam,
+    ChatCompletionUserMessageParam,
+    ChatCompletionToolMessageParam,
+    ChatCompletionUserMessageParam,
+    ChatCompletionSystemMessageParam,
+    ChatCompletionFunctionMessageParam,
+    ChatCompletionAssistantMessageParam,
+    ChatCompletionDeveloperMessageParam,
+)

 # Prevent circular imports + Pydantic breaking
 if TYPE_CHECKING:
@@ -22,6 +32,48 @@ else:
     ScenarioStateType = Any


+# Since Python types do not support intersection, we need to wrap ALL the chat completion
+# message types with the trace_id field
+
+
+class ChatCompletionDeveloperMessageParamWithTrace(ChatCompletionDeveloperMessageParam):
+    trace_id: Optional[str]
+
+
+class ChatCompletionSystemMessageParamWithTrace(ChatCompletionSystemMessageParam):
+    trace_id: Optional[str]
+
+
+class ChatCompletionUserMessageParamWithTrace(ChatCompletionUserMessageParam):
+    trace_id: Optional[str]
+
+
+class ChatCompletionAssistantMessageParamWithTrace(ChatCompletionAssistantMessageParam):
+    trace_id: Optional[str]
+
+
+class ChatCompletionToolMessageParamWithTrace(ChatCompletionToolMessageParam):
+    trace_id: Optional[str]
+
+
+class ChatCompletionFunctionMessageParamWithTrace(ChatCompletionFunctionMessageParam):
+    trace_id: Optional[str]
+
+
+"""
+A wrapper around ChatCompletionMessageParam that adds a trace_id field to be able to
+tie back each message of the scenario run to a trace.
+"""
+ChatCompletionMessageParamWithTrace: TypeAlias = Union[
+    ChatCompletionDeveloperMessageParamWithTrace,
+    ChatCompletionSystemMessageParamWithTrace,
+    ChatCompletionUserMessageParamWithTrace,
+    ChatCompletionAssistantMessageParamWithTrace,
+    ChatCompletionToolMessageParamWithTrace,
+    ChatCompletionFunctionMessageParamWithTrace,
+]
+
+
 class AgentRole(Enum):
     """
     Defines the different roles that agents can play in a scenario.
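These `...WithTrace` wrappers are TypedDict subclasses of the corresponding openai message types: at runtime they are plain dicts, so tagging a message is just setting one more key, while static checkers see the extra `trace_id` field. A self-contained sketch of the same pattern, using a stand-in base type instead of the real openai TypedDicts:

```python
from typing import Optional, TypedDict


class UserMessage(TypedDict):
    # Stand-in for openai's ChatCompletionUserMessageParam
    role: str
    content: str


class UserMessageWithTrace(UserMessage):
    # Extra field mirroring the wrappers above
    trace_id: Optional[str]


# TypedDicts are ordinary dicts at runtime, so the tag is just another key.
message: UserMessageWithTrace = {"role": "user", "content": "hi", "trace_id": "trace-123"}
print(message["trace_id"])
```

Note that without `total=False` the extra annotation marks `trace_id` as a required key for static checkers, which is consistent with the executor always setting it in `add_message`.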
@@ -171,7 +223,7 @@ class ScenarioResult(BaseModel):

     success: bool
     # Prevent issues with slightly inconsistent message types for example when comming from Gemini right at the result level
-    messages: Annotated[List[
+    messages: Annotated[List[ChatCompletionMessageParamWithTrace], SkipValidation]
     reasoning: Optional[str] = None
     passed_criteria: List[str] = []
     failed_criteria: List[str] = []
scenario/user_simulator_agent.py
@@ -10,7 +10,8 @@ conversation history.
 import logging
 from typing import Optional, cast

-
+import litellm
+from litellm import Choices
 from litellm.files.main import ModelResponse

 from scenario.cache import scenario_cache
@@ -228,7 +229,7 @@ Your goal (assistant) is to interact with the Agent Under Test (user) as if you

         response = cast(
             ModelResponse,
-            completion(
+            litellm.completion(
                 model=self.model,
                 messages=messages,
                 temperature=self.temperature,
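Calling `litellm.completion` through the module rather than via a name imported with `from litellm import completion` is presumably what lets `autotrack_litellm_calls(litellm)` (used in the executor hunk above) see the call: patching an attribute on a module only affects callers that resolve the name through the module at call time. The toy sketch below demonstrates that general Python behavior; it does not use the real litellm or langwatch internals.

```python
import types

# A toy module standing in for litellm.
mod = types.ModuleType("toymod")
mod.completion = lambda: "original"

bound_early = mod.completion        # like `from litellm import completion`
mod.completion = lambda: "patched"  # like an autotracker wrapping the function

print(bound_early())     # "original" – the early binding never sees the patch
print(mod.completion())  # "patched"  – module-qualified lookups pick it up
```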
tests/test_scenario.py
@@ -98,6 +98,10 @@ async def test_scenario_allow_scripted_scenario():
         self,
         input: scenario.AgentInput,
     ) -> scenario.AgentReturnTypes:
+        for message in input.new_messages:
+            if "trace_id" in message:
+                del message["trace_id"]
+
         assert input.new_messages == [
             {
                 "role": "user",
@@ -138,6 +142,10 @@ async def test_scenario_allow_scripted_scenario_with_lower_level_openai_messages
         self,
         input: scenario.AgentInput,
     ) -> scenario.AgentReturnTypes:
+        for message in input.new_messages:
+            if "trace_id" in message:
+                del message["trace_id"]
+
         assert input.new_messages == [
             {
                 "role": "user",
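Both tests strip the new `trace_id` key in place before comparing the received messages against the expected literals. A hypothetical, non-mutating alternative that a test suite could use instead (not part of the package):

```python
from typing import Iterable, List


def without_trace_ids(messages: Iterable[dict]) -> List[dict]:
    """Return copies of the message dicts with any trace_id key dropped."""
    return [{k: v for k, v in m.items() if k != "trace_id"} for m in messages]


assert without_trace_ids(
    [{"role": "user", "content": "hi", "trace_id": "trace-1"}]
) == [{"role": "user", "content": "hi"}]
```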