PyPI - langwatch-scenario - Versions diffs - 0.7.8__py3-none-any.whl → 0.7.10__py3-none-any.whl - Mend

langwatch-scenario 0.7.8__py3-none-any.whl → 0.7.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19)

{langwatch_scenario-0.7.8.dist-info → langwatch_scenario-0.7.10.dist-info}/METADATA +4 -3
{langwatch_scenario-0.7.8.dist-info → langwatch_scenario-0.7.10.dist-info}/RECORD +19 -18
scenario/_events/event_alert_message_logger.py +20 -29
scenario/_events/event_bus.py +4 -1
scenario/_events/event_reporter.py +8 -3
scenario/_events/utils.py +44 -28
scenario/_utils/__init__.py +2 -2
scenario/_utils/ids.py +12 -12
scenario/config/scenario.py +8 -0
scenario/judge_agent.py +4 -3
scenario/py.typed +0 -0
scenario/pytest_plugin.py +5 -0
scenario/scenario_executor.py +118 -60
scenario/scenario_state.py +2 -1
scenario/types.py +54 -2
scenario/user_simulator_agent.py +3 -2
{langwatch_scenario-0.7.8.dist-info → langwatch_scenario-0.7.10.dist-info}/WHEEL +0 -0
{langwatch_scenario-0.7.8.dist-info → langwatch_scenario-0.7.10.dist-info}/entry_points.txt +0 -0
{langwatch_scenario-0.7.8.dist-info → langwatch_scenario-0.7.10.dist-info}/top_level.txt +0 -0

{langwatch_scenario-0.7.8.dist-info → langwatch_scenario-0.7.10.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: langwatch-scenario
-Version: 0.7.8
+Version: 0.7.10
 Summary: The end-to-end agent testing library
 Author-email: LangWatch Team <support@langwatch.ai>
 License: MIT
@@ -14,7 +14,7 @@ Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
-Requires-Python: >=3.9
+Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 Requires-Dist: pytest>=8.1.1
 Requires-Dist: litellm>=1.49.0
@@ -31,6 +31,7 @@ Requires-Dist: httpx>=0.27.0
 Requires-Dist: rx>=3.2.0
 Requires-Dist: python-dateutil>=2.9.0.post0
 Requires-Dist: pydantic-settings>=2.9.1
+Requires-Dist: langwatch>=0.2.19
 Provides-Extra: dev
 Requires-Dist: black; extra == "dev"
 Requires-Dist: isort; extra == "dev"
@@ -457,7 +458,7 @@ This will cache any function call you decorate when running the tests and make t
 While optional, we strongly recommend setting stable identifiers for your scenarios, sets, and batches for better organization and tracking in LangWatch.
 - **set_id**: Groups related scenarios into a test suite. This corresponds to the "Simulation Set" in the UI.
-- **batch_run_id**: Groups all scenarios that were run together in a single execution (e.g., a single CI job). This is automatically generated but can be overridden.
+- **SCENARIO_BATCH_RUN_ID**: Env variable that groups all scenarios that were run together in a single execution (e.g., a single CI job). This is automatically generated but can be overridden.
 ```python
 import os

{langwatch_scenario-0.7.8.dist-info → langwatch_scenario-0.7.10.dist-info}/RECORD RENAMED Viewed

@@ -2,20 +2,21 @@ scenario/__init__.py,sha256=4WO8TjY8Lc0NhYL7b9LvaB1xCBqwUkLuI0uIA6PQP6c,4223
 scenario/_error_messages.py,sha256=QVFSbhzsVNGz2GOBOaoQFW6w6AOyZCWLTt0ySWPfnGw,3882
 scenario/agent_adapter.py,sha256=PoY2KQqYuqzIIb3-nhIU-MPXwHJc1vmwdweMy7ut-hk,4255
 scenario/cache.py,sha256=J6s6Sia_Ce6TrnsInlhfxm6SF8tygo3sH-_cQCRX1WA,6213
-scenario/judge_agent.py,sha256=gWRWzIfHBjAYBRXant6n5fL_E2P3A2IGNvIyp9nUb30,16728
-scenario/pytest_plugin.py,sha256=DGrpgB6e71eq8QXWWxwLjAKNhiyYyzfzZ0L5Ax8iEmo,11317
-scenario/scenario_executor.py,sha256=2ZPy2cywwEMIbUfBP1jHN__Ffjf5WGB144MX2SNr5IM,33101
-scenario/scenario_state.py,sha256=LWGqEQN-Yz0DIiC-TyMRHd-9rEiuBVUHKllMmKv-qGg,7029
+scenario/judge_agent.py,sha256=hHQ2nKsOgSyTtN0LdE6xIF0wZnnlYLN6RcxTPecFHDU,16770
+scenario/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+scenario/pytest_plugin.py,sha256=wRCuGD9uwrrLt2fY15zK6mnmY9W_dO_m0WalPJYE5II,11491
+scenario/scenario_executor.py,sha256=v41UgSHebosXf95FfYIeVUm6s4IbMP_U58FdGoZ_kZU,35653
+scenario/scenario_state.py,sha256=R8PhPHW3obYo3DCjBH5XDdZ6bp4uol7wCXO8K2Tz30I,7101
 scenario/script.py,sha256=A0N5pP0l4FFn1xdKc78U_wkwWhEWH3EFeU_LRDtNyEI,12241
-scenario/types.py,sha256=qH5KFzJBDG1fEJB_qFRVtL3EZulxq3G1mztYczIzIAY,9613
-scenario/user_simulator_agent.py,sha256=kqnSd4_gytzEwtkc06r58UdE1EycZBzejRPzfORDjdo,9619
+scenario/types.py,sha256=CRSCHUplXEXhj6EYQsncwJBzbd2128YTGlFxlk-rrG8,11193
+scenario/user_simulator_agent.py,sha256=gXRaeoivEAcenIEqMDU6bWzv8cOrJaaooNrTdpC9TE4,9630
 scenario/_events/__init__.py,sha256=4cj6H9zuXzvWhT2P2JNdjWzeF1PUepTjqIDw85Vid9s,1500
-scenario/_events/event_alert_message_logger.py,sha256=K0Pu76Gd36lGEEYh8e8r7NMt7J-OQhbw0cZmiwutCOE,3591
-scenario/_events/event_bus.py,sha256=KFN0OxAQIQXIk_tVrorDoN_YLKVK9dos5SXFALstHgE,9809
-scenario/_events/event_reporter.py,sha256=4uND_kdPBXe-aUWCdSj4BLrMA33TDnbZzokAEOU3_08,3771
+scenario/_events/event_alert_message_logger.py,sha256=XcofGgXjeiTC75NPYheBpHxqA6R4pYAuHZa7-kH9Grg,2975
+scenario/_events/event_bus.py,sha256=IsKNsClF1JFYj728EcxX1hw_KbfDkfJq3Y2Kv4h94n4,9871
+scenario/_events/event_reporter.py,sha256=-6NNbBMy_FYr1O-1FuZ6eIUnLuI8NGRMUr0pybLJrCI,3873
 scenario/_events/events.py,sha256=UtEGY-_1B0LrwpgsNKgrvJBZhRtxuj3K_i6ZBfF7E4Q,6387
 scenario/_events/messages.py,sha256=quwP2OkeaGasNOoaV8GUeosZVKc5XDsde08T0xx_YQo,2297
-scenario/_events/utils.py,sha256=SproqiwjhLWAW7p82EirCgawpxAo0ksW1pBB4mKkcEs,3436
+scenario/_events/utils.py,sha256=CRrdDHBD2ptcNIjzW0eEG1V5-Vw1gFnp_UTz5zMQ_Ak,4051
 scenario/_generated/langwatch_api_client/README.md,sha256=Az5f2L4ChOnG_ZtrdBagzRVgeTCtBkbD_S5cIeAry2o,5424
 scenario/_generated/langwatch_api_client/pyproject.toml,sha256=Z8wxuGp4H9BJYVVJB8diW7rRU9XYxtPfw9mU4_wq4cA,560
 scenario/_generated/langwatch_api_client/lang_watch_api_client/__init__.py,sha256=vVrn17y-3l3fOqeJk8aN3GlStRm2fo0f313l_0LtJNs,368
@@ -226,16 +227,16 @@ scenario/_generated/langwatch_api_client/lang_watch_api_client/models/search_req
 scenario/_generated/langwatch_api_client/lang_watch_api_client/models/search_response.py,sha256=zDYmJ8bFBSJyF9D3cEn_ffrey-ITIfwr-_7eu72zLyk,2832
 scenario/_generated/langwatch_api_client/lang_watch_api_client/models/timestamps.py,sha256=-nRKUPZTAJQNxiKz128xF7DKgZNbFo4G3mr5xNXrkaw,2173
 scenario/_generated/langwatch_api_client/lang_watch_api_client/models/trace.py,sha256=K9Lc_EQOrJ2dqMXx9EpiUXReT1_uYF7WRfYyhlfbi3I,7537
-scenario/_utils/__init__.py,sha256=ptNVzmjhypznnozdNIiuBDHZ0NLqtp7xhio9kEDovWQ,1311
-scenario/_utils/ids.py,sha256=v3JS8J7vrFuubK5bXJviU-BVZoLGWINCN1hUyAO9NZw,2074
+scenario/_utils/__init__.py,sha256=xPVjLXnHTTq9fuRFh5lsMvwtIpEeJ3jy1vf5yTUMPsc,1313
+scenario/_utils/ids.py,sha256=W4tVMCf9ky0KLTDA_qOfErNhb4tCmxwa8zEuo1K1ZuY,2071
 scenario/_utils/message_conversion.py,sha256=AWHn31E7J0mz9sBXWruVVAgtsrJz1R_xEf-dGbX6jjs,3636
 scenario/_utils/utils.py,sha256=msQgUWaLh3U9jIIHmxkEbOaklga63AF0KJzsaKa_mZc,14008
 scenario/config/__init__.py,sha256=b2X_bqkIrd7jZY9dRrXk2wOqoPe87Nl_SRGuZhlolxA,1123
 scenario/config/langwatch.py,sha256=ijWchFbUsLbQooAZmwyTw4rxfRLQseZ1GoVSiPPbzpw,1677
 scenario/config/model.py,sha256=T4HYA79CW1NxXDkFlyftYR6JzZcowbtIx0H-ijxRyfg,1297
-scenario/config/scenario.py,sha256=tVVnsUgG6Z0hYZiTDX-GGZz8l8co1HhyTqJUJNPinBk,5184
-langwatch_scenario-0.7.8.dist-info/METADATA,sha256=q7Rk73qwl5ZzaRTEF9IWxLzgCBniCMO8Ku240jVyBLY,20003
-langwatch_scenario-0.7.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-langwatch_scenario-0.7.8.dist-info/entry_points.txt,sha256=WlEnJ_gku0i18bIa3DSuGqXRX-QDQLe_s0YmRzK45TI,45
-langwatch_scenario-0.7.8.dist-info/top_level.txt,sha256=45Mn28aedJsetnBMB5xSmrJ-yo701QLH89Zlz4r1clE,9
-langwatch_scenario-0.7.8.dist-info/RECORD,,
+scenario/config/scenario.py,sha256=6jrtcm0Fo7FpxQta7QIKdGMgl7cXrn374Inzx29hRuk,5406
+langwatch_scenario-0.7.10.dist-info/METADATA,sha256=pbLZM8UXj1_1TWHjheHP6QREOvRWfX7nHEdfY2ZX4aA,20065
+langwatch_scenario-0.7.10.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+langwatch_scenario-0.7.10.dist-info/entry_points.txt,sha256=WlEnJ_gku0i18bIa3DSuGqXRX-QDQLe_s0YmRzK45TI,45
+langwatch_scenario-0.7.10.dist-info/top_level.txt,sha256=45Mn28aedJsetnBMB5xSmrJ-yo701QLH89Zlz4r1clE,9
+langwatch_scenario-0.7.10.dist-info/RECORD,,

scenario/_events/event_alert_message_logger.py CHANGED Viewed

@@ -1,5 +1,8 @@
 import os
+import webbrowser
 from typing import Set
+from ..config.scenario import ScenarioConfig
 from .._utils.ids import get_batch_run_id
@@ -12,6 +15,7 @@ class EventAlertMessageLogger:
     """
     _shown_batch_ids: Set[str] = set()
+    _shown_watch_urls: Set[str] = set()
     def handle_greeting(self) -> None:
         """
@@ -37,6 +41,10 @@ class EventAlertMessageLogger:
         if self._is_greeting_disabled():
             return
+        if set_url in EventAlertMessageLogger._shown_watch_urls:
+            return
+        EventAlertMessageLogger._shown_watch_urls.add(set_url)
         self._display_watch_message(set_url)
     def _is_greeting_disabled(self) -> bool:
@@ -49,35 +57,13 @@ class EventAlertMessageLogger:
         if not os.getenv("LANGWATCH_API_KEY"):
             print(f"\n{separator}")
-            print("🚀  LangWatch Simulation Reporting")
+            print("🎭  Running Scenario Tests")
             print(f"{separator}")
-            print("➡️  API key not configured")
+            print("➡️  LangWatch API key not configured")
             print("   Simulations will only output final results")
             print("")
             print("💡 To visualize conversations in real time:")
             print("   • Set LANGWATCH_API_KEY environment variable")
-            print("   • Or configure apiKey in scenario.config.js")
-            print("")
-            print(f"📦 Batch Run ID: {batch_run_id}")
-            print("")
-            print("🔇 To disable these messages:")
-            print("   • Set SCENARIO_DISABLE_SIMULATION_REPORT_INFO=true")
-            print(f"{separator}\n")
-        else:
-            endpoint = os.getenv("LANGWATCH_ENDPOINT", "https://app.langwatch.ai")
-            api_key = os.getenv("LANGWATCH_API_KEY", "")
-            print(f"\n{separator}")
-            print("🚀  LangWatch Simulation Reporting")
-            print(f"{separator}")
-            print("✅ Simulation reporting enabled")
-            print(f"   Endpoint: {endpoint}")
-            print(f"   API Key: {'Configured' if api_key else 'Not configured'}")
-            print("")
-            print(f"📦 Batch Run ID: {batch_run_id}")
-            print("")
-            print("🔇 To disable these messages:")
-            print("   • Set SCENARIO_DISABLE_SIMULATION_REPORT_INFO=true")
             print(f"{separator}\n")
     def _display_watch_message(self, set_url: str) -> None:
@@ -86,10 +72,15 @@ class EventAlertMessageLogger:
         batch_url = f"{set_url}/{get_batch_run_id()}"
         print(f"\n{separator}")
-        print("👀 Watch Your Simulation Live")
+        print("🎭  Running Scenario Tests")
         print(f"{separator}")
-        print("🌐 Open in your browser:")
-        print(f"   Scenario Set: {set_url}")
-        print(f"   Batch Run: {batch_url}")
-        print("")
+        print(f"Follow it live: {batch_url}")
         print(f"{separator}\n")
+        config = ScenarioConfig.default_config
+        if config and not config.headless:
+            # Open the URL in the default browser (cross-platform)
+            try:
+                webbrowser.open(batch_url)
+            except Exception:
+                pass

scenario/_events/event_bus.py CHANGED Viewed

@@ -3,6 +3,7 @@ from typing import Optional, Any, Dict
 from .events import ScenarioEvent
 from .event_reporter import EventReporter
 from .event_alert_message_logger import EventAlertMessageLogger
+from ..config.scenario import ScenarioConfig
 import asyncio
 import queue
@@ -35,7 +36,9 @@ class ScenarioEventBus:
     """
     def __init__(
-        self, event_reporter: Optional[EventReporter] = None, max_retries: int = 3
+        self,
+        event_reporter: Optional[EventReporter] = None,
+        max_retries: int = 3,
     ):
         """
         Initialize the event bus with optional event reporter and retry configuration.

scenario/_events/event_reporter.py CHANGED Viewed

@@ -3,7 +3,7 @@ import httpx
 from typing import Optional, Dict, Any
 from .events import ScenarioEvent
 from .event_alert_message_logger import EventAlertMessageLogger
-from scenario.config import LangWatchSettings
+from scenario.config import LangWatchSettings, ScenarioConfig
 class EventReporter:
@@ -26,7 +26,11 @@ class EventReporter:
         reporter = EventReporter(api_key="your-api-key")
     """
-    def __init__(self, endpoint: Optional[str] = None, api_key: Optional[str] = None):
+    def __init__(
+        self,
+        endpoint: Optional[str] = None,
+        api_key: Optional[str] = None,
+    ):
         # Load settings from environment variables
         langwatch_settings = LangWatchSettings()
@@ -69,6 +73,7 @@ class EventReporter:
                         "Content-Type": "application/json",
                         "X-Auth-Token": self.api_key,
                     },
+                    timeout=httpx.Timeout(30.0),
                 )
                 self.logger.info(
                     f"[{event_type}] POST response status: {response.status_code} ({event.scenario_run_id})"
@@ -92,7 +97,7 @@ class EventReporter:
                     )
         except Exception as error:
             self.logger.error(
-                f"[{event_type}] Event POST error: {error}, event={event}, endpoint={self.endpoint}"
+                f"[{event_type}] Event POST error: {repr(error)}, event={event}, endpoint={self.endpoint}"
             )
         return result

scenario/_events/utils.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import warnings
-from openai.types.chat.chat_completion_message_param import ChatCompletionMessageParam
+from ..types import ChatCompletionMessageParamWithTrace
 from .events import MessageType
 from .messages import (
     SystemMessage,
@@ -10,9 +11,12 @@ from .messages import (
     FunctionCall,
 )
 from typing import List
-import uuid
+from pksuid import PKSUID
-def convert_messages_to_api_client_messages(messages: list[ChatCompletionMessageParam]) -> list[MessageType]:
+def convert_messages_to_api_client_messages(
+    messages: list[ChatCompletionMessageParamWithTrace],
+) -> list[MessageType]:
     """
     Converts OpenAI ChatCompletionMessageParam messages to API client Message format.
@@ -33,7 +37,7 @@ def convert_messages_to_api_client_messages(messages: list[ChatCompletionMessage
     for i, message in enumerate(messages):
         # Generate unique ID for each message
-        message_id = message.get("id") or str(uuid.uuid4())
+        message_id = message.get("id") or str(PKSUID("scenariomsg"))
         role = message.get("role")
         content = message.get("content")
@@ -41,11 +45,13 @@ def convert_messages_to_api_client_messages(messages: list[ChatCompletionMessage
         if role == "user":
             if not content:
                 raise ValueError(f"User message at index {i} missing required content")
-            converted_messages.append(UserMessage(
+            message_ = UserMessage(
                 id=message_id,
                 role="user",
-                content=str(content)
-            ))
+                content=str(content),
+            )
+            message_.additional_properties = {"trace_id": message.get("trace_id")}
+            converted_messages.append(message_)
         elif role == "assistant":
             # Handle tool calls if present
             tool_calls = message.get("tool_calls")
@@ -53,44 +59,54 @@ def convert_messages_to_api_client_messages(messages: list[ChatCompletionMessage
             if tool_calls:
                 for tool_call in tool_calls:
-                    api_tool_calls.append(ToolCall(
-                        id=tool_call.get("id", str(uuid.uuid4())),
-                        type_="function",
-                        function=FunctionCall(
-                            name=tool_call["function"].get("name", "unknown"),
-                            arguments=tool_call["function"].get("arguments", "{}")
+                    api_tool_calls.append(
+                        ToolCall(
+                            id=tool_call.get("id", str(PKSUID("scenariotoolcall"))),
+                            type_="function",
+                            function=FunctionCall(
+                                name=tool_call["function"].get("name", "unknown"),
+                                arguments=tool_call["function"].get("arguments", "{}"),
+                            ),
                         )
-                    ))
+                    )
-            converted_messages.append(AssistantMessage(
+            message_ = AssistantMessage(
                 id=message_id,
                 role="assistant",
                 content=str(content),
-                tool_calls=api_tool_calls
-            ))
+                tool_calls=api_tool_calls,
+            )
+            message_.additional_properties = {"trace_id": message.get("trace_id")}
+            converted_messages.append(message_)
         elif role == "system":
             if not content:
-                raise ValueError(f"System message at index {i} missing required content")
-            converted_messages.append(SystemMessage(
-                id=message_id,
-                role="system",
-                content=str(content)
-            ))
+                raise ValueError(
+                    f"System message at index {i} missing required content"
+                )
+            message_ = SystemMessage(id=message_id, role="system", content=str(content))
+            message_.additional_properties = {"trace_id": message.get("trace_id")}
+            converted_messages.append(message_)
         elif role == "tool":
             tool_call_id = message.get("tool_call_id")
             if not tool_call_id:
-                warnings.warn(f"Tool message at index {i} missing required tool_call_id, skipping tool message")
+                warnings.warn(
+                    f"Tool message at index {i} missing required tool_call_id, skipping tool message"
+                )
                 continue
             if not content:
-                warnings.warn(f"Tool message at index {i} missing required content, skipping tool message")
+                warnings.warn(
+                    f"Tool message at index {i} missing required content, skipping tool message"
+                )
                 continue
-            converted_messages.append(ToolMessage(
+            message_ = ToolMessage(
                 id=message_id,
                 role="tool",
                 content=str(content),
-                tool_call_id=tool_call_id
-            ))
+                tool_call_id=tool_call_id,
+            )
+            message_.additional_properties = {"trace_id": message.get("trace_id")}
+            converted_messages.append(message_)
         else:
             raise ValueError(f"Unsupported message role '{role}' at index {i}")

scenario/_utils/__init__.py CHANGED Viewed

@@ -14,7 +14,7 @@ from .ids import (
     generate_scenario_id,
     generate_thread_id,
     generate_message_id,
-    safe_parse_uuid,
+    safe_parse_ksuid,
 )
 from .utils import (
     SerializableAndPydanticEncoder,
@@ -34,7 +34,7 @@ __all__ = [
     "generate_scenario_id",
     "generate_thread_id",
     "generate_message_id",
-    "safe_parse_uuid",
+    "safe_parse_ksuid",
     "SerializableAndPydanticEncoder",
     "SerializableWithStringFallback",
     "print_openai_messages",

scenario/_utils/ids.py CHANGED Viewed

@@ -7,7 +7,7 @@ and scenario tracking.
 """
 import os
-import uuid
+from pksuid import PKSUID
 def generate_thread_id() -> str:
@@ -17,7 +17,7 @@ def generate_thread_id() -> str:
     Returns:
         str: A new thread ID.
     """
-    return f"thread_{uuid.uuid4()}"
+    return f"{PKSUID('scenariothread')}"
 def generate_scenario_run_id() -> str:
@@ -27,7 +27,7 @@ def generate_scenario_run_id() -> str:
     Returns:
         str: A new scenario run ID.
     """
-    return f"scenariorun_{uuid.uuid4()}"
+    return f"{PKSUID('scenariorun')}"
 def generate_scenario_id() -> str:
@@ -37,7 +37,7 @@ def generate_scenario_id() -> str:
     Returns:
         str: A new scenario ID.
     """
-    return f"scenario_{uuid.uuid4()}"
+    return f"{PKSUID('scenario')}"
 def get_batch_run_id() -> str:
@@ -52,7 +52,7 @@ def get_batch_run_id() -> str:
     batch_run_id = os.environ.get("SCENARIO_BATCH_RUN_ID")
     if not batch_run_id:
         # Generate new batch ID if not set
-        batch_run_id = f"scenariobatchrun_{uuid.uuid4()}"
+        batch_run_id = f"{PKSUID('scenariobatch')}"
         os.environ["SCENARIO_BATCH_RUN_ID"] = batch_run_id
     return batch_run_id
@@ -65,23 +65,23 @@ def generate_message_id() -> str:
     Returns:
         str: A new message ID.
     """
-    return f"scenariomsg_{uuid.uuid4()}"
+    return f"{PKSUID('scenariomsg')}"
-def safe_parse_uuid(id_str: str) -> bool:
+def safe_parse_ksuid(id_str: str) -> bool:
     """
-    Safely parses a UUID string.
+    Safely parses a Ksuid string.
     Args:
-        id_str: The UUID string to parse.
+        id_str: The Ksuid string to parse.
     Returns:
-        bool: True if the UUID string is valid, false otherwise.
+        bool: True if the Ksuid string is valid, false otherwise.
     """
     try:
-        uuid.UUID(id_str)
+        PKSUID.parse(id_str)
         return True
-    except (ValueError, TypeError):
+    except Exception:
         return False

scenario/config/scenario.py CHANGED Viewed

@@ -5,6 +5,7 @@ This module provides the main configuration class for customizing the behavior
 of the Scenario testing framework, including execution parameters and debugging options.
 """
+import os
 from typing import Optional, Union, ClassVar
 from pydantic import BaseModel
@@ -53,6 +54,11 @@ class ScenarioConfig(BaseModel):
     verbose: Optional[Union[bool, int]] = True
     cache_key: Optional[str] = None
     debug: Optional[bool] = False
+    headless: Optional[bool] = os.getenv("SCENARIO_HEADLESS", "false").lower() not in [
+        "false",
+        "0",
+        "",
+    ]
     default_config: ClassVar[Optional["ScenarioConfig"]] = None
@@ -64,6 +70,7 @@ class ScenarioConfig(BaseModel):
         verbose: Optional[Union[bool, int]] = None,
         cache_key: Optional[str] = None,
         debug: Optional[bool] = None,
+        headless: Optional[bool] = None,
     ) -> None:
         """
         Set global configuration settings for all scenario executions.
@@ -107,6 +114,7 @@ class ScenarioConfig(BaseModel):
                 verbose=verbose,
                 cache_key=cache_key,
                 debug=debug,
+                headless=headless,
             )
         )

scenario/judge_agent.py CHANGED Viewed

@@ -12,7 +12,8 @@ import logging
 import re
 from typing import List, Optional, cast
-from litellm import Choices, completion
+import litellm
+from litellm import Choices
 from litellm.files.main import ModelResponse
 from scenario.cache import scenario_cache
@@ -356,7 +357,7 @@ if you don't have enough information to make a verdict, say inconclusive with ma
         response = cast(
             ModelResponse,
-            completion(
+            litellm.completion(
                 model=self.model,
                 messages=messages,
                 temperature=self.temperature,
@@ -398,7 +399,7 @@ if you don't have enough information to make a verdict, say inconclusive with ma
                         failed_criteria = [
                             self.criteria[idx]
                             for idx, criterion in enumerate(criteria.values())
-                            if criterion == False
+                            if criterion == False or criterion == "inconclusive"
                         ]
                         # Return the appropriate ScenarioResult based on the verdict

scenario/py.typed ADDED Viewed

File without changes

scenario/pytest_plugin.py CHANGED Viewed

@@ -199,6 +199,8 @@ class ScenarioReporter:
 # Store the original run method
 original_run = ScenarioExecutor.run
+def pytest_addoption(parser):
+    parser.addoption("--headless", action="store_true")
 @pytest.hookimpl(trylast=True)
 def pytest_configure(config):
@@ -240,6 +242,9 @@ def pytest_configure(config):
         print(colored("\nScenario debug mode enabled (--debug).", "yellow"))
         ScenarioConfig.configure(verbose=True, debug=True)
+    if config.getoption("--headless"):
+        ScenarioConfig.configure(headless=True)
     # Create a global reporter instance
     config._scenario_reporter = ScenarioReporter()

scenario/scenario_executor.py CHANGED Viewed

@@ -6,6 +6,7 @@ of scenario tests, managing the interaction between user simulators, agents unde
 and judge agents to determine test success or failure.
 """
+import json
 import sys
 from typing import (
     Awaitable,
@@ -17,6 +18,7 @@ from typing import (
     Tuple,
     Union,
     TypedDict,
+    cast,
 )
 import time
 import warnings
@@ -33,6 +35,7 @@ from scenario._utils import (
     await_if_awaitable,
     get_batch_run_id,
     generate_scenario_run_id,
+    SerializableWithStringFallback,
 )
 from openai.types.chat import (
     ChatCompletionMessageParam,
@@ -40,7 +43,7 @@ from openai.types.chat import (
     ChatCompletionAssistantMessageParam,
 )
-from .types import AgentInput, AgentRole, ScenarioResult, ScriptStep
+from .types import AgentInput, AgentRole, ChatCompletionMessageParamWithTrace, ScenarioResult, ScriptStep
 from ._error_messages import agent_response_not_awaitable
 from .cache import context_scenario
 from .agent_adapter import AgentAdapter
@@ -62,6 +65,11 @@ from ._events import (
 from rx.subject.subject import Subject
 from rx.core.observable.observable import Observable
+import litellm
+import langwatch
+import langwatch.telemetry.context
+from langwatch.telemetry.tracing import LangWatchTrace
 class ScenarioExecutor:
     """
@@ -101,6 +109,7 @@ class ScenarioExecutor:
     _pending_agents_on_turn: Set[AgentAdapter] = set()
     _agent_times: Dict[int, float] = {}
     _events: Subject
+    _trace: LangWatchTrace
     event_bus: ScenarioEventBus
@@ -153,10 +162,12 @@ class ScenarioExecutor:
             verbose=verbose,
             cache_key=cache_key,
             debug=debug,
+            headless=None,
         )
         self.config = (ScenarioConfig.default_config or ScenarioConfig()).merge(config)
-        self.reset()
+        self.batch_run_id = get_batch_run_id()
+        self.scenario_set_id = set_id or "default"
         # Create executor's own event stream
         self._events = Subject()
@@ -165,9 +176,6 @@ class ScenarioExecutor:
         self.event_bus = event_bus or ScenarioEventBus()
         self.event_bus.subscribe_to_events(self._events)
-        self.batch_run_id = get_batch_run_id()
-        self.scenario_set_id = set_id or "default"
     @property
     def events(self) -> Observable:
         """Expose event stream for subscribers like the event bus."""
@@ -198,7 +206,7 @@ class ScenarioExecutor:
         self._state = ScenarioState(
             description=self.description,
             messages=[],
-            thread_id=str(PKSUID("thread")),
+            thread_id=str(PKSUID("scenariothread")),
             current_turn=0,
             config=self.config,
             _executor=self,
@@ -252,6 +260,8 @@ class ScenarioExecutor:
             )
             ```
         """
+        message = cast(ChatCompletionMessageParamWithTrace, message)
+        message["trace_id"] = self._trace.trace_id
         self._state.messages.append(message)
         # Broadcast the message to other agents
@@ -262,6 +272,21 @@ class ScenarioExecutor:
                 self._pending_messages[idx] = []
             self._pending_messages[idx].append(message)
+        # Update trace with input/output
+        if message["role"] == "user":
+            self._trace.update(input={"type": "text", "value": str(message["content"])})
+        elif message["role"] == "assistant":
+            self._trace.update(
+                output={
+                    "type": "text",
+                    "value": str(
+                        message["content"]
+                        if "content" in message
+                        else json.dumps(message, cls=SerializableWithStringFallback)
+                    ),
+                }
+            )
     def add_messages(
         self,
         messages: List[ChatCompletionMessageParam],
@@ -291,6 +316,21 @@ class ScenarioExecutor:
             self.add_message(message, from_agent_idx)
     def _new_turn(self):
+        if hasattr(self, "_trace") and self._trace is not None:
+            self._trace.__exit__(None, None, None)
+        self._trace = langwatch.trace(
+            name="Scenario Turn",
+            metadata={
+                "labels": ["scenario"],
+                "thread_id": self._state.thread_id,
+                "scenario.name": self.name,
+                "scenario.batch_id": self.batch_run_id,
+                "scenario.set_id": self.scenario_set_id,
+                "scenario.turn": self._state.current_turn,
+            },
+        ).__enter__()
         self._pending_agents_on_turn = set(self.agents)
         self._pending_roles_on_turn = [
             AgentRole.USER,
@@ -459,7 +499,7 @@ class ScenarioExecutor:
     async def _call_agent(
         self, idx: int, role: AgentRole, request_judgment: bool = False
-    ) -> Union[List[ChatCompletionMessageParam], ScenarioResult]:
+    ) -> Union[List[ChatCompletionMessageParam], ScenarioResult, None]:
         agent = self.agents[idx]
         if role == AgentRole.USER and self.config.debug:
@@ -481,67 +521,84 @@ class ScenarioExecutor:
                     ChatCompletionUserMessageParam(role="user", content=input_message)
                 ]
-        with show_spinner(
-            text=(
-                "Judging..."
-                if role == AgentRole.JUDGE
-                else f"{role.value if isinstance(role, AgentRole) else role}:"
-            ),
-            color=(
-                "blue"
-                if role == AgentRole.AGENT
-                else "green" if role == AgentRole.USER else "yellow"
-            ),
-            enabled=self.config.verbose,
-        ):
-            start_time = time.time()
-            # Prevent pydantic validation warnings which should already be disabled
-            with warnings.catch_warnings():
-                warnings.simplefilter("ignore")
-                agent_response = agent.call(
-                    AgentInput(
-                        # TODO: test thread_id
-                        thread_id=self._state.thread_id,
-                        messages=self._state.messages,
-                        new_messages=self._pending_messages.get(idx, []),
-                        judgment_request=request_judgment,
-                        scenario_state=self._state,
+        with self._trace.span(type="agent", name=f"{agent.__class__.__name__}.call") as span:
+            with show_spinner(
+                text=(
+                    "Judging..."
+                    if role == AgentRole.JUDGE
+                    else f"{role.value if isinstance(role, AgentRole) else role}:"
+                ),
+                color=(
+                    "blue"
+                    if role == AgentRole.AGENT
+                    else "green" if role == AgentRole.USER else "yellow"
+                ),
+                enabled=self.config.verbose,
+            ):
+                start_time = time.time()
+                # Prevent pydantic validation warnings which should already be disabled
+                with warnings.catch_warnings():
+                    warnings.simplefilter("ignore")
+                    self._trace.autotrack_litellm_calls(litellm)
+                    agent_response = agent.call(
+                        AgentInput(
+                            # TODO: test thread_id
+                            thread_id=self._state.thread_id,
+                            messages=cast(List[ChatCompletionMessageParam], self._state.messages),
+                            new_messages=self._pending_messages.get(idx, []),
+                            judgment_request=request_judgment,
+                            scenario_state=self._state,
+                        )
+                    )
+                if not isinstance(agent_response, Awaitable):
+                    raise Exception(
+                        agent_response_not_awaitable(agent.__class__.__name__),
                     )
-                )
-            if not isinstance(agent_response, Awaitable):
-                raise Exception(
-                    agent_response_not_awaitable(agent.__class__.__name__),
-                )
-            agent_response = await agent_response
+                agent_response = await agent_response
-            if idx not in self._agent_times:
-                self._agent_times[idx] = 0
-            self._agent_times[idx] += time.time() - start_time
+                if idx not in self._agent_times:
+                    self._agent_times[idx] = 0
+                self._agent_times[idx] += time.time() - start_time
-            self._pending_messages[idx] = []
-            check_valid_return_type(agent_response, agent.__class__.__name__)
+                self._pending_messages[idx] = []
+                check_valid_return_type(agent_response, agent.__class__.__name__)
+                messages = []
+                if isinstance(agent_response, ScenarioResult):
+                    # TODO: should be an event
+                    span.add_evaluation(
+                        name=f"{agent.__class__.__name__} Judgment",
+                        status="processed",
+                        passed=agent_response.success,
+                        details=agent_response.reasoning,
+                        score=(
+                            len(agent_response.passed_criteria)
+                            / len(agent_response.failed_criteria)
+                            if agent_response.failed_criteria
+                            else 1.0
+                        ),
+                    )
-            messages = []
-            if isinstance(agent_response, ScenarioResult):
-                # TODO: should be an event
-                return agent_response
-            else:
-                messages = convert_agent_return_types_to_openai_messages(
-                    agent_response,
-                    role="user" if role == AgentRole.USER else "assistant",
-                )
+                    return agent_response
+                else:
+                    messages = convert_agent_return_types_to_openai_messages(
+                        agent_response,
+                        role="user" if role == AgentRole.USER else "assistant",
+                    )
-            self.add_messages(messages, from_agent_idx=idx)
+                self.add_messages(messages, from_agent_idx=idx)
-            if messages and self.config.verbose:
-                print_openai_messages(
-                    self._scenario_name(),
-                    [m for m in messages if m["role"] != "system"],
-                )
+                if messages and self.config.verbose:
+                    print_openai_messages(
+                        self._scenario_name(),
+                        [m for m in messages if m["role"] != "system"],
+                    )
-            return messages
+                return messages
     def _scenario_name(self):
         if self.config.verbose == 2:
@@ -816,6 +873,7 @@ class ScenarioExecutor:
         # Signal end of event stream
         self._events.on_completed()
+        self._trace.__exit__(None, None, None)
 async def run(

scenario/scenario_state.py CHANGED Viewed

@@ -14,6 +14,7 @@ from openai.types.chat import (
 )
 from pydantic import BaseModel
+from scenario.types import ChatCompletionMessageParamWithTrace
 from scenario.config import ScenarioConfig
 if TYPE_CHECKING:
@@ -70,7 +71,7 @@ class ScenarioState(BaseModel):
     """
     description: str
-    messages: List[ChatCompletionMessageParam]
+    messages: List[ChatCompletionMessageParamWithTrace]
     thread_id: str
     current_turn: int
     config: ScenarioConfig

scenario/types.py CHANGED Viewed

@@ -8,10 +8,20 @@ from typing import (
     Callable,
     List,
     Optional,
+    TypeAlias,
     Union,
 )
-from openai.types.chat import ChatCompletionMessageParam, ChatCompletionUserMessageParam
+from openai.types.chat import (
+    ChatCompletionMessageParam,
+    ChatCompletionUserMessageParam,
+    ChatCompletionToolMessageParam,
+    ChatCompletionUserMessageParam,
+    ChatCompletionSystemMessageParam,
+    ChatCompletionFunctionMessageParam,
+    ChatCompletionAssistantMessageParam,
+    ChatCompletionDeveloperMessageParam,
+)
 # Prevent circular imports + Pydantic breaking
 if TYPE_CHECKING:
@@ -22,6 +32,48 @@ else:
     ScenarioStateType = Any
+# Since Python types do not support intersection, we need to wrap ALL the chat completion
+# message types with the trace_id field
+class ChatCompletionDeveloperMessageParamWithTrace(ChatCompletionDeveloperMessageParam):
+    trace_id: Optional[str]
+class ChatCompletionSystemMessageParamWithTrace(ChatCompletionSystemMessageParam):
+    trace_id: Optional[str]
+class ChatCompletionUserMessageParamWithTrace(ChatCompletionUserMessageParam):
+    trace_id: Optional[str]
+class ChatCompletionAssistantMessageParamWithTrace(ChatCompletionAssistantMessageParam):
+    trace_id: Optional[str]
+class ChatCompletionToolMessageParamWithTrace(ChatCompletionToolMessageParam):
+    trace_id: Optional[str]
+class ChatCompletionFunctionMessageParamWithTrace(ChatCompletionFunctionMessageParam):
+    trace_id: Optional[str]
+"""
+A wrapper around ChatCompletionMessageParam that adds a trace_id field to be able to
+tie back each message of the scenario run to a trace.
+"""
+ChatCompletionMessageParamWithTrace: TypeAlias = Union[
+    ChatCompletionDeveloperMessageParamWithTrace,
+    ChatCompletionSystemMessageParamWithTrace,
+    ChatCompletionUserMessageParamWithTrace,
+    ChatCompletionAssistantMessageParamWithTrace,
+    ChatCompletionToolMessageParamWithTrace,
+    ChatCompletionFunctionMessageParamWithTrace,
+]
 class AgentRole(Enum):
     """
     Defines the different roles that agents can play in a scenario.
@@ -171,7 +223,7 @@ class ScenarioResult(BaseModel):
     success: bool
     # Prevent issues with slightly inconsistent message types for example when coming from Gemini right at the result level
-    messages: Annotated[List[ChatCompletionMessageParam], SkipValidation]
+    messages: Annotated[List[ChatCompletionMessageParamWithTrace], SkipValidation]
     reasoning: Optional[str] = None
     passed_criteria: List[str] = []
     failed_criteria: List[str] = []

scenario/user_simulator_agent.py CHANGED Viewed

@@ -10,7 +10,8 @@ conversation history.
 import logging
 from typing import Optional, cast
-from litellm import Choices, completion
+import litellm
+from litellm import Choices
 from litellm.files.main import ModelResponse
 from scenario.cache import scenario_cache
@@ -228,7 +229,7 @@ Your goal (assistant) is to interact with the Agent Under Test (user) as if you
         response = cast(
             ModelResponse,
-            completion(
+            litellm.completion(
                 model=self.model,
                 messages=messages,
                 temperature=self.temperature,

{langwatch_scenario-0.7.8.dist-info → langwatch_scenario-0.7.10.dist-info}/WHEEL RENAMED Viewed

File without changes

{langwatch_scenario-0.7.8.dist-info → langwatch_scenario-0.7.10.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{langwatch_scenario-0.7.8.dist-info → langwatch_scenario-0.7.10.dist-info}/top_level.txt RENAMED Viewed

File without changes

langwatch-scenario 0.7.8__py3-none-any.whl → 0.7.10__py3-none-any.whl

langwatch-scenario 0.7.8 py3-none-any.whl → 0.7.10 py3-none-any.whl