PyPI - langwatch-scenario - Versions diffs - 0.7.7__py3-none-any.whl → 0.7.9__py3-none-any.whl - Mend

langwatch-scenario 0.7.7__py3-none-any.whl → 0.7.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18)

{langwatch_scenario-0.7.7.dist-info → langwatch_scenario-0.7.9.dist-info}/METADATA +2 -2
{langwatch_scenario-0.7.7.dist-info → langwatch_scenario-0.7.9.dist-info}/RECORD +18 -17
scenario/_events/event_alert_message_logger.py +15 -29
scenario/_events/event_bus.py +4 -1
scenario/_events/event_reporter.py +8 -3
scenario/_events/utils.py +3 -3
scenario/_utils/__init__.py +2 -2
scenario/_utils/ids.py +12 -12
scenario/config/model.py +4 -1
scenario/config/scenario.py +8 -0
scenario/judge_agent.py +12 -1
scenario/py.typed +0 -0
scenario/pytest_plugin.py +5 -0
scenario/scenario_executor.py +2 -1
scenario/user_simulator_agent.py +12 -0
{langwatch_scenario-0.7.7.dist-info → langwatch_scenario-0.7.9.dist-info}/WHEEL +0 -0
{langwatch_scenario-0.7.7.dist-info → langwatch_scenario-0.7.9.dist-info}/entry_points.txt +0 -0
{langwatch_scenario-0.7.7.dist-info → langwatch_scenario-0.7.9.dist-info}/top_level.txt +0 -0

{langwatch_scenario-0.7.7.dist-info → langwatch_scenario-0.7.9.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: langwatch-scenario
-Version: 0.7.7
+Version: 0.7.9
 Summary: The end-to-end agent testing library
 Author-email: LangWatch Team <support@langwatch.ai>
 License: MIT
@@ -457,7 +457,7 @@ This will cache any function call you decorate when running the tests and make t
 While optional, we strongly recommend setting stable identifiers for your scenarios, sets, and batches for better organization and tracking in LangWatch.
 - **set_id**: Groups related scenarios into a test suite. This corresponds to the "Simulation Set" in the UI.
-- **batch_run_id**: Groups all scenarios that were run together in a single execution (e.g., a single CI job). This is automatically generated but can be overridden.
+- **SCENARIO_BATCH_RUN_ID**: Env variable that groups all scenarios that were run together in a single execution (e.g., a single CI job). This is automatically generated but can be overridden.
 ```python
 import os

{langwatch_scenario-0.7.7.dist-info → langwatch_scenario-0.7.9.dist-info}/RECORD RENAMED Viewed

@@ -2,20 +2,21 @@ scenario/__init__.py,sha256=4WO8TjY8Lc0NhYL7b9LvaB1xCBqwUkLuI0uIA6PQP6c,4223
 scenario/_error_messages.py,sha256=QVFSbhzsVNGz2GOBOaoQFW6w6AOyZCWLTt0ySWPfnGw,3882
 scenario/agent_adapter.py,sha256=PoY2KQqYuqzIIb3-nhIU-MPXwHJc1vmwdweMy7ut-hk,4255
 scenario/cache.py,sha256=J6s6Sia_Ce6TrnsInlhfxm6SF8tygo3sH-_cQCRX1WA,6213
-scenario/judge_agent.py,sha256=7NsgeMu6wRMjU_HYTCFqkLma6H2AJuEkw9hJkt11190,16211
-scenario/pytest_plugin.py,sha256=DGrpgB6e71eq8QXWWxwLjAKNhiyYyzfzZ0L5Ax8iEmo,11317
-scenario/scenario_executor.py,sha256=2ZPy2cywwEMIbUfBP1jHN__Ffjf5WGB144MX2SNr5IM,33101
+scenario/judge_agent.py,sha256=TSwykEWhoBA9F__sUsSuUMpu7pOkT1lIJo8YlEj2eiA,16759
+scenario/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+scenario/pytest_plugin.py,sha256=wRCuGD9uwrrLt2fY15zK6mnmY9W_dO_m0WalPJYE5II,11491
+scenario/scenario_executor.py,sha256=_GRpFpw_WtgtaGpxWh0A0HNNf-aU78PdIiVdgEFm9MY,33136
 scenario/scenario_state.py,sha256=LWGqEQN-Yz0DIiC-TyMRHd-9rEiuBVUHKllMmKv-qGg,7029
 scenario/script.py,sha256=A0N5pP0l4FFn1xdKc78U_wkwWhEWH3EFeU_LRDtNyEI,12241
 scenario/types.py,sha256=qH5KFzJBDG1fEJB_qFRVtL3EZulxq3G1mztYczIzIAY,9613
-scenario/user_simulator_agent.py,sha256=UJ75xhqHwoi8-3JkR1AsHDzpHM2Lx-aDSTJ1gnq_SXc,9101
+scenario/user_simulator_agent.py,sha256=kqnSd4_gytzEwtkc06r58UdE1EycZBzejRPzfORDjdo,9619
 scenario/_events/__init__.py,sha256=4cj6H9zuXzvWhT2P2JNdjWzeF1PUepTjqIDw85Vid9s,1500
-scenario/_events/event_alert_message_logger.py,sha256=K0Pu76Gd36lGEEYh8e8r7NMt7J-OQhbw0cZmiwutCOE,3591
-scenario/_events/event_bus.py,sha256=KFN0OxAQIQXIk_tVrorDoN_YLKVK9dos5SXFALstHgE,9809
-scenario/_events/event_reporter.py,sha256=4uND_kdPBXe-aUWCdSj4BLrMA33TDnbZzokAEOU3_08,3771
+scenario/_events/event_alert_message_logger.py,sha256=n2W3uT8y4x6KKL3H9Ez6CfzJOFlvOfvjDKsdhHUJkxs,2787
+scenario/_events/event_bus.py,sha256=IsKNsClF1JFYj728EcxX1hw_KbfDkfJq3Y2Kv4h94n4,9871
+scenario/_events/event_reporter.py,sha256=-6NNbBMy_FYr1O-1FuZ6eIUnLuI8NGRMUr0pybLJrCI,3873
 scenario/_events/events.py,sha256=UtEGY-_1B0LrwpgsNKgrvJBZhRtxuj3K_i6ZBfF7E4Q,6387
 scenario/_events/messages.py,sha256=quwP2OkeaGasNOoaV8GUeosZVKc5XDsde08T0xx_YQo,2297
-scenario/_events/utils.py,sha256=SproqiwjhLWAW7p82EirCgawpxAo0ksW1pBB4mKkcEs,3436
+scenario/_events/utils.py,sha256=KKqWFGkj4XtofKxM2yi-DBhBQp8wQOdls48iPHGCmUY,3473
 scenario/_generated/langwatch_api_client/README.md,sha256=Az5f2L4ChOnG_ZtrdBagzRVgeTCtBkbD_S5cIeAry2o,5424
 scenario/_generated/langwatch_api_client/pyproject.toml,sha256=Z8wxuGp4H9BJYVVJB8diW7rRU9XYxtPfw9mU4_wq4cA,560
 scenario/_generated/langwatch_api_client/lang_watch_api_client/__init__.py,sha256=vVrn17y-3l3fOqeJk8aN3GlStRm2fo0f313l_0LtJNs,368
@@ -226,16 +227,16 @@ scenario/_generated/langwatch_api_client/lang_watch_api_client/models/search_req
 scenario/_generated/langwatch_api_client/lang_watch_api_client/models/search_response.py,sha256=zDYmJ8bFBSJyF9D3cEn_ffrey-ITIfwr-_7eu72zLyk,2832
 scenario/_generated/langwatch_api_client/lang_watch_api_client/models/timestamps.py,sha256=-nRKUPZTAJQNxiKz128xF7DKgZNbFo4G3mr5xNXrkaw,2173
 scenario/_generated/langwatch_api_client/lang_watch_api_client/models/trace.py,sha256=K9Lc_EQOrJ2dqMXx9EpiUXReT1_uYF7WRfYyhlfbi3I,7537
-scenario/_utils/__init__.py,sha256=ptNVzmjhypznnozdNIiuBDHZ0NLqtp7xhio9kEDovWQ,1311
-scenario/_utils/ids.py,sha256=v3JS8J7vrFuubK5bXJviU-BVZoLGWINCN1hUyAO9NZw,2074
+scenario/_utils/__init__.py,sha256=xPVjLXnHTTq9fuRFh5lsMvwtIpEeJ3jy1vf5yTUMPsc,1313
+scenario/_utils/ids.py,sha256=W4tVMCf9ky0KLTDA_qOfErNhb4tCmxwa8zEuo1K1ZuY,2071
 scenario/_utils/message_conversion.py,sha256=AWHn31E7J0mz9sBXWruVVAgtsrJz1R_xEf-dGbX6jjs,3636
 scenario/_utils/utils.py,sha256=msQgUWaLh3U9jIIHmxkEbOaklga63AF0KJzsaKa_mZc,14008
 scenario/config/__init__.py,sha256=b2X_bqkIrd7jZY9dRrXk2wOqoPe87Nl_SRGuZhlolxA,1123
 scenario/config/langwatch.py,sha256=ijWchFbUsLbQooAZmwyTw4rxfRLQseZ1GoVSiPPbzpw,1677
-scenario/config/model.py,sha256=Ve49S2FyzUifXJ-SAyKPiNtVqs8BfsYbODu_M5y0c8Y,1155
-scenario/config/scenario.py,sha256=tVVnsUgG6Z0hYZiTDX-GGZz8l8co1HhyTqJUJNPinBk,5184
-langwatch_scenario-0.7.7.dist-info/METADATA,sha256=L7h0kgOaIij6MYVCac0EqPu8ODkZNKxDeIrHCSJg2l4,20003
-langwatch_scenario-0.7.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-langwatch_scenario-0.7.7.dist-info/entry_points.txt,sha256=WlEnJ_gku0i18bIa3DSuGqXRX-QDQLe_s0YmRzK45TI,45
-langwatch_scenario-0.7.7.dist-info/top_level.txt,sha256=45Mn28aedJsetnBMB5xSmrJ-yo701QLH89Zlz4r1clE,9
-langwatch_scenario-0.7.7.dist-info/RECORD,,
+scenario/config/model.py,sha256=T4HYA79CW1NxXDkFlyftYR6JzZcowbtIx0H-ijxRyfg,1297
+scenario/config/scenario.py,sha256=6jrtcm0Fo7FpxQta7QIKdGMgl7cXrn374Inzx29hRuk,5406
+langwatch_scenario-0.7.9.dist-info/METADATA,sha256=0s-yAn8iE1N-5dbqugYFpSl8btZrTyyDgWQDat8szxI,20030
+langwatch_scenario-0.7.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+langwatch_scenario-0.7.9.dist-info/entry_points.txt,sha256=WlEnJ_gku0i18bIa3DSuGqXRX-QDQLe_s0YmRzK45TI,45
+langwatch_scenario-0.7.9.dist-info/top_level.txt,sha256=45Mn28aedJsetnBMB5xSmrJ-yo701QLH89Zlz4r1clE,9
+langwatch_scenario-0.7.9.dist-info/RECORD,,

scenario/_events/event_alert_message_logger.py CHANGED Viewed

@@ -1,5 +1,8 @@
 import os
+import webbrowser
 from typing import Set
+from ..config.scenario import ScenarioConfig
 from .._utils.ids import get_batch_run_id
@@ -49,35 +52,13 @@ class EventAlertMessageLogger:
         if not os.getenv("LANGWATCH_API_KEY"):
             print(f"\n{separator}")
-            print("🚀  LangWatch Simulation Reporting")
+            print("🎭  Running Scenario Tests")
             print(f"{separator}")
-            print("➡️  API key not configured")
+            print("➡️  LangWatch API key not configured")
             print("   Simulations will only output final results")
             print("")
             print("💡 To visualize conversations in real time:")
             print("   • Set LANGWATCH_API_KEY environment variable")
-            print("   • Or configure apiKey in scenario.config.js")
-            print("")
-            print(f"📦 Batch Run ID: {batch_run_id}")
-            print("")
-            print("🔇 To disable these messages:")
-            print("   • Set SCENARIO_DISABLE_SIMULATION_REPORT_INFO=true")
-            print(f"{separator}\n")
-        else:
-            endpoint = os.getenv("LANGWATCH_ENDPOINT", "https://app.langwatch.ai")
-            api_key = os.getenv("LANGWATCH_API_KEY", "")
-            print(f"\n{separator}")
-            print("🚀  LangWatch Simulation Reporting")
-            print(f"{separator}")
-            print("✅ Simulation reporting enabled")
-            print(f"   Endpoint: {endpoint}")
-            print(f"   API Key: {'Configured' if api_key else 'Not configured'}")
-            print("")
-            print(f"📦 Batch Run ID: {batch_run_id}")
-            print("")
-            print("🔇 To disable these messages:")
-            print("   • Set SCENARIO_DISABLE_SIMULATION_REPORT_INFO=true")
             print(f"{separator}\n")
     def _display_watch_message(self, set_url: str) -> None:
@@ -86,10 +67,15 @@ class EventAlertMessageLogger:
         batch_url = f"{set_url}/{get_batch_run_id()}"
         print(f"\n{separator}")
-        print("👀 Watch Your Simulation Live")
+        print("🎭  Running Scenario Tests")
         print(f"{separator}")
-        print("🌐 Open in your browser:")
-        print(f"   Scenario Set: {set_url}")
-        print(f"   Batch Run: {batch_url}")
-        print("")
+        print(f"Follow it live: {batch_url}")
         print(f"{separator}\n")
+        config = ScenarioConfig.default_config
+        if config and not config.headless:
+            # Open the URL in the default browser (cross-platform)
+            try:
+                webbrowser.open(batch_url)
+            except Exception:
+                pass

scenario/_events/event_bus.py CHANGED Viewed

@@ -3,6 +3,7 @@ from typing import Optional, Any, Dict
 from .events import ScenarioEvent
 from .event_reporter import EventReporter
 from .event_alert_message_logger import EventAlertMessageLogger
+from ..config.scenario import ScenarioConfig
 import asyncio
 import queue
@@ -35,7 +36,9 @@ class ScenarioEventBus:
     """
     def __init__(
-        self, event_reporter: Optional[EventReporter] = None, max_retries: int = 3
+        self,
+        event_reporter: Optional[EventReporter] = None,
+        max_retries: int = 3,
     ):
         """
         Initialize the event bus with optional event reporter and retry configuration.

scenario/_events/event_reporter.py CHANGED Viewed

@@ -3,7 +3,7 @@ import httpx
 from typing import Optional, Dict, Any
 from .events import ScenarioEvent
 from .event_alert_message_logger import EventAlertMessageLogger
-from scenario.config import LangWatchSettings
+from scenario.config import LangWatchSettings, ScenarioConfig
 class EventReporter:
@@ -26,7 +26,11 @@ class EventReporter:
         reporter = EventReporter(api_key="your-api-key")
     """
-    def __init__(self, endpoint: Optional[str] = None, api_key: Optional[str] = None):
+    def __init__(
+        self,
+        endpoint: Optional[str] = None,
+        api_key: Optional[str] = None,
+    ):
         # Load settings from environment variables
         langwatch_settings = LangWatchSettings()
@@ -69,6 +73,7 @@ class EventReporter:
                         "Content-Type": "application/json",
                         "X-Auth-Token": self.api_key,
                     },
+                    timeout=httpx.Timeout(30.0),
                 )
                 self.logger.info(
                     f"[{event_type}] POST response status: {response.status_code} ({event.scenario_run_id})"
@@ -92,7 +97,7 @@ class EventReporter:
                     )
         except Exception as error:
             self.logger.error(
-                f"[{event_type}] Event POST error: {error}, event={event}, endpoint={self.endpoint}"
+                f"[{event_type}] Event POST error: {repr(error)}, event={event}, endpoint={self.endpoint}"
             )
         return result

scenario/_events/utils.py CHANGED Viewed

@@ -10,7 +10,7 @@ from .messages import (
     FunctionCall,
 )
 from typing import List
-import uuid
+from pksuid import PKSUID
 def convert_messages_to_api_client_messages(messages: list[ChatCompletionMessageParam]) -> list[MessageType]:
     """
@@ -33,7 +33,7 @@ def convert_messages_to_api_client_messages(messages: list[ChatCompletionMessage
     for i, message in enumerate(messages):
         # Generate unique ID for each message
-        message_id = message.get("id") or str(uuid.uuid4())
+        message_id = message.get("id") or str(PKSUID('scenariomsg'))
         role = message.get("role")
         content = message.get("content")
@@ -54,7 +54,7 @@ def convert_messages_to_api_client_messages(messages: list[ChatCompletionMessage
             if tool_calls:
                 for tool_call in tool_calls:
                     api_tool_calls.append(ToolCall(
-                        id=tool_call.get("id", str(uuid.uuid4())),
+                        id=tool_call.get("id", str(PKSUID('scenariotoolcall'))),
                         type_="function",
                         function=FunctionCall(
                             name=tool_call["function"].get("name", "unknown"),

scenario/_utils/__init__.py CHANGED Viewed

@@ -14,7 +14,7 @@ from .ids import (
     generate_scenario_id,
     generate_thread_id,
     generate_message_id,
-    safe_parse_uuid,
+    safe_parse_ksuid,
 )
 from .utils import (
     SerializableAndPydanticEncoder,
@@ -34,7 +34,7 @@ __all__ = [
     "generate_scenario_id",
     "generate_thread_id",
     "generate_message_id",
-    "safe_parse_uuid",
+    "safe_parse_ksuid",
     "SerializableAndPydanticEncoder",
     "SerializableWithStringFallback",
     "print_openai_messages",

scenario/_utils/ids.py CHANGED Viewed

@@ -7,7 +7,7 @@ and scenario tracking.
 """
 import os
-import uuid
+from pksuid import PKSUID
 def generate_thread_id() -> str:
@@ -17,7 +17,7 @@ def generate_thread_id() -> str:
     Returns:
         str: A new thread ID.
     """
-    return f"thread_{uuid.uuid4()}"
+    return f"{PKSUID('scenariothread')}"
 def generate_scenario_run_id() -> str:
@@ -27,7 +27,7 @@ def generate_scenario_run_id() -> str:
     Returns:
         str: A new scenario run ID.
     """
-    return f"scenariorun_{uuid.uuid4()}"
+    return f"{PKSUID('scenariorun')}"
 def generate_scenario_id() -> str:
@@ -37,7 +37,7 @@ def generate_scenario_id() -> str:
     Returns:
         str: A new scenario ID.
     """
-    return f"scenario_{uuid.uuid4()}"
+    return f"{PKSUID('scenario')}"
 def get_batch_run_id() -> str:
@@ -52,7 +52,7 @@ def get_batch_run_id() -> str:
     batch_run_id = os.environ.get("SCENARIO_BATCH_RUN_ID")
     if not batch_run_id:
         # Generate new batch ID if not set
-        batch_run_id = f"scenariobatchrun_{uuid.uuid4()}"
+        batch_run_id = f"{PKSUID('scenariobatch')}"
         os.environ["SCENARIO_BATCH_RUN_ID"] = batch_run_id
     return batch_run_id
@@ -65,23 +65,23 @@ def generate_message_id() -> str:
     Returns:
         str: A new message ID.
     """
-    return f"scenariomsg_{uuid.uuid4()}"
+    return f"{PKSUID('scenariomsg')}"
-def safe_parse_uuid(id_str: str) -> bool:
+def safe_parse_ksuid(id_str: str) -> bool:
     """
-    Safely parses a UUID string.
+    Safely parses a Ksuid string.
     Args:
-        id_str: The UUID string to parse.
+        id_str: The Ksuid string to parse.
     Returns:
-        bool: True if the UUID string is valid, false otherwise.
+        bool: True if the Ksuid string is valid, false otherwise.
     """
     try:
-        uuid.UUID(id_str)
+        PKSUID.parse(id_str)
         return True
-    except (ValueError, TypeError):
+    except Exception:
         return False

scenario/config/model.py CHANGED Viewed

@@ -17,7 +17,8 @@ class ModelConfig(BaseModel):
     for use with user simulator and judge agents in the Scenario framework.
     Attributes:
-        model: The model identifier (e.g., "openai/gpt-4.1-mini", "anthropic/claude-3-sonnet")
+        model: The model identifier (e.g., "openai/gpt-4.1", "anthropic/claude-3-sonnet")
+        api_base: Optional base URL where the model is hosted
         api_key: Optional API key for the model provider
         temperature: Sampling temperature for response generation (0.0 = deterministic, 1.0 = creative)
         max_tokens: Maximum number of tokens to generate in responses
@@ -26,6 +27,7 @@ class ModelConfig(BaseModel):
         ```
         model_config = ModelConfig(
             model="openai/gpt-4.1",
+            api_base="https://api.openai.com/v1",
             api_key="your-api-key",
             temperature=0.1,
             max_tokens=1000
@@ -34,6 +36,7 @@ class ModelConfig(BaseModel):
     """
     model: str
+    api_base: Optional[str] = None
     api_key: Optional[str] = None
     temperature: float = 0.0
     max_tokens: Optional[int] = None

scenario/config/scenario.py CHANGED Viewed

@@ -5,6 +5,7 @@ This module provides the main configuration class for customizing the behavior
 of the Scenario testing framework, including execution parameters and debugging options.
 """
+import os
 from typing import Optional, Union, ClassVar
 from pydantic import BaseModel
@@ -53,6 +54,11 @@ class ScenarioConfig(BaseModel):
     verbose: Optional[Union[bool, int]] = True
     cache_key: Optional[str] = None
     debug: Optional[bool] = False
+    headless: Optional[bool] = os.getenv("SCENARIO_HEADLESS", "false").lower() not in [
+        "false",
+        "0",
+        "",
+    ]
     default_config: ClassVar[Optional["ScenarioConfig"]] = None
@@ -64,6 +70,7 @@ class ScenarioConfig(BaseModel):
         verbose: Optional[Union[bool, int]] = None,
         cache_key: Optional[str] = None,
         debug: Optional[bool] = None,
+        headless: Optional[bool] = None,
     ) -> None:
         """
         Set global configuration settings for all scenario executions.
@@ -107,6 +114,7 @@ class ScenarioConfig(BaseModel):
                 verbose=verbose,
                 cache_key=cache_key,
                 debug=debug,
+                headless=headless,
             )
         )

scenario/judge_agent.py CHANGED Viewed

@@ -41,6 +41,7 @@ class JudgeAgent(AgentAdapter):
     Attributes:
         role: Always AgentRole.JUDGE for judge agents
         model: LLM model identifier to use for evaluation
+        api_base: Optional base URL where the model is hosted
         api_key: Optional API key for the model provider
         temperature: Sampling temperature for evaluation consistency
         max_tokens: Maximum tokens for judge reasoning
@@ -97,6 +98,7 @@ class JudgeAgent(AgentAdapter):
     role = AgentRole.JUDGE
     model: str
+    api_base: Optional[str]
     api_key: Optional[str]
     temperature: float
     max_tokens: Optional[int]
@@ -108,6 +110,7 @@ class JudgeAgent(AgentAdapter):
         *,
         criteria: Optional[List[str]] = None,
         model: Optional[str] = None,
+        api_base: Optional[str] = None,
         api_key: Optional[str] = None,
         temperature: float = 0.0,
         max_tokens: Optional[int] = None,
@@ -122,6 +125,8 @@ class JudgeAgent(AgentAdapter):
                      and negative constraints ("Agent should not provide personal information").
             model: LLM model identifier (e.g., "openai/gpt-4.1").
                    If not provided, uses the default model from global configuration.
+            api_base: Optional base URL where the model is hosted. If not provided,
+                      uses the base URL from global configuration.
             api_key: API key for the model provider. If not provided,
                      uses the key from global configuration or environment.
             temperature: Sampling temperature for evaluation (0.0-1.0).
@@ -156,6 +161,7 @@ class JudgeAgent(AgentAdapter):
         """
         # Override the default system prompt for the judge agent
         self.criteria = criteria or []
+        self.api_base = api_base
         self.api_key = api_key
         self.temperature = temperature
         self.max_tokens = max_tokens
@@ -172,6 +178,9 @@ class JudgeAgent(AgentAdapter):
             ScenarioConfig.default_config.default_model, ModelConfig
         ):
             self.model = model or ScenarioConfig.default_config.default_model.model
+            self.api_base = (
+                api_base or ScenarioConfig.default_config.default_model.api_base
+            )
             self.api_key = (
                 api_key or ScenarioConfig.default_config.default_model.api_key
             )
@@ -351,6 +360,8 @@ if you don't have enough information to make a verdict, say inconclusive with ma
                 model=self.model,
                 messages=messages,
                 temperature=self.temperature,
+                api_key=self.api_key,
+                api_base=self.api_base,
                 max_tokens=self.max_tokens,
                 tools=tools,
                 tool_choice=(
@@ -387,7 +398,7 @@ if you don't have enough information to make a verdict, say inconclusive with ma
                         failed_criteria = [
                             self.criteria[idx]
                             for idx, criterion in enumerate(criteria.values())
-                            if criterion == False
+                            if criterion == False or criterion == "inconclusive"
                         ]
                         # Return the appropriate ScenarioResult based on the verdict

scenario/py.typed ADDED Viewed

File without changes

scenario/pytest_plugin.py CHANGED Viewed

@@ -199,6 +199,8 @@ class ScenarioReporter:
 # Store the original run method
 original_run = ScenarioExecutor.run
+def pytest_addoption(parser):
+    parser.addoption("--headless", action="store_true")
 @pytest.hookimpl(trylast=True)
 def pytest_configure(config):
@@ -240,6 +242,9 @@ def pytest_configure(config):
         print(colored("\nScenario debug mode enabled (--debug).", "yellow"))
         ScenarioConfig.configure(verbose=True, debug=True)
+    if config.getoption("--headless"):
+        ScenarioConfig.configure(headless=True)
     # Create a global reporter instance
     config._scenario_reporter = ScenarioReporter()

scenario/scenario_executor.py CHANGED Viewed

@@ -153,6 +153,7 @@ class ScenarioExecutor:
             verbose=verbose,
             cache_key=cache_key,
             debug=debug,
+            headless=None,
         )
         self.config = (ScenarioConfig.default_config or ScenarioConfig()).merge(config)
@@ -198,7 +199,7 @@ class ScenarioExecutor:
         self._state = ScenarioState(
             description=self.description,
             messages=[],
-            thread_id=str(PKSUID("thread")),
+            thread_id=str(PKSUID("scenariothread")),
             current_turn=0,
             config=self.config,
             _executor=self,

scenario/user_simulator_agent.py CHANGED Viewed

@@ -37,6 +37,7 @@ class UserSimulatorAgent(AgentAdapter):
     Attributes:
         role: Always AgentRole.USER for user simulator agents
         model: LLM model identifier to use for generating user messages
+        api_base: Optional base URL where the model is hosted
         api_key: Optional API key for the model provider
         temperature: Sampling temperature for response generation
         max_tokens: Maximum tokens to generate in user messages
@@ -76,9 +77,11 @@ class UserSimulatorAgent(AgentAdapter):
         - Messages are generated in a casual, human-like style (lowercase, brief, etc.)
         - The simulator will not act as an assistant - it only generates user inputs
     """
     role = AgentRole.USER
     model: str
+    api_base: Optional[str]
     api_key: Optional[str]
     temperature: float
     max_tokens: Optional[int]
@@ -88,6 +91,7 @@ class UserSimulatorAgent(AgentAdapter):
         self,
         *,
         model: Optional[str] = None,
+        api_base: Optional[str] = None,
         api_key: Optional[str] = None,
         temperature: float = 0.0,
         max_tokens: Optional[int] = None,
@@ -99,6 +103,8 @@ class UserSimulatorAgent(AgentAdapter):
         Args:
             model: LLM model identifier (e.g., "openai/gpt-4.1").
                    If not provided, uses the default model from global configuration.
+            api_base: Optional base URL where the model is hosted. If not provided,
+                      uses the base URL from global configuration.
             api_key: API key for the model provider. If not provided,
                      uses the key from global configuration or environment.
             temperature: Sampling temperature for message generation (0.0-1.0).
@@ -128,6 +134,7 @@ class UserSimulatorAgent(AgentAdapter):
             ```
         """
         # Override the default system prompt for the user simulator agent
+        self.api_base = api_base
         self.api_key = api_key
         self.temperature = temperature
         self.max_tokens = max_tokens
@@ -144,6 +151,9 @@ class UserSimulatorAgent(AgentAdapter):
             ScenarioConfig.default_config.default_model, ModelConfig
         ):
             self.model = model or ScenarioConfig.default_config.default_model.model
+            self.api_base = (
+                api_base or ScenarioConfig.default_config.default_model.api_base
+            )
             self.api_key = (
                 api_key or ScenarioConfig.default_config.default_model.api_key
             )
@@ -222,6 +232,8 @@ Your goal (assistant) is to interact with the Agent Under Test (user) as if you
                 model=self.model,
                 messages=messages,
                 temperature=self.temperature,
+                api_key=self.api_key,
+                api_base=self.api_base,
                 max_tokens=self.max_tokens,
                 tools=[],
             ),

{langwatch_scenario-0.7.7.dist-info → langwatch_scenario-0.7.9.dist-info}/WHEEL RENAMED Viewed

File without changes

{langwatch_scenario-0.7.7.dist-info → langwatch_scenario-0.7.9.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{langwatch_scenario-0.7.7.dist-info → langwatch_scenario-0.7.9.dist-info}/top_level.txt RENAMED Viewed

File without changes

langwatch-scenario 0.7.7__py3-none-any.whl → 0.7.9__py3-none-any.whl

langwatch-scenario 0.7.7__py3-none-any.whl → 0.7.9__py3-none-any.whl