langwatch-scenario 0.7.3__py3-none-any.whl → 0.7.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {langwatch_scenario-0.7.3.dist-info → langwatch_scenario-0.7.7.dist-info}/METADATA +14 -12
- {langwatch_scenario-0.7.3.dist-info → langwatch_scenario-0.7.7.dist-info}/RECORD +17 -13
- scenario/__init__.py +1 -1
- scenario/_error_messages.py +2 -2
- scenario/_events/event_alert_message_logger.py +95 -0
- scenario/_events/event_bus.py +90 -30
- scenario/_events/event_reporter.py +43 -28
- scenario/_generated/langwatch_api_client/README.md +27 -17
- scenario/config/__init__.py +43 -0
- scenario/config/langwatch.py +51 -0
- scenario/config/model.py +39 -0
- scenario/{config.py → config/scenario.py} +5 -34
- scenario/judge_agent.py +2 -2
- scenario/user_simulator_agent.py +6 -6
- {langwatch_scenario-0.7.3.dist-info → langwatch_scenario-0.7.7.dist-info}/WHEEL +0 -0
- {langwatch_scenario-0.7.3.dist-info → langwatch_scenario-0.7.7.dist-info}/entry_points.txt +0 -0
- {langwatch_scenario-0.7.3.dist-info → langwatch_scenario-0.7.7.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: langwatch-scenario
-Version: 0.7.3
+Version: 0.7.7
 Summary: The end-to-end agent testing library
 Author-email: LangWatch Team <support@langwatch.ai>
 License: MIT
@@ -30,12 +30,12 @@ Requires-Dist: pksuid>=1.1.2
 Requires-Dist: httpx>=0.27.0
 Requires-Dist: rx>=3.2.0
 Requires-Dist: python-dateutil>=2.9.0.post0
+Requires-Dist: pydantic-settings>=2.9.1
 Provides-Extra: dev
 Requires-Dist: black; extra == "dev"
 Requires-Dist: isort; extra == "dev"
 Requires-Dist: pytest-cov; extra == "dev"
 Requires-Dist: pre-commit; extra == "dev"
-Requires-Dist: commitizen; extra == "dev"
 Requires-Dist: pyright; extra == "dev"
 Requires-Dist: pydantic-ai; extra == "dev"
 Requires-Dist: function-schema; extra == "dev"
@@ -88,7 +88,7 @@ result = await scenario.run(
     # Define the agents that will play this simulation
     agents=[
         WeatherAgent(),
-        scenario.UserSimulatorAgent(model="openai/gpt-4.1
+        scenario.UserSimulatorAgent(model="openai/gpt-4.1"),
     ],
 
     # (Optional) Control the simulation
@@ -159,7 +159,7 @@ import pytest
 import scenario
 import litellm
 
-scenario.configure(default_model="openai/gpt-4.1
+scenario.configure(default_model="openai/gpt-4.1")
 
 
 @pytest.mark.agent_test
@@ -189,6 +189,7 @@ async def test_vegetarian_recipe_agent():
             ]
         ),
     ],
+    set_id="python-examples",
 )
 
 # Assert for pytest to know whether the test passed
@@ -202,7 +203,7 @@ import litellm
 @scenario.cache()
 def vegetarian_recipe_agent(messages) -> scenario.AgentReturnTypes:
     response = litellm.completion(
-        model="openai/gpt-4.1
+        model="openai/gpt-4.1",
         messages=[
             {
                 "role": "system",
@@ -227,17 +228,17 @@ def vegetarian_recipe_agent(messages) -> scenario.AgentReturnTypes:
 Save it as `tests/vegetarian-recipe-agent.test.ts`:
 
 ```typescript
+import scenario, { type AgentAdapter, AgentRole } from "@langwatch/scenario";
 import { openai } from "@ai-sdk/openai";
-import * as scenario from "@langwatch/scenario";
 import { generateText } from "ai";
 import { describe, it, expect } from "vitest";
 
 describe("Vegetarian Recipe Agent", () => {
-  const agent:
-  role:
+  const agent: AgentAdapter = {
+    role: AgentRole.AGENT,
     call: async (input) => {
       const response = await generateText({
-        model: openai("gpt-4.1
+        model: openai("gpt-4.1"),
         messages: [
           {
             role: "system",
@@ -258,7 +259,7 @@ describe("Vegetarian Recipe Agent", () => {
         agent,
         scenario.userSimulatorAgent(),
         scenario.judgeAgent({
-          model: openai("gpt-4.1
+          model: openai("gpt-4.1"),
           criteria: [
             "Agent should not ask more than two follow-up questions",
             "Agent should generate a recipe",
@@ -268,6 +269,7 @@ describe("Vegetarian Recipe Agent", () => {
           ],
         }),
       ],
+      setId: "javascript-examples",
     });
     expect(result.success).toBe(true);
   });
@@ -417,7 +419,7 @@ You can enable debug mode by setting the `debug` field to `True` in the `Scenari
 Debug mode allows you to see the messages in slow motion step by step, and intervene with your own inputs to debug your agent from the middle of the conversation.
 
 ```python
-scenario.configure(default_model="openai/gpt-4.1
+scenario.configure(default_model="openai/gpt-4.1", debug=True)
 ```
 
 or
@@ -431,7 +433,7 @@ pytest -s tests/test_vegetarian_recipe_agent.py --debug
 Each time the scenario runs, the testing agent might chose a different input to start, this is good to make sure it covers the variance of real users as well, however we understand that the non-deterministic nature of it might make it less repeatable, costly and harder to debug. To solve for it, you can use the `cache_key` field in the `Scenario.configure` method or in the specific scenario you are running, this will make the testing agent give the same input for given the same scenario:
 
 ```python
-scenario.configure(default_model="openai/gpt-4.1
+scenario.configure(default_model="openai/gpt-4.1", cache_key="42")
 ```
 
 To bust the cache, you can simply pass a different `cache_key`, disable it, or delete the cache files located at `~/.scenario/cache`.
@@ -1,22 +1,22 @@
-scenario/__init__.py,sha256=
-scenario/_error_messages.py,sha256=
+scenario/__init__.py,sha256=4WO8TjY8Lc0NhYL7b9LvaB1xCBqwUkLuI0uIA6PQP6c,4223
+scenario/_error_messages.py,sha256=QVFSbhzsVNGz2GOBOaoQFW6w6AOyZCWLTt0ySWPfnGw,3882
 scenario/agent_adapter.py,sha256=PoY2KQqYuqzIIb3-nhIU-MPXwHJc1vmwdweMy7ut-hk,4255
 scenario/cache.py,sha256=J6s6Sia_Ce6TrnsInlhfxm6SF8tygo3sH-_cQCRX1WA,6213
-scenario/
-scenario/judge_agent.py,sha256=d8vORsqpUPIA4yhlBTv5Yi4I2MdcfXselYBTFvfZx-4,16221
+scenario/judge_agent.py,sha256=7NsgeMu6wRMjU_HYTCFqkLma6H2AJuEkw9hJkt11190,16211
 scenario/pytest_plugin.py,sha256=DGrpgB6e71eq8QXWWxwLjAKNhiyYyzfzZ0L5Ax8iEmo,11317
 scenario/scenario_executor.py,sha256=2ZPy2cywwEMIbUfBP1jHN__Ffjf5WGB144MX2SNr5IM,33101
 scenario/scenario_state.py,sha256=LWGqEQN-Yz0DIiC-TyMRHd-9rEiuBVUHKllMmKv-qGg,7029
 scenario/script.py,sha256=A0N5pP0l4FFn1xdKc78U_wkwWhEWH3EFeU_LRDtNyEI,12241
 scenario/types.py,sha256=qH5KFzJBDG1fEJB_qFRVtL3EZulxq3G1mztYczIzIAY,9613
-scenario/user_simulator_agent.py,sha256=
+scenario/user_simulator_agent.py,sha256=UJ75xhqHwoi8-3JkR1AsHDzpHM2Lx-aDSTJ1gnq_SXc,9101
 scenario/_events/__init__.py,sha256=4cj6H9zuXzvWhT2P2JNdjWzeF1PUepTjqIDw85Vid9s,1500
-scenario/_events/
-scenario/_events/
+scenario/_events/event_alert_message_logger.py,sha256=K0Pu76Gd36lGEEYh8e8r7NMt7J-OQhbw0cZmiwutCOE,3591
+scenario/_events/event_bus.py,sha256=KFN0OxAQIQXIk_tVrorDoN_YLKVK9dos5SXFALstHgE,9809
+scenario/_events/event_reporter.py,sha256=4uND_kdPBXe-aUWCdSj4BLrMA33TDnbZzokAEOU3_08,3771
 scenario/_events/events.py,sha256=UtEGY-_1B0LrwpgsNKgrvJBZhRtxuj3K_i6ZBfF7E4Q,6387
 scenario/_events/messages.py,sha256=quwP2OkeaGasNOoaV8GUeosZVKc5XDsde08T0xx_YQo,2297
 scenario/_events/utils.py,sha256=SproqiwjhLWAW7p82EirCgawpxAo0ksW1pBB4mKkcEs,3436
-scenario/_generated/langwatch_api_client/README.md,sha256=
+scenario/_generated/langwatch_api_client/README.md,sha256=Az5f2L4ChOnG_ZtrdBagzRVgeTCtBkbD_S5cIeAry2o,5424
 scenario/_generated/langwatch_api_client/pyproject.toml,sha256=Z8wxuGp4H9BJYVVJB8diW7rRU9XYxtPfw9mU4_wq4cA,560
 scenario/_generated/langwatch_api_client/lang_watch_api_client/__init__.py,sha256=vVrn17y-3l3fOqeJk8aN3GlStRm2fo0f313l_0LtJNs,368
 scenario/_generated/langwatch_api_client/lang_watch_api_client/client.py,sha256=o_mdLqyBCQstu5tS1WZFwqIEbGwkvWQ7eQjuCJw_5VY,12419
@@ -230,8 +230,12 @@ scenario/_utils/__init__.py,sha256=ptNVzmjhypznnozdNIiuBDHZ0NLqtp7xhio9kEDovWQ,1
 scenario/_utils/ids.py,sha256=v3JS8J7vrFuubK5bXJviU-BVZoLGWINCN1hUyAO9NZw,2074
 scenario/_utils/message_conversion.py,sha256=AWHn31E7J0mz9sBXWruVVAgtsrJz1R_xEf-dGbX6jjs,3636
 scenario/_utils/utils.py,sha256=msQgUWaLh3U9jIIHmxkEbOaklga63AF0KJzsaKa_mZc,14008
-
-
-
-
-langwatch_scenario-0.7.
+scenario/config/__init__.py,sha256=b2X_bqkIrd7jZY9dRrXk2wOqoPe87Nl_SRGuZhlolxA,1123
+scenario/config/langwatch.py,sha256=ijWchFbUsLbQooAZmwyTw4rxfRLQseZ1GoVSiPPbzpw,1677
+scenario/config/model.py,sha256=Ve49S2FyzUifXJ-SAyKPiNtVqs8BfsYbODu_M5y0c8Y,1155
+scenario/config/scenario.py,sha256=tVVnsUgG6Z0hYZiTDX-GGZz8l8co1HhyTqJUJNPinBk,5184
+langwatch_scenario-0.7.7.dist-info/METADATA,sha256=L7h0kgOaIij6MYVCac0EqPu8ODkZNKxDeIrHCSJg2l4,20003
+langwatch_scenario-0.7.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+langwatch_scenario-0.7.7.dist-info/entry_points.txt,sha256=WlEnJ_gku0i18bIa3DSuGqXRX-QDQLe_s0YmRzK45TI,45
+langwatch_scenario-0.7.7.dist-info/top_level.txt,sha256=45Mn28aedJsetnBMB5xSmrJ-yo701QLH89Zlz4r1clE,9
+langwatch_scenario-0.7.7.dist-info/RECORD,,
scenario/__init__.py
CHANGED
scenario/_error_messages.py
CHANGED
@@ -8,12 +8,12 @@ def agent_not_configured_error_message(class_name: str):
 
 {termcolor.colored("->", "cyan")} {class_name} was initialized without a model, please set the model when defining the testing agent, for example:
 
-    {class_name}(model="openai/gpt-4.1
+    {class_name}(model="openai/gpt-4.1")
     {termcolor.colored("^" * (29 + len(class_name)), "green")}
 
 {termcolor.colored("->", "cyan")} Alternatively, you can set the default model globally, for example:
 
-    scenario.configure(default_model="openai/gpt-4.1
+    scenario.configure(default_model="openai/gpt-4.1")
     {termcolor.colored("^" * 55, "green")}
 """
 
scenario/_events/event_alert_message_logger.py
ADDED
@@ -0,0 +1,95 @@
+import os
+from typing import Set
+from .._utils.ids import get_batch_run_id
+
+
+class EventAlertMessageLogger:
+    """
+    Handles console output of alert messages for scenario events.
+
+    Single responsibility: Display user-friendly messages about event reporting status
+    and simulation watching instructions.
+    """
+
+    _shown_batch_ids: Set[str] = set()
+
+    def handle_greeting(self) -> None:
+        """
+        Shows a fancy greeting message about simulation reporting status.
+        Only shows once per batch run to avoid spam.
+        """
+        if self._is_greeting_disabled():
+            return
+
+        batch_run_id = get_batch_run_id()
+
+        if batch_run_id in EventAlertMessageLogger._shown_batch_ids:
+            return
+
+        EventAlertMessageLogger._shown_batch_ids.add(batch_run_id)
+        self._display_greeting(batch_run_id)
+
+    def handle_watch_message(self, set_url: str) -> None:
+        """
+        Shows a fancy message about how to watch the simulation.
+        Called when a run started event is received with a session ID.
+        """
+        if self._is_greeting_disabled():
+            return
+
+        self._display_watch_message(set_url)
+
+    def _is_greeting_disabled(self) -> bool:
+        """Check if greeting messages are disabled via environment variable."""
+        return bool(os.getenv("SCENARIO_DISABLE_SIMULATION_REPORT_INFO"))
+
+    def _display_greeting(self, batch_run_id: str) -> None:
+        """Display the greeting message with simulation reporting status."""
+        separator = "─" * 60
+
+        if not os.getenv("LANGWATCH_API_KEY"):
+            print(f"\n{separator}")
+            print("🚀 LangWatch Simulation Reporting")
+            print(f"{separator}")
+            print("➡️  API key not configured")
+            print("   Simulations will only output final results")
+            print("")
+            print("💡 To visualize conversations in real time:")
+            print("   • Set LANGWATCH_API_KEY environment variable")
+            print("   • Or configure apiKey in scenario.config.js")
+            print("")
+            print(f"📦 Batch Run ID: {batch_run_id}")
+            print("")
+            print("🔇 To disable these messages:")
+            print("   • Set SCENARIO_DISABLE_SIMULATION_REPORT_INFO=true")
+            print(f"{separator}\n")
+        else:
+            endpoint = os.getenv("LANGWATCH_ENDPOINT", "https://app.langwatch.ai")
+            api_key = os.getenv("LANGWATCH_API_KEY", "")
+
+            print(f"\n{separator}")
+            print("🚀 LangWatch Simulation Reporting")
+            print(f"{separator}")
+            print("✅ Simulation reporting enabled")
+            print(f"   Endpoint: {endpoint}")
+            print(f"   API Key: {'Configured' if api_key else 'Not configured'}")
+            print("")
+            print(f"📦 Batch Run ID: {batch_run_id}")
+            print("")
+            print("🔇 To disable these messages:")
+            print("   • Set SCENARIO_DISABLE_SIMULATION_REPORT_INFO=true")
+            print(f"{separator}\n")
+
+    def _display_watch_message(self, set_url: str) -> None:
+        """Display the watch message with URLs for viewing the simulation."""
+        separator = "─" * 60
+        batch_url = f"{set_url}/{get_batch_run_id()}"
+
+        print(f"\n{separator}")
+        print("👀 Watch Your Simulation Live")
+        print(f"{separator}")
+        print("🌐 Open in your browser:")
+        print(f"   Scenario Set: {set_url}")
+        print(f"   Batch Run: {batch_url}")
+        print("")
+        print(f"{separator}\n")
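For orientation, here is a minimal usage sketch of the new logger, assuming only the package layout shown in this diff (the `set_url` value below is hypothetical; real URLs come back from the LangWatch API):

```python
# Minimal sketch for the new EventAlertMessageLogger.
from scenario._events.event_alert_message_logger import EventAlertMessageLogger

alert_logger = EventAlertMessageLogger()

# Prints the reporting banner once per batch run; silenced entirely when
# SCENARIO_DISABLE_SIMULATION_REPORT_INFO is set.
alert_logger.handle_greeting()

# Prints the "watch live" URLs; the batch URL is derived internally as
# f"{set_url}/{batch_run_id}". This URL is a made-up example.
alert_logger.handle_watch_message("https://app.langwatch.ai/project/simulations/my-set")
```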
scenario/_events/event_bus.py
CHANGED
@@ -1,13 +1,15 @@
 from rx.core.observable.observable import Observable
-from typing import Optional, Any
+from typing import Optional, Any, Dict
 from .events import ScenarioEvent
 from .event_reporter import EventReporter
+from .event_alert_message_logger import EventAlertMessageLogger
 
 import asyncio
 import queue
 import threading
 import logging
 
+
 class ScenarioEventBus:
     """
     Subscribes to scenario event streams and handles HTTP posting using a dedicated worker thread.
@@ -24,6 +26,7 @@ class ScenarioEventBus:
 
     Attributes:
         _event_reporter: EventReporter instance for HTTP posting of events
+        _event_alert_message_logger: EventAlertMessageLogger for user-friendly console output
         _max_retries: Maximum number of retry attempts for failed event processing
         _event_queue: Thread-safe queue for passing events to worker thread
         _completed: Whether the event stream has completed
@@ -44,11 +47,12 @@
             Defaults to 3 attempts with exponential backoff.
         """
         self._event_reporter: EventReporter = event_reporter or EventReporter()
+        self._event_alert_message_logger = EventAlertMessageLogger()
         self._max_retries = max_retries
-
+
         # Custom logger for this class
         self.logger = logging.getLogger(__name__)
-
+
         # Threading infrastructure
         self._event_queue: queue.Queue[ScenarioEvent] = queue.Queue()
         self._completed = False
@@ -61,9 +65,7 @@
         if self._worker_thread is None or not self._worker_thread.is_alive():
             self.logger.debug("Creating new worker thread")
             self._worker_thread = threading.Thread(
-                target=self._worker_loop,
-                daemon=False,
-                name="ScenarioEventBus-Worker"
+                target=self._worker_loop, daemon=False, name="ScenarioEventBus-Worker"
             )
             self._worker_thread.start()
             self.logger.debug("Worker thread started")
@@ -76,52 +78,108 @@ class ScenarioEventBus:
             if self._shutdown_event.wait(timeout=0.1):
                 self.logger.debug("Worker thread received shutdown signal")
                 break
-
+
             try:
                 event = self._event_queue.get(timeout=0.1)
-                self.logger.debug(
+                self.logger.debug(
+                    f"Worker picked up event: {event.type_} ({event.scenario_run_id})"
+                )
                 self._process_event_sync(event)
                 self._event_queue.task_done()
             except queue.Empty:
                 # Exit if stream completed and no more events
                 if self._completed:
-                    self.logger.debug(
+                    self.logger.debug(
+                        "Stream completed and no more events, worker thread exiting"
+                    )
                     break
                 continue
-
+
             except Exception as e:
                 self.logger.error(f"Worker thread error: {e}")
-
+
         self.logger.debug("Worker thread loop ended")
 
     def _process_event_sync(self, event: ScenarioEvent) -> None:
         """
         Process event synchronously in worker thread with retry logic.
         """
-        self.logger.debug(
-
+        self.logger.debug(
+            f"Processing HTTP post for {event.type_} ({event.scenario_run_id})"
+        )
+
         try:
-
-
-            if not success:
-                self.logger.warning(f"Failed to process event {event.type_} after {self._max_retries} attempts")
-            else:
-                self.logger.debug(f"Successfully posted {event.type_} ({event.scenario_run_id})")
+            result = self._post_event_with_retry(event)
+            self._handle_event_result(event, result)
         except Exception as e:
             self.logger.error(f"Error processing event {event.type_}: {e}")
 
-
+    def _post_event_with_retry(self, event: ScenarioEvent) -> Optional[Dict[str, Any]]:
+        """
+        Post event with retry logic, converting async to sync.
+        """
+        return asyncio.run(self._process_event_with_retry(event))
+
+    def _handle_event_result(
+        self, event: ScenarioEvent, result: Optional[Dict[str, Any]]
+    ) -> None:
+        """
+        Handle the result of event processing, including logging and watch messages.
+        """
+        if result is None:
+            self.logger.warning(
+                f"Failed to process event {event.type_} after {self._max_retries} attempts"
+            )
+            return
+
+        self.logger.debug(
+            f"Successfully posted {event.type_} ({event.scenario_run_id})"
+        )
+
+        # Handle watch message for run started events
+        if event.type_ == "SCENARIO_RUN_STARTED" and result.get("setUrl"):
+            self._handle_watch_message(event, result)
+
+    def _handle_watch_message(
+        self, event: ScenarioEvent, result: Dict[str, Any]
+    ) -> None:
+        """
+        Handle watch message for scenario run started events.
+        """
+        self._event_alert_message_logger.handle_watch_message(
+            set_url=str(result["setUrl"]),
+        )
+
+    def _extract_scenario_set_id(self, event: ScenarioEvent) -> str:
+        """
+        Extract scenario set ID from event, handling Unset types from generated models.
+        """
+        scenario_set_id = getattr(event, "scenario_set_id", "default")
+
+        # Handle Unset type from generated models
+        if hasattr(scenario_set_id, "__class__") and "Unset" in str(
+            scenario_set_id.__class__
+        ):
+            return "default"
+
+        return str(scenario_set_id)
+
+    async def _process_event_with_retry(
+        self, event: ScenarioEvent, attempt: int = 1
+    ) -> Optional[Dict[str, Any]]:
         """
         Process a single event with retry logic (now runs in worker thread context).
         """
         try:
             if self._event_reporter:
-                await self._event_reporter.post_event(event)
-                return
+                return await self._event_reporter.post_event(event)
+            return {}
         except Exception as e:
             if attempt >= self._max_retries:
-                return
-            print(
+                return None
+            print(
+                f"Error processing event (attempt {attempt}/{self._max_retries}): {e}"
+            )
             await asyncio.sleep(0.1 * (2 ** (attempt - 1)))  # Exponential backoff
             return await self._process_event_with_retry(event, attempt + 1)
 
@@ -135,7 +193,9 @@ class ScenarioEventBus:
             return
 
         def handle_event(event: ScenarioEvent) -> None:
-            self.logger.debug(
+            self.logger.debug(
+                f"Event received, queuing: {event.type_} ({event.scenario_run_id})"
+            )
             self._get_or_create_worker()
             self._event_queue.put(event)
             self.logger.debug(f"Event queued: {event.type_} ({event.scenario_run_id})")
@@ -144,7 +204,7 @@ class ScenarioEventBus:
         self._subscription = event_stream.subscribe(
             handle_event,
             lambda e: self.logger.error(f"Error in event stream: {e}"),
-            lambda: self._set_completed()
+            lambda: self._set_completed(),
         )
 
     def _set_completed(self):
@@ -155,17 +215,17 @@ class ScenarioEventBus:
     def drain(self) -> None:
         """
         Waits for all queued events to complete processing.
-
+
         This method blocks until all events in the queue have been processed.
         Since _process_event_sync() uses asyncio.run(), HTTP requests complete
         before task_done() is called, so join() ensures everything is finished.
         """
         self.logger.debug("Drain started - waiting for queue to empty")
-
+
         # Wait for all events to be processed - this is sufficient!
         self._event_queue.join()
         self.logger.debug("Event queue drained")
-
+
         # Signal worker to shutdown and wait for it
         self._shutdown_event.set()
         if self._worker_thread and self._worker_thread.is_alive():
@@ -175,7 +235,7 @@ class ScenarioEventBus:
             self.logger.warning("Worker thread did not shutdown within timeout")
         else:
            self.logger.debug("Worker thread shutdown complete")
-
+
         self.logger.info("Drain completed")
 
     def is_completed(self) -> bool:
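The retry path above backs off exponentially via `0.1 * (2 ** (attempt - 1))`. A quick sketch of the delay schedule this implies, assuming the default of 3 attempts mentioned in the docstring:

```python
# Sleeps occur only after a failed attempt that is not the last one,
# so with max_retries=3 there are waits before attempts 2 and 3.
max_retries = 3
delays = [0.1 * (2 ** (attempt - 1)) for attempt in range(1, max_retries)]
print(delays)  # [0.1, 0.2] seconds
```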
scenario/_events/event_reporter.py
CHANGED
@@ -1,8 +1,9 @@
 import logging
-import os
 import httpx
-from typing import Optional
+from typing import Optional, Dict, Any
 from .events import ScenarioEvent
+from .event_alert_message_logger import EventAlertMessageLogger
+from scenario.config import LangWatchSettings
 
 
 class EventReporter:
@@ -13,51 +14,54 @@ class EventReporter:
     with proper authentication and error handling.
 
     Args:
-        endpoint (str, optional):
-        api_key (str, optional):
+        endpoint (str, optional): Override endpoint URL. If not provided, uses LANGWATCH_ENDPOINT env var.
+        api_key (str, optional): Override API key. If not provided, uses LANGWATCH_API_KEY env var.
 
     Example:
-
-
-
-
-
-
-            "name": "test",
-            "description": "test scenario"
-        }
-    }
-
-    reporter = EventReporter(endpoint="https://api.langwatch.ai", api_key="test-api-key")
-    await reporter.post_event(event)
+        # Using environment variables (LANGWATCH_ENDPOINT, LANGWATCH_API_KEY)
+        reporter = EventReporter()
+
+        # Override specific values
+        reporter = EventReporter(endpoint="https://langwatch.yourdomain.com")
+        reporter = EventReporter(api_key="your-api-key")
     """
 
     def __init__(self, endpoint: Optional[str] = None, api_key: Optional[str] = None):
-
-
+        # Load settings from environment variables
+        langwatch_settings = LangWatchSettings()
+
+        # Allow constructor parameters to override settings
+        self.endpoint = endpoint or langwatch_settings.endpoint
+        self.api_key = api_key or langwatch_settings.api_key
         self.logger = logging.getLogger(__name__)
+        self.event_alert_message_logger = EventAlertMessageLogger()
+
+        # Show greeting message when reporter is initialized
+        self.event_alert_message_logger.handle_greeting()
 
-    async def post_event(self, event: ScenarioEvent):
+    async def post_event(self, event: ScenarioEvent) -> Dict[str, Any]:
         """
         Posts an event to the configured endpoint.
 
         Args:
-            event: A
+            event: A ScenarioEvent containing the event data
 
         Returns:
-
+            Dict containing response data, including setUrl if available
         """
         event_type = event.type_
         self.logger.info(f"[{event_type}] Publishing event ({event.scenario_run_id})")
 
+        result: Dict[str, Any] = {}
+
         if not self.endpoint:
             self.logger.warning(
                 "No LANGWATCH_ENDPOINT configured, skipping event posting"
             )
-            return
+            return result
 
         try:
-            async with httpx.AsyncClient() as client:
+            async with httpx.AsyncClient(follow_redirects=True) as client:
                 response = await client.post(
                     f"{self.endpoint}/api/scenario-events",
                     json=event.to_dict(),
@@ -66,11 +70,19 @@ class EventReporter:
                         "X-Auth-Token": self.api_key,
                    },
                )
-                self.logger.info(
-
+                self.logger.info(
+                    f"[{event_type}] POST response status: {response.status_code} ({event.scenario_run_id})"
+                )
+
                 if response.is_success:
                     data = response.json()
-                    self.logger.info(
+                    self.logger.info(
+                        f"[{event_type}] POST response: {data} ({event.scenario_run_id})"
+                    )
+
+                    # Extract setUrl from response if available
+                    if isinstance(data, dict) and "url" in data:
+                        result["setUrl"] = data["url"]
                 else:
                     error_text = response.text
                     self.logger.error(
@@ -80,4 +92,7 @@ class EventReporter:
                 )
         except Exception as error:
             self.logger.error(
-                f"[{event_type}] Event POST error: {error}, event={event}, endpoint={self.endpoint}"
+                f"[{event_type}] Event POST error: {error}, event={event}, endpoint={self.endpoint}"
+            )
+
+        return result
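Taken together, the new `post_event` contract can be exercised as in this sketch (it assumes a `ScenarioEvent` instance is already in hand; event construction itself is not shown in this diff):

```python
import asyncio
from scenario._events.event_reporter import EventReporter

async def report(event):  # `event` is assumed to be a ScenarioEvent
    # Resolves LANGWATCH_ENDPOINT / LANGWATCH_API_KEY via LangWatchSettings
    # and prints the greeting banner on construction.
    reporter = EventReporter()
    result = await reporter.post_event(event)
    # post_event now returns a dict; "setUrl" is present when the API
    # response carried a "url" field.
    if result.get("setUrl"):
        print(f"Watch the simulation at {result['setUrl']}")

# asyncio.run(report(my_event))  # my_event: a ScenarioEvent, not shown here
```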
scenario/_generated/langwatch_api_client/README.md
CHANGED
@@ -1,15 +1,19 @@
 # lang-watch-api-client
+
 **⚠️ AUTO-GENERATED CODE - DO NOT EDIT MANUALLY ⚠️**
 
 This is an auto-generated client library for accessing LangWatch API, created using `openapi-python-client`.
 
 ## Regeneration
+
 To regenerate this client:
+
 ```bash
 make generate-openapi-client
 ```
 
 ## Source
+
 Generated from: `../langwatch-saas/langwatch/langwatch/src/app/api/openapiLangWatch.json`
 
 ---
@@ -17,12 +21,13 @@ Generated from: `../langwatch-saas/langwatch/langwatch/src/app/api/openapiLangWa
 A client library for accessing LangWatch API
 
 ## Usage
+
 First, create a client:
 
 ```python
 from lang_watch_api_client import Client
 
-client = Client(base_url="https://
+client = Client(base_url="https://app.langwatch.ai")
 ```
 
 If the endpoints you're going to hit require authentication, use `AuthenticatedClient` instead:
@@ -30,7 +35,7 @@ If the endpoints you're going to hit require authentication, use `AuthenticatedC
 ```python
 from lang_watch_api_client import AuthenticatedClient
 
-client = AuthenticatedClient(base_url="https://
+client = AuthenticatedClient(base_url="https://app.langwatch.ai", token="SuperSecretToken")
 ```
 
 Now call your endpoint and use your models:
@@ -62,7 +67,7 @@ By default, when you're calling an HTTPS API it will attempt to verify that SSL
 
 ```python
 client = AuthenticatedClient(
-    base_url="https://
+    base_url="https://app.langwatch.ai",
     token="SuperSecretToken",
     verify_ssl="/path/to/certificate_bundle.pem",
 )
@@ -72,18 +77,20 @@ You can also disable certificate validation altogether, but beware that **this i
 
 ```python
 client = AuthenticatedClient(
-    base_url="https://
-    token="SuperSecretToken",
+    base_url="https://app.langwatch.ai",
+    token="SuperSecretToken",
     verify_ssl=False
 )
 ```
 
 Things to know:
+
 1. Every path/method combo becomes a Python module with four functions:
-
-
-
-
+
+    1. `sync`: Blocking request that returns parsed data (if successful) or `None`
+    1. `sync_detailed`: Blocking request that always returns a `Request`, optionally with `parsed` set if the request was successful.
+    1. `asyncio`: Like `sync` but async instead of blocking
+    1. `asyncio_detailed`: Like `sync_detailed` but async instead of blocking
 
 1. All path/query params, and bodies become method arguments.
 1. If your endpoint had any tags on it, the first tag will be used as a module name for the function (my_tag above)
@@ -104,7 +111,7 @@ def log_response(response):
     print(f"Response event hook: {request.method} {request.url} - Status {response.status_code}")
 
 client = Client(
-    base_url="https://
+    base_url="https://app.langwatch.ai",
     httpx_args={"event_hooks": {"request": [log_request], "response": [log_response]}},
 )
 
@@ -118,22 +125,25 @@ import httpx
 from lang_watch_api_client import Client
 
 client = Client(
-    base_url="https://
+    base_url="https://app.langwatch.ai",
 )
 # Note that base_url needs to be re-set, as would any shared cookies, headers, etc.
-client.set_httpx_client(httpx.Client(base_url="https://
+client.set_httpx_client(httpx.Client(base_url="https://app.langwatch.ai", proxies="http://localhost:8030"))
 ```
 
 ## Building / publishing this package
-
+
+This project uses [Poetry](https://python-poetry.org/) to manage dependencies and packaging. Here are the basics:
+
 1. Update the metadata in pyproject.toml (e.g. authors, version)
 1. If you're using a private repository, configure it with Poetry
-
-
+    1. `poetry config repositories.<your-repository-name> <url-to-your-repository>`
+    1. `poetry config http-basic.<your-repository-name> <username> <password>`
 1. Publish the client with `poetry publish --build -r <your-repository-name>` or, if for public PyPI, just `poetry publish --build`
 
 If you want to install this client into another project without publishing it (e.g. for development) then:
+
 1. If that project **is using Poetry**, you can simply do `poetry add <path-to-this-client>` from that project
 1. If that project is not using Poetry:
-
-
+    1. Build a wheel with `poetry build -f wheel`
+    1. Install that wheel from the other project `pip install <path-to-wheel>`
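As a sketch of the four generated functions described in that README, using the `openapi-python-client` template's `my_tag`/`get_my_data_model` placeholder names (they are illustrative, not real endpoints of this client):

```python
from lang_watch_api_client import Client
# Placeholder module path from the generator's template; real modules are
# derived from the API's tags and operation IDs.
from lang_watch_api_client.api.my_tag import get_my_data_model

client = Client(base_url="https://app.langwatch.ai")

data = get_my_data_model.sync(client=client)               # parsed data or None
response = get_my_data_model.sync_detailed(client=client)  # full response object
print(response.status_code, response.parsed)
```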
scenario/config/__init__.py
ADDED
@@ -0,0 +1,43 @@
+"""
+Configuration module for Scenario.
+
+This module provides all configuration classes for customizing the behavior
+of the Scenario testing framework, including model settings, scenario execution
+parameters, and LangWatch integration.
+
+Classes:
+    ModelConfig: Configuration for LLM model settings
+    ScenarioConfig: Main configuration for scenario execution
+    LangWatchSettings: Configuration for LangWatch API integration
+
+Example:
+    ```
+    from scenario.config import ModelConfig, ScenarioConfig, LangWatchSettings
+
+    # Configure LLM model
+    model_config = ModelConfig(
+        model="openai/gpt-4.1-mini",
+        temperature=0.1
+    )
+
+    # Configure scenario execution
+    scenario_config = ScenarioConfig(
+        default_model=model_config,
+        max_turns=15,
+        verbose=True
+    )
+
+    # Configure LangWatch integration
+    langwatch_settings = LangWatchSettings()  # Reads from environment
+    ```
+"""
+
+from .model import ModelConfig
+from .scenario import ScenarioConfig
+from .langwatch import LangWatchSettings
+
+__all__ = [
+    "ModelConfig",
+    "ScenarioConfig",
+    "LangWatchSettings",
+]
scenario/config/langwatch.py
ADDED
@@ -0,0 +1,51 @@
+"""
+LangWatch configuration for Scenario.
+
+This module provides configuration for LangWatch API integration,
+including endpoint URLs and authentication credentials.
+"""
+
+from pydantic import Field, HttpUrl
+from pydantic_settings import BaseSettings, SettingsConfigDict
+
+
+class LangWatchSettings(BaseSettings):
+    """
+    Configuration for LangWatch API integration.
+
+    This class handles configuration for connecting to LangWatch services,
+    automatically reading from environment variables with the LANGWATCH_ prefix.
+
+    Attributes:
+        endpoint: LangWatch API endpoint URL
+        api_key: API key for LangWatch authentication
+
+    Environment Variables:
+        LANGWATCH_ENDPOINT: LangWatch API endpoint (defaults to https://app.langwatch.ai)
+        LANGWATCH_API_KEY: API key for authentication (defaults to empty string)
+
+    Example:
+        ```
+        # Using environment variables
+        # export LANGWATCH_ENDPOINT="https://app.langwatch.ai"
+        # export LANGWATCH_API_KEY="your-api-key"
+
+        settings = LangWatchSettings()
+        print(settings.endpoint)  # https://app.langwatch.ai
+        print(settings.api_key)   # your-api-key
+
+        # Or override programmatically
+        settings = LangWatchSettings(
+            endpoint="https://custom.langwatch.ai",
+            api_key="your-api-key"
+        )
+        ```
+    """
+
+    model_config = SettingsConfigDict(env_prefix="LANGWATCH_", case_sensitive=False)
+
+    endpoint: HttpUrl = Field(
+        default=HttpUrl("https://app.langwatch.ai"),
+        description="LangWatch API endpoint URL",
+    )
+    api_key: str = Field(default="", description="API key for LangWatch authentication")
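Because `LangWatchSettings` subclasses `pydantic-settings`' `BaseSettings` with `env_prefix="LANGWATCH_"`, values resolve from the environment at construction time while constructor arguments still take precedence. A quick sketch (the key value is made up):

```python
import os
from scenario.config import LangWatchSettings

os.environ["LANGWATCH_API_KEY"] = "test-api-key"  # hypothetical value

settings = LangWatchSettings()
print(settings.endpoint)  # the default endpoint, https://app.langwatch.ai
print(settings.api_key)   # test-api-key

# Explicit arguments override the environment:
custom = LangWatchSettings(endpoint="https://langwatch.yourdomain.com")
```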
scenario/config/model.py
ADDED
@@ -0,0 +1,39 @@
+"""
+Model configuration for Scenario.
+
+This module provides configuration classes for LLM model settings used by
+user simulator and judge agents in the Scenario framework.
+"""
+
+from typing import Optional
+from pydantic import BaseModel
+
+
+class ModelConfig(BaseModel):
+    """
+    Configuration for LLM model settings.
+
+    This class encapsulates all the parameters needed to configure an LLM model
+    for use with user simulator and judge agents in the Scenario framework.
+
+    Attributes:
+        model: The model identifier (e.g., "openai/gpt-4.1-mini", "anthropic/claude-3-sonnet")
+        api_key: Optional API key for the model provider
+        temperature: Sampling temperature for response generation (0.0 = deterministic, 1.0 = creative)
+        max_tokens: Maximum number of tokens to generate in responses
+
+    Example:
+        ```
+        model_config = ModelConfig(
+            model="openai/gpt-4.1",
+            api_key="your-api-key",
+            temperature=0.1,
+            max_tokens=1000
+        )
+        ```
+    """
+
+    model: str
+    api_key: Optional[str] = None
+    temperature: float = 0.0
+    max_tokens: Optional[int] = None
scenario/{config.py → config/scenario.py}
RENAMED
@@ -1,43 +1,14 @@
 """
-
+Scenario configuration for Scenario.
 
-This module provides configuration
-Scenario testing framework, including
-and debugging options.
+This module provides the main configuration class for customizing the behavior
+of the Scenario testing framework, including execution parameters and debugging options.
 """
 
 from typing import Optional, Union, ClassVar
 from pydantic import BaseModel
 
-
-class ModelConfig(BaseModel):
-    """
-    Configuration for LLM model settings.
-
-    This class encapsulates all the parameters needed to configure an LLM model
-    for use with user simulator and judge agents in the Scenario framework.
-
-    Attributes:
-        model: The model identifier (e.g., "openai/gpt-4.1-mini", "anthropic/claude-3-sonnet")
-        api_key: Optional API key for the model provider
-        temperature: Sampling temperature for response generation (0.0 = deterministic, 1.0 = creative)
-        max_tokens: Maximum number of tokens to generate in responses
-
-    Example:
-        ```
-        model_config = ModelConfig(
-            model="openai/gpt-4.1-mini",
-            api_key="your-api-key",
-            temperature=0.1,
-            max_tokens=1000
-        )
-        ```
-    """
-
-    model: str
-    api_key: Optional[str] = None
-    temperature: float = 0.0
-    max_tokens: Optional[int] = None
+from .model import ModelConfig
 
 
 class ScenarioConfig(BaseModel):
@@ -69,7 +40,7 @@ class ScenarioConfig(BaseModel):
         # Or create a specific config instance
         config = ScenarioConfig(
             default_model=ModelConfig(
-                model="openai/gpt-4.1
+                model="openai/gpt-4.1",
                 temperature=0.2
             ),
             max_turns=20
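The net effect of this split is that `ModelConfig` now lives in its own module but is re-exported from `scenario.config`, so the documented usage keeps working. A sketch of the combined usage shown in the docstrings:

```python
from scenario.config import ModelConfig, ScenarioConfig

config = ScenarioConfig(
    default_model=ModelConfig(
        model="openai/gpt-4.1",
        temperature=0.2,
    ),
    max_turns=20,
)
```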
scenario/judge_agent.py
CHANGED
@@ -62,7 +62,7 @@ class JudgeAgent(AgentAdapter):
 
     # Customized judge with specific model and behavior
     strict_judge = scenario.JudgeAgent(
-        model="openai/gpt-4.1
+        model="openai/gpt-4.1",
         criteria=[
             "Code examples are syntactically correct",
             "Explanations are technically accurate",
@@ -120,7 +120,7 @@ class JudgeAgent(AgentAdapter):
         criteria: List of success criteria to evaluate the conversation against.
             Can include both positive requirements ("Agent provides helpful responses")
             and negative constraints ("Agent should not provide personal information").
-        model: LLM model identifier (e.g., "openai/gpt-4.1
+        model: LLM model identifier (e.g., "openai/gpt-4.1").
             If not provided, uses the default model from global configuration.
         api_key: API key for the model provider. If not provided,
             uses the key from global configuration or environment.
scenario/user_simulator_agent.py
CHANGED
@@ -48,12 +48,12 @@ class UserSimulatorAgent(AgentAdapter):
 
     # Basic user simulator with default behavior
     user_sim = scenario.UserSimulatorAgent(
-        model="openai/gpt-4.1
+        model="openai/gpt-4.1"
     )
 
     # Customized user simulator
     custom_user_sim = scenario.UserSimulatorAgent(
-        model="openai/gpt-4.1
+        model="openai/gpt-4.1",
         temperature=0.3,
         system_prompt="You are a technical user who asks detailed questions"
     )
@@ -97,7 +97,7 @@ class UserSimulatorAgent(AgentAdapter):
         Initialize a user simulator agent.
 
         Args:
-            model: LLM model identifier (e.g., "openai/gpt-4.1
+            model: LLM model identifier (e.g., "openai/gpt-4.1").
                 If not provided, uses the default model from global configuration.
             api_key: API key for the model provider. If not provided,
                 uses the key from global configuration or environment.
@@ -114,11 +114,11 @@ class UserSimulatorAgent(AgentAdapter):
         Example:
             ```
             # Basic user simulator
-            user_sim = UserSimulatorAgent(model="openai/gpt-4.1
+            user_sim = UserSimulatorAgent(model="openai/gpt-4.1")
 
             # User simulator with custom persona
             expert_user = UserSimulatorAgent(
-                model="openai/gpt-4.1
+                model="openai/gpt-4.1",
                 temperature=0.2,
                 system_prompt='''
                 You are an expert software developer testing an AI coding assistant.
@@ -203,7 +203,7 @@ Your goal (assistant) is to interact with the Agent Under Test (user) as if you
 </scenario>
 
 <rules>
-- DO NOT carry over any requests yourself, YOU ARE NOT the assistant today, you are the user
+- DO NOT carry over any requests yourself, YOU ARE NOT the assistant today, you are the user, send the user message and just STOP.
 </rules>
 """,
 },
{langwatch_scenario-0.7.3.dist-info → langwatch_scenario-0.7.7.dist-info}/WHEEL
File without changes
{langwatch_scenario-0.7.3.dist-info → langwatch_scenario-0.7.7.dist-info}/entry_points.txt
File without changes
{langwatch_scenario-0.7.3.dist-info → langwatch_scenario-0.7.7.dist-info}/top_level.txt
File without changes