PyPI - langwatch-scenario - Versions diffs - 0.7.2__py3-none-any.whl → 0.7.7__py3-none-any.whl - Mend

langwatch-scenario 0.7.2__py3-none-any.whl → 0.7.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21) hide show

{langwatch_scenario-0.7.2.dist-info → langwatch_scenario-0.7.7.dist-info}/METADATA +56 -12
{langwatch_scenario-0.7.2.dist-info → langwatch_scenario-0.7.7.dist-info}/RECORD +21 -17
scenario/__init__.py +1 -1
scenario/_error_messages.py +2 -2
scenario/_events/event_alert_message_logger.py +95 -0
scenario/_events/event_bus.py +90 -30
scenario/_events/event_reporter.py +43 -28
scenario/_generated/langwatch_api_client/README.md +27 -17
scenario/_utils/__init__.py +16 -3
scenario/_utils/ids.py +76 -38
scenario/config/__init__.py +43 -0
scenario/config/langwatch.py +51 -0
scenario/config/model.py +39 -0
scenario/{config.py → config/scenario.py} +5 -34
scenario/judge_agent.py +2 -2
scenario/scenario_executor.py +16 -4
scenario/scenario_state.py +2 -1
scenario/user_simulator_agent.py +6 -6
{langwatch_scenario-0.7.2.dist-info → langwatch_scenario-0.7.7.dist-info}/WHEEL +0 -0
{langwatch_scenario-0.7.2.dist-info → langwatch_scenario-0.7.7.dist-info}/entry_points.txt +0 -0
{langwatch_scenario-0.7.2.dist-info → langwatch_scenario-0.7.7.dist-info}/top_level.txt +0 -0

scenario/scenario_executor.py CHANGED Viewed

@@ -31,7 +31,7 @@ from scenario._utils import (
     print_openai_messages,
     show_spinner,
     await_if_awaitable,
-    get_or_create_batch_run_id,
+    get_batch_run_id,
     generate_scenario_run_id,
 )
 from openai.types.chat import (
@@ -105,6 +105,7 @@ class ScenarioExecutor:
     event_bus: ScenarioEventBus
     batch_run_id: str
+    scenario_set_id: str
     def __init__(
         self,
@@ -118,6 +119,7 @@ class ScenarioExecutor:
         cache_key: Optional[str] = None,
         debug: Optional[bool] = None,
         event_bus: Optional[ScenarioEventBus] = None,
+        set_id: Optional[str] = None,
     ):
         """
         Initialize a scenario executor.
@@ -139,6 +141,7 @@ class ScenarioExecutor:
             debug: Whether to enable debug mode with step-by-step execution.
                   Overrides global configuration for this scenario.
             event_bus: Optional event bus that will subscribe to this executor's events
+            set_id: Optional set identifier for grouping related scenarios
         """
         self.name = name
         self.description = description
@@ -162,7 +165,8 @@ class ScenarioExecutor:
         self.event_bus = event_bus or ScenarioEventBus()
         self.event_bus.subscribe_to_events(self._events)
-        self.batch_run_id = get_or_create_batch_run_id()
+        self.batch_run_id = get_batch_run_id()
+        self.scenario_set_id = set_id or "default"
     @property
     def events(self) -> Observable:
@@ -702,12 +706,14 @@ class ScenarioExecutor:
             batch_run_id: Unique identifier for the batch of scenario runs
             scenario_run_id: Unique identifier for this specific scenario run
             scenario_id: Human-readable name/identifier for the scenario
+            scenario_set_id: Set identifier for grouping related scenarios
             timestamp: Unix timestamp in milliseconds when the event occurred
         """
         batch_run_id: str
         scenario_run_id: str
         scenario_id: str
+        scenario_set_id: str
         timestamp: int
     def _create_common_event_fields(self, scenario_run_id: str) -> _CommonEventFields:
@@ -727,6 +733,7 @@ class ScenarioExecutor:
             "batch_run_id": self.batch_run_id,
             "scenario_run_id": scenario_run_id,
             "scenario_id": self.name,
+            "scenario_set_id": self.scenario_set_id,
             "timestamp": int(time.time() * 1000),
         }
@@ -820,6 +827,7 @@ async def run(
     cache_key: Optional[str] = None,
     debug: Optional[bool] = None,
     script: Optional[List[ScriptStep]] = None,
+    set_id: Optional[str] = None,
 ) -> ScenarioResult:
     """
     High-level interface for running a scenario test.
@@ -837,6 +845,7 @@ async def run(
         cache_key: Cache key for deterministic behavior
         debug: Enable debug mode for step-by-step execution
         script: Optional script steps to control scenario flow
+        set_id: Optional set identifier for grouping related scenarios
     Returns:
         ScenarioResult containing the test outcome, conversation history,
@@ -854,7 +863,8 @@ async def run(
                my_agent,
                scenario.UserSimulatorAgent(),
                scenario.JudgeAgent(criteria=["Agent provides helpful response"])
-           ]
+           ],
+           set_id="customer-support-tests"
         )
         # Scripted scenario with custom evaluations
@@ -871,7 +881,8 @@ async def run(
                scenario.agent(),
                custom_eval,
                scenario.succeed()
-           ]
+           ],
+           set_id="integration-tests"
         )
         # Results analysis
@@ -889,6 +900,7 @@ async def run(
         cache_key=cache_key,
         debug=debug,
         script=script,
+        set_id=set_id,
     )
     # We'll use a thread pool to run the execution logic, we

scenario/scenario_state.py CHANGED Viewed

@@ -6,7 +6,7 @@ of a scenario execution, including conversation history, turn tracking, and
 utility methods for inspecting the conversation.
 """
-from typing import List, Dict, Any, Optional, TYPE_CHECKING
+from typing import List, Optional, TYPE_CHECKING
 from openai.types.chat import (
     ChatCompletionMessageParam,
     ChatCompletionMessageToolCallParam,
@@ -68,6 +68,7 @@ class ScenarioState(BaseModel):
         )
         ```
     """
     description: str
     messages: List[ChatCompletionMessageParam]
     thread_id: str

scenario/user_simulator_agent.py CHANGED Viewed

@@ -48,12 +48,12 @@ class UserSimulatorAgent(AgentAdapter):
         # Basic user simulator with default behavior
         user_sim = scenario.UserSimulatorAgent(
-            model="openai/gpt-4.1-mini"
+            model="openai/gpt-4.1"
         )
         # Customized user simulator
         custom_user_sim = scenario.UserSimulatorAgent(
-            model="openai/gpt-4.1-mini",
+            model="openai/gpt-4.1",
             temperature=0.3,
             system_prompt="You are a technical user who asks detailed questions"
         )
@@ -97,7 +97,7 @@ class UserSimulatorAgent(AgentAdapter):
         Initialize a user simulator agent.
         Args:
-            model: LLM model identifier (e.g., "openai/gpt-4.1-mini").
+            model: LLM model identifier (e.g., "openai/gpt-4.1").
                    If not provided, uses the default model from global configuration.
             api_key: API key for the model provider. If not provided,
                      uses the key from global configuration or environment.
@@ -114,11 +114,11 @@ class UserSimulatorAgent(AgentAdapter):
         Example:
             ```
             # Basic user simulator
-            user_sim = UserSimulatorAgent(model="openai/gpt-4.1-mini")
+            user_sim = UserSimulatorAgent(model="openai/gpt-4.1")
             # User simulator with custom persona
             expert_user = UserSimulatorAgent(
-                model="openai/gpt-4.1-mini",
+                model="openai/gpt-4.1",
                 temperature=0.2,
                 system_prompt='''
                 You are an expert software developer testing an AI coding assistant.
@@ -203,7 +203,7 @@ Your goal (assistant) is to interact with the Agent Under Test (user) as if you
 </scenario>
 <rules>
-- DO NOT carry over any requests yourself, YOU ARE NOT the assistant today, you are the user
+- DO NOT carry over any requests yourself, YOU ARE NOT the assistant today, you are the user, send the user message and just STOP.
 </rules>
 """,
             },

{langwatch_scenario-0.7.2.dist-info → langwatch_scenario-0.7.7.dist-info}/WHEEL RENAMED Viewed

File without changes

{langwatch_scenario-0.7.2.dist-info → langwatch_scenario-0.7.7.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{langwatch_scenario-0.7.2.dist-info → langwatch_scenario-0.7.7.dist-info}/top_level.txt RENAMED Viewed

File without changes

langwatch-scenario 0.7.2__py3-none-any.whl → 0.7.7__py3-none-any.whl

langwatch-scenario 0.7.2__py3-none-any.whl → 0.7.7__py3-none-any.whl