langwatch-scenario 0.7.2__py3-none-any.whl → 0.7.7__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
@@ -31,7 +31,7 @@ from scenario._utils import (
31
31
  print_openai_messages,
32
32
  show_spinner,
33
33
  await_if_awaitable,
34
- get_or_create_batch_run_id,
34
+ get_batch_run_id,
35
35
  generate_scenario_run_id,
36
36
  )
37
37
  from openai.types.chat import (
@@ -105,6 +105,7 @@ class ScenarioExecutor:
105
105
  event_bus: ScenarioEventBus
106
106
 
107
107
  batch_run_id: str
108
+ scenario_set_id: str
108
109
 
109
110
  def __init__(
110
111
  self,
@@ -118,6 +119,7 @@ class ScenarioExecutor:
118
119
  cache_key: Optional[str] = None,
119
120
  debug: Optional[bool] = None,
120
121
  event_bus: Optional[ScenarioEventBus] = None,
122
+ set_id: Optional[str] = None,
121
123
  ):
122
124
  """
123
125
  Initialize a scenario executor.
@@ -139,6 +141,7 @@ class ScenarioExecutor:
139
141
  debug: Whether to enable debug mode with step-by-step execution.
140
142
  Overrides global configuration for this scenario.
141
143
  event_bus: Optional event bus that will subscribe to this executor's events
144
+ set_id: Optional set identifier for grouping related scenarios
142
145
  """
143
146
  self.name = name
144
147
  self.description = description
@@ -162,7 +165,8 @@ class ScenarioExecutor:
162
165
  self.event_bus = event_bus or ScenarioEventBus()
163
166
  self.event_bus.subscribe_to_events(self._events)
164
167
 
165
- self.batch_run_id = get_or_create_batch_run_id()
168
+ self.batch_run_id = get_batch_run_id()
169
+ self.scenario_set_id = set_id or "default"
166
170
 
167
171
  @property
168
172
  def events(self) -> Observable:
@@ -702,12 +706,14 @@ class ScenarioExecutor:
702
706
  batch_run_id: Unique identifier for the batch of scenario runs
703
707
  scenario_run_id: Unique identifier for this specific scenario run
704
708
  scenario_id: Human-readable name/identifier for the scenario
709
+ scenario_set_id: Set identifier for grouping related scenarios
705
710
  timestamp: Unix timestamp in milliseconds when the event occurred
706
711
  """
707
712
 
708
713
  batch_run_id: str
709
714
  scenario_run_id: str
710
715
  scenario_id: str
716
+ scenario_set_id: str
711
717
  timestamp: int
712
718
 
713
719
  def _create_common_event_fields(self, scenario_run_id: str) -> _CommonEventFields:
@@ -727,6 +733,7 @@ class ScenarioExecutor:
727
733
  "batch_run_id": self.batch_run_id,
728
734
  "scenario_run_id": scenario_run_id,
729
735
  "scenario_id": self.name,
736
+ "scenario_set_id": self.scenario_set_id,
730
737
  "timestamp": int(time.time() * 1000),
731
738
  }
732
739
 
@@ -820,6 +827,7 @@ async def run(
820
827
  cache_key: Optional[str] = None,
821
828
  debug: Optional[bool] = None,
822
829
  script: Optional[List[ScriptStep]] = None,
830
+ set_id: Optional[str] = None,
823
831
  ) -> ScenarioResult:
824
832
  """
825
833
  High-level interface for running a scenario test.
@@ -837,6 +845,7 @@ async def run(
837
845
  cache_key: Cache key for deterministic behavior
838
846
  debug: Enable debug mode for step-by-step execution
839
847
  script: Optional script steps to control scenario flow
848
+ set_id: Optional set identifier for grouping related scenarios
840
849
 
841
850
  Returns:
842
851
  ScenarioResult containing the test outcome, conversation history,
@@ -854,7 +863,8 @@ async def run(
854
863
  my_agent,
855
864
  scenario.UserSimulatorAgent(),
856
865
  scenario.JudgeAgent(criteria=["Agent provides helpful response"])
857
- ]
866
+ ],
867
+ set_id="customer-support-tests"
858
868
  )
859
869
 
860
870
  # Scripted scenario with custom evaluations
@@ -871,7 +881,8 @@ async def run(
871
881
  scenario.agent(),
872
882
  custom_eval,
873
883
  scenario.succeed()
874
- ]
884
+ ],
885
+ set_id="integration-tests"
875
886
  )
876
887
 
877
888
  # Results analysis
@@ -889,6 +900,7 @@ async def run(
889
900
  cache_key=cache_key,
890
901
  debug=debug,
891
902
  script=script,
903
+ set_id=set_id,
892
904
  )
893
905
 
894
906
  # We'll use a thread pool to run the execution logic, we
@@ -6,7 +6,7 @@ of a scenario execution, including conversation history, turn tracking, and
6
6
  utility methods for inspecting the conversation.
7
7
  """
8
8
 
9
- from typing import List, Dict, Any, Optional, TYPE_CHECKING
9
+ from typing import List, Optional, TYPE_CHECKING
10
10
  from openai.types.chat import (
11
11
  ChatCompletionMessageParam,
12
12
  ChatCompletionMessageToolCallParam,
@@ -68,6 +68,7 @@ class ScenarioState(BaseModel):
68
68
  )
69
69
  ```
70
70
  """
71
+
71
72
  description: str
72
73
  messages: List[ChatCompletionMessageParam]
73
74
  thread_id: str
@@ -48,12 +48,12 @@ class UserSimulatorAgent(AgentAdapter):
48
48
 
49
49
  # Basic user simulator with default behavior
50
50
  user_sim = scenario.UserSimulatorAgent(
51
- model="openai/gpt-4.1-mini"
51
+ model="openai/gpt-4.1"
52
52
  )
53
53
 
54
54
  # Customized user simulator
55
55
  custom_user_sim = scenario.UserSimulatorAgent(
56
- model="openai/gpt-4.1-mini",
56
+ model="openai/gpt-4.1",
57
57
  temperature=0.3,
58
58
  system_prompt="You are a technical user who asks detailed questions"
59
59
  )
@@ -97,7 +97,7 @@ class UserSimulatorAgent(AgentAdapter):
97
97
  Initialize a user simulator agent.
98
98
 
99
99
  Args:
100
- model: LLM model identifier (e.g., "openai/gpt-4.1-mini").
100
+ model: LLM model identifier (e.g., "openai/gpt-4.1").
101
101
  If not provided, uses the default model from global configuration.
102
102
  api_key: API key for the model provider. If not provided,
103
103
  uses the key from global configuration or environment.
@@ -114,11 +114,11 @@ class UserSimulatorAgent(AgentAdapter):
114
114
  Example:
115
115
  ```
116
116
  # Basic user simulator
117
- user_sim = UserSimulatorAgent(model="openai/gpt-4.1-mini")
117
+ user_sim = UserSimulatorAgent(model="openai/gpt-4.1")
118
118
 
119
119
  # User simulator with custom persona
120
120
  expert_user = UserSimulatorAgent(
121
- model="openai/gpt-4.1-mini",
121
+ model="openai/gpt-4.1",
122
122
  temperature=0.2,
123
123
  system_prompt='''
124
124
  You are an expert software developer testing an AI coding assistant.
@@ -203,7 +203,7 @@ Your goal (assistant) is to interact with the Agent Under Test (user) as if you
203
203
  </scenario>
204
204
 
205
205
  <rules>
206
- - DO NOT carry over any requests yourself, YOU ARE NOT the assistant today, you are the user
206
+ - DO NOT carry over any requests yourself, YOU ARE NOT the assistant today, you are the user, send the user message and just STOP.
207
207
  </rules>
208
208
  """,
209
209
  },