langwatch-scenario 0.7.2__py3-none-any.whl → 0.7.7__py3-none-any.whl
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- {langwatch_scenario-0.7.2.dist-info → langwatch_scenario-0.7.7.dist-info}/METADATA +56 -12
- {langwatch_scenario-0.7.2.dist-info → langwatch_scenario-0.7.7.dist-info}/RECORD +21 -17
- scenario/__init__.py +1 -1
- scenario/_error_messages.py +2 -2
- scenario/_events/event_alert_message_logger.py +95 -0
- scenario/_events/event_bus.py +90 -30
- scenario/_events/event_reporter.py +43 -28
- scenario/_generated/langwatch_api_client/README.md +27 -17
- scenario/_utils/__init__.py +16 -3
- scenario/_utils/ids.py +76 -38
- scenario/config/__init__.py +43 -0
- scenario/config/langwatch.py +51 -0
- scenario/config/model.py +39 -0
- scenario/{config.py → config/scenario.py} +5 -34
- scenario/judge_agent.py +2 -2
- scenario/scenario_executor.py +16 -4
- scenario/scenario_state.py +2 -1
- scenario/user_simulator_agent.py +6 -6
- {langwatch_scenario-0.7.2.dist-info → langwatch_scenario-0.7.7.dist-info}/WHEEL +0 -0
- {langwatch_scenario-0.7.2.dist-info → langwatch_scenario-0.7.7.dist-info}/entry_points.txt +0 -0
- {langwatch_scenario-0.7.2.dist-info → langwatch_scenario-0.7.7.dist-info}/top_level.txt +0 -0
scenario/scenario_executor.py
CHANGED
@@ -31,7 +31,7 @@ from scenario._utils import (
|
|
31
31
|
print_openai_messages,
|
32
32
|
show_spinner,
|
33
33
|
await_if_awaitable,
|
34
|
-
|
34
|
+
get_batch_run_id,
|
35
35
|
generate_scenario_run_id,
|
36
36
|
)
|
37
37
|
from openai.types.chat import (
|
@@ -105,6 +105,7 @@ class ScenarioExecutor:
|
|
105
105
|
event_bus: ScenarioEventBus
|
106
106
|
|
107
107
|
batch_run_id: str
|
108
|
+
scenario_set_id: str
|
108
109
|
|
109
110
|
def __init__(
|
110
111
|
self,
|
@@ -118,6 +119,7 @@ class ScenarioExecutor:
|
|
118
119
|
cache_key: Optional[str] = None,
|
119
120
|
debug: Optional[bool] = None,
|
120
121
|
event_bus: Optional[ScenarioEventBus] = None,
|
122
|
+
set_id: Optional[str] = None,
|
121
123
|
):
|
122
124
|
"""
|
123
125
|
Initialize a scenario executor.
|
@@ -139,6 +141,7 @@ class ScenarioExecutor:
|
|
139
141
|
debug: Whether to enable debug mode with step-by-step execution.
|
140
142
|
Overrides global configuration for this scenario.
|
141
143
|
event_bus: Optional event bus that will subscribe to this executor's events
|
144
|
+
set_id: Optional set identifier for grouping related scenarios
|
142
145
|
"""
|
143
146
|
self.name = name
|
144
147
|
self.description = description
|
@@ -162,7 +165,8 @@ class ScenarioExecutor:
|
|
162
165
|
self.event_bus = event_bus or ScenarioEventBus()
|
163
166
|
self.event_bus.subscribe_to_events(self._events)
|
164
167
|
|
165
|
-
self.batch_run_id =
|
168
|
+
self.batch_run_id = get_batch_run_id()
|
169
|
+
self.scenario_set_id = set_id or "default"
|
166
170
|
|
167
171
|
@property
|
168
172
|
def events(self) -> Observable:
|
@@ -702,12 +706,14 @@ class ScenarioExecutor:
|
|
702
706
|
batch_run_id: Unique identifier for the batch of scenario runs
|
703
707
|
scenario_run_id: Unique identifier for this specific scenario run
|
704
708
|
scenario_id: Human-readable name/identifier for the scenario
|
709
|
+
scenario_set_id: Set identifier for grouping related scenarios
|
705
710
|
timestamp: Unix timestamp in milliseconds when the event occurred
|
706
711
|
"""
|
707
712
|
|
708
713
|
batch_run_id: str
|
709
714
|
scenario_run_id: str
|
710
715
|
scenario_id: str
|
716
|
+
scenario_set_id: str
|
711
717
|
timestamp: int
|
712
718
|
|
713
719
|
def _create_common_event_fields(self, scenario_run_id: str) -> _CommonEventFields:
|
@@ -727,6 +733,7 @@ class ScenarioExecutor:
|
|
727
733
|
"batch_run_id": self.batch_run_id,
|
728
734
|
"scenario_run_id": scenario_run_id,
|
729
735
|
"scenario_id": self.name,
|
736
|
+
"scenario_set_id": self.scenario_set_id,
|
730
737
|
"timestamp": int(time.time() * 1000),
|
731
738
|
}
|
732
739
|
|
@@ -820,6 +827,7 @@ async def run(
|
|
820
827
|
cache_key: Optional[str] = None,
|
821
828
|
debug: Optional[bool] = None,
|
822
829
|
script: Optional[List[ScriptStep]] = None,
|
830
|
+
set_id: Optional[str] = None,
|
823
831
|
) -> ScenarioResult:
|
824
832
|
"""
|
825
833
|
High-level interface for running a scenario test.
|
@@ -837,6 +845,7 @@ async def run(
|
|
837
845
|
cache_key: Cache key for deterministic behavior
|
838
846
|
debug: Enable debug mode for step-by-step execution
|
839
847
|
script: Optional script steps to control scenario flow
|
848
|
+
set_id: Optional set identifier for grouping related scenarios
|
840
849
|
|
841
850
|
Returns:
|
842
851
|
ScenarioResult containing the test outcome, conversation history,
|
@@ -854,7 +863,8 @@ async def run(
|
|
854
863
|
my_agent,
|
855
864
|
scenario.UserSimulatorAgent(),
|
856
865
|
scenario.JudgeAgent(criteria=["Agent provides helpful response"])
|
857
|
-
]
|
866
|
+
],
|
867
|
+
set_id="customer-support-tests"
|
858
868
|
)
|
859
869
|
|
860
870
|
# Scripted scenario with custom evaluations
|
@@ -871,7 +881,8 @@ async def run(
|
|
871
881
|
scenario.agent(),
|
872
882
|
custom_eval,
|
873
883
|
scenario.succeed()
|
874
|
-
]
|
884
|
+
],
|
885
|
+
set_id="integration-tests"
|
875
886
|
)
|
876
887
|
|
877
888
|
# Results analysis
|
@@ -889,6 +900,7 @@ async def run(
|
|
889
900
|
cache_key=cache_key,
|
890
901
|
debug=debug,
|
891
902
|
script=script,
|
903
|
+
set_id=set_id,
|
892
904
|
)
|
893
905
|
|
894
906
|
# We'll use a thread pool to run the execution logic, we
|
scenario/scenario_state.py
CHANGED
@@ -6,7 +6,7 @@ of a scenario execution, including conversation history, turn tracking, and
|
|
6
6
|
utility methods for inspecting the conversation.
|
7
7
|
"""
|
8
8
|
|
9
|
-
from typing import List,
|
9
|
+
from typing import List, Optional, TYPE_CHECKING
|
10
10
|
from openai.types.chat import (
|
11
11
|
ChatCompletionMessageParam,
|
12
12
|
ChatCompletionMessageToolCallParam,
|
@@ -68,6 +68,7 @@ class ScenarioState(BaseModel):
|
|
68
68
|
)
|
69
69
|
```
|
70
70
|
"""
|
71
|
+
|
71
72
|
description: str
|
72
73
|
messages: List[ChatCompletionMessageParam]
|
73
74
|
thread_id: str
|
scenario/user_simulator_agent.py
CHANGED
@@ -48,12 +48,12 @@ class UserSimulatorAgent(AgentAdapter):
|
|
48
48
|
|
49
49
|
# Basic user simulator with default behavior
|
50
50
|
user_sim = scenario.UserSimulatorAgent(
|
51
|
-
model="openai/gpt-4.1
|
51
|
+
model="openai/gpt-4.1"
|
52
52
|
)
|
53
53
|
|
54
54
|
# Customized user simulator
|
55
55
|
custom_user_sim = scenario.UserSimulatorAgent(
|
56
|
-
model="openai/gpt-4.1
|
56
|
+
model="openai/gpt-4.1",
|
57
57
|
temperature=0.3,
|
58
58
|
system_prompt="You are a technical user who asks detailed questions"
|
59
59
|
)
|
@@ -97,7 +97,7 @@ class UserSimulatorAgent(AgentAdapter):
|
|
97
97
|
Initialize a user simulator agent.
|
98
98
|
|
99
99
|
Args:
|
100
|
-
model: LLM model identifier (e.g., "openai/gpt-4.1
|
100
|
+
model: LLM model identifier (e.g., "openai/gpt-4.1").
|
101
101
|
If not provided, uses the default model from global configuration.
|
102
102
|
api_key: API key for the model provider. If not provided,
|
103
103
|
uses the key from global configuration or environment.
|
@@ -114,11 +114,11 @@ class UserSimulatorAgent(AgentAdapter):
|
|
114
114
|
Example:
|
115
115
|
```
|
116
116
|
# Basic user simulator
|
117
|
-
user_sim = UserSimulatorAgent(model="openai/gpt-4.1
|
117
|
+
user_sim = UserSimulatorAgent(model="openai/gpt-4.1")
|
118
118
|
|
119
119
|
# User simulator with custom persona
|
120
120
|
expert_user = UserSimulatorAgent(
|
121
|
-
model="openai/gpt-4.1
|
121
|
+
model="openai/gpt-4.1",
|
122
122
|
temperature=0.2,
|
123
123
|
system_prompt='''
|
124
124
|
You are an expert software developer testing an AI coding assistant.
|
@@ -203,7 +203,7 @@ Your goal (assistant) is to interact with the Agent Under Test (user) as if you
|
|
203
203
|
</scenario>
|
204
204
|
|
205
205
|
<rules>
|
206
|
-
- DO NOT carry over any requests yourself, YOU ARE NOT the assistant today, you are the user
|
206
|
+
- DO NOT carry over any requests yourself, YOU ARE NOT the assistant today, you are the user, send the user message and just STOP.
|
207
207
|
</rules>
|
208
208
|
""",
|
209
209
|
},
|
File without changes
|
File without changes
|
File without changes
|