langwatch-scenario 0.7.2__py3-none-any.whl → 0.7.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {langwatch_scenario-0.7.2.dist-info → langwatch_scenario-0.7.3.dist-info}/METADATA +43 -1
- {langwatch_scenario-0.7.2.dist-info → langwatch_scenario-0.7.3.dist-info}/RECORD +9 -9
- scenario/_utils/__init__.py +16 -3
- scenario/_utils/ids.py +76 -38
- scenario/scenario_executor.py +16 -4
- scenario/scenario_state.py +2 -1
- {langwatch_scenario-0.7.2.dist-info → langwatch_scenario-0.7.3.dist-info}/WHEEL +0 -0
- {langwatch_scenario-0.7.2.dist-info → langwatch_scenario-0.7.3.dist-info}/entry_points.txt +0 -0
- {langwatch_scenario-0.7.2.dist-info → langwatch_scenario-0.7.3.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: langwatch-scenario
|
3
|
-
Version: 0.7.2
|
3
|
+
Version: 0.7.3
|
4
4
|
Summary: The end-to-end agent testing library
|
5
5
|
Author-email: LangWatch Team <support@langwatch.ai>
|
6
6
|
License: MIT
|
@@ -450,6 +450,48 @@ class MyAgent:
|
|
450
450
|
|
451
451
|
This will cache any function call you decorate when running the tests and make them repeatable, hashed by the function arguments, the scenario being executed, and the `cache_key` you provided. You can exclude arguments that should not be hashed for the cache key by naming them in the `ignore` argument.
|
452
452
|
|
453
|
+
## Grouping Your Sets and Batches
|
454
|
+
|
455
|
+
While optional, we strongly recommend setting stable identifiers for your scenarios, sets, and batches for better organization and tracking in LangWatch.
|
456
|
+
|
457
|
+
- **set_id**: Groups related scenarios into a test suite. This corresponds to the "Simulation Set" in the UI.
|
458
|
+
- **batch_run_id**: Groups all scenarios that were run together in a single execution (e.g., a single CI job). This is automatically generated but can be overridden.
|
459
|
+
|
460
|
+
```python
|
461
|
+
import os
|
462
|
+
|
463
|
+
result = await scenario.run(
|
464
|
+
name="my first scenario",
|
465
|
+
description="A simple test to see if the agent responds.",
|
466
|
+
set_id="my-test-suite",
|
467
|
+
agents=[
|
468
|
+
scenario.Agent(my_agent),
|
469
|
+
scenario.UserSimulatorAgent(),
|
470
|
+
]
|
471
|
+
)
|
472
|
+
```
|
473
|
+
|
474
|
+
You can also set the `batch_run_id` using environment variables for CI/CD integration:
|
475
|
+
|
476
|
+
```python
|
477
|
+
import os
|
478
|
+
|
479
|
+
# Set batch ID for CI/CD integration
|
480
|
+
os.environ["SCENARIO_BATCH_RUN_ID"] = os.environ.get("GITHUB_RUN_ID", "local-run")
|
481
|
+
|
482
|
+
result = await scenario.run(
|
483
|
+
name="my first scenario",
|
484
|
+
description="A simple test to see if the agent responds.",
|
485
|
+
set_id="my-test-suite",
|
486
|
+
agents=[
|
487
|
+
scenario.Agent(my_agent),
|
488
|
+
scenario.UserSimulatorAgent(),
|
489
|
+
]
|
490
|
+
)
|
491
|
+
```
|
492
|
+
|
493
|
+
The `batch_run_id` is automatically generated for each test run, but you can also set it globally using the `SCENARIO_BATCH_RUN_ID` environment variable.
|
494
|
+
|
453
495
|
## Disable Output
|
454
496
|
|
455
497
|
You can remove the `-s` flag from pytest to hide the output during test, which will only show up if the test fails. Alternatively, you can set `verbose=False` in the `Scenario.configure` method or in the specific scenario you are running.
|
@@ -5,8 +5,8 @@ scenario/cache.py,sha256=J6s6Sia_Ce6TrnsInlhfxm6SF8tygo3sH-_cQCRX1WA,6213
|
|
5
5
|
scenario/config.py,sha256=xhUuXH-sThwPTmJNSuajKxX-WC_tcFwJ1jZc119DswA,6093
|
6
6
|
scenario/judge_agent.py,sha256=d8vORsqpUPIA4yhlBTv5Yi4I2MdcfXselYBTFvfZx-4,16221
|
7
7
|
scenario/pytest_plugin.py,sha256=DGrpgB6e71eq8QXWWxwLjAKNhiyYyzfzZ0L5Ax8iEmo,11317
|
8
|
-
scenario/scenario_executor.py,sha256=
|
9
|
-
scenario/scenario_state.py,sha256=
|
8
|
+
scenario/scenario_executor.py,sha256=2ZPy2cywwEMIbUfBP1jHN__Ffjf5WGB144MX2SNr5IM,33101
|
9
|
+
scenario/scenario_state.py,sha256=LWGqEQN-Yz0DIiC-TyMRHd-9rEiuBVUHKllMmKv-qGg,7029
|
10
10
|
scenario/script.py,sha256=A0N5pP0l4FFn1xdKc78U_wkwWhEWH3EFeU_LRDtNyEI,12241
|
11
11
|
scenario/types.py,sha256=qH5KFzJBDG1fEJB_qFRVtL3EZulxq3G1mztYczIzIAY,9613
|
12
12
|
scenario/user_simulator_agent.py,sha256=fhwi8W44s343BGrjJXSJw960wcK7MgwTg-epxR1bqHo,9088
|
@@ -226,12 +226,12 @@ scenario/_generated/langwatch_api_client/lang_watch_api_client/models/search_req
|
|
226
226
|
scenario/_generated/langwatch_api_client/lang_watch_api_client/models/search_response.py,sha256=zDYmJ8bFBSJyF9D3cEn_ffrey-ITIfwr-_7eu72zLyk,2832
|
227
227
|
scenario/_generated/langwatch_api_client/lang_watch_api_client/models/timestamps.py,sha256=-nRKUPZTAJQNxiKz128xF7DKgZNbFo4G3mr5xNXrkaw,2173
|
228
228
|
scenario/_generated/langwatch_api_client/lang_watch_api_client/models/trace.py,sha256=K9Lc_EQOrJ2dqMXx9EpiUXReT1_uYF7WRfYyhlfbi3I,7537
|
229
|
-
scenario/_utils/__init__.py,sha256=
|
230
|
-
scenario/_utils/ids.py,sha256=
|
229
|
+
scenario/_utils/__init__.py,sha256=ptNVzmjhypznnozdNIiuBDHZ0NLqtp7xhio9kEDovWQ,1311
|
230
|
+
scenario/_utils/ids.py,sha256=v3JS8J7vrFuubK5bXJviU-BVZoLGWINCN1hUyAO9NZw,2074
|
231
231
|
scenario/_utils/message_conversion.py,sha256=AWHn31E7J0mz9sBXWruVVAgtsrJz1R_xEf-dGbX6jjs,3636
|
232
232
|
scenario/_utils/utils.py,sha256=msQgUWaLh3U9jIIHmxkEbOaklga63AF0KJzsaKa_mZc,14008
|
233
|
-
langwatch_scenario-0.7.
|
234
|
-
langwatch_scenario-0.7.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
235
|
-
langwatch_scenario-0.7.2.dist-info/entry_points.txt,sha256=WlEnJ_gku0i18bIa3DSuGqXRX-QDQLe_s0YmRzK45TI,45
|
236
|
-
langwatch_scenario-0.7.2.dist-info/top_level.txt,sha256=45Mn28aedJsetnBMB5xSmrJ-yo701QLH89Zlz4r1clE,9
|
237
|
-
langwatch_scenario-0.7.2.dist-info/RECORD,,
|
233
|
+
langwatch_scenario-0.7.3.dist-info/METADATA,sha256=5vyo2hMNsKaJKUbDBxUv7-YSD85ufDqczfLS6yp5b1Y,19959
|
234
|
+
langwatch_scenario-0.7.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
235
|
+
langwatch_scenario-0.7.3.dist-info/entry_points.txt,sha256=WlEnJ_gku0i18bIa3DSuGqXRX-QDQLe_s0YmRzK45TI,45
|
236
|
+
langwatch_scenario-0.7.3.dist-info/top_level.txt,sha256=45Mn28aedJsetnBMB5xSmrJ-yo701QLH89Zlz4r1clE,9
|
237
|
+
langwatch_scenario-0.7.3.dist-info/RECORD,,
|
scenario/_utils/__init__.py
CHANGED
@@ -7,7 +7,15 @@ for better user experience during scenario execution.
|
|
7
7
|
"""
|
8
8
|
|
9
9
|
from .message_conversion import convert_agent_return_types_to_openai_messages
|
10
|
-
from .ids import
|
10
|
+
from .ids import (
|
11
|
+
get_batch_run_id,
|
12
|
+
get_or_create_batch_run_id, # Backward compatibility
|
13
|
+
generate_scenario_run_id,
|
14
|
+
generate_scenario_id,
|
15
|
+
generate_thread_id,
|
16
|
+
generate_message_id,
|
17
|
+
safe_parse_uuid,
|
18
|
+
)
|
11
19
|
from .utils import (
|
12
20
|
SerializableAndPydanticEncoder,
|
13
21
|
SerializableWithStringFallback,
|
@@ -20,8 +28,13 @@ from .utils import (
|
|
20
28
|
|
21
29
|
__all__ = [
|
22
30
|
"convert_agent_return_types_to_openai_messages",
|
23
|
-
"
|
31
|
+
"get_batch_run_id",
|
32
|
+
"get_or_create_batch_run_id", # Backward compatibility
|
24
33
|
"generate_scenario_run_id",
|
34
|
+
"generate_scenario_id",
|
35
|
+
"generate_thread_id",
|
36
|
+
"generate_message_id",
|
37
|
+
"safe_parse_uuid",
|
25
38
|
"SerializableAndPydanticEncoder",
|
26
39
|
"SerializableWithStringFallback",
|
27
40
|
"print_openai_messages",
|
@@ -29,4 +42,4 @@ __all__ = [
|
|
29
42
|
"check_valid_return_type",
|
30
43
|
"reverse_roles",
|
31
44
|
"await_if_awaitable",
|
32
|
-
]
|
45
|
+
]
|
scenario/_utils/ids.py
CHANGED
@@ -10,49 +10,87 @@ import os
|
|
10
10
|
import uuid
|
11
11
|
|
12
12
|
|
13
|
-
def
|
13
|
+
def generate_thread_id() -> str:
|
14
|
+
"""
|
15
|
+
Generates a new thread ID.
|
16
|
+
|
17
|
+
Returns:
|
18
|
+
str: A new thread ID.
|
19
|
+
"""
|
20
|
+
return f"thread_{uuid.uuid4()}"
|
21
|
+
|
22
|
+
|
23
|
+
def generate_scenario_run_id() -> str:
|
24
|
+
"""
|
25
|
+
Generates a new scenario run ID.
|
26
|
+
|
27
|
+
Returns:
|
28
|
+
str: A new scenario run ID.
|
29
|
+
"""
|
30
|
+
return f"scenariorun_{uuid.uuid4()}"
|
31
|
+
|
32
|
+
|
33
|
+
def generate_scenario_id() -> str:
|
34
|
+
"""
|
35
|
+
Generates a new scenario ID.
|
36
|
+
|
37
|
+
Returns:
|
38
|
+
str: A new scenario ID.
|
39
|
+
"""
|
40
|
+
return f"scenario_{uuid.uuid4()}"
|
41
|
+
|
42
|
+
|
43
|
+
def get_batch_run_id() -> str:
|
14
44
|
"""
|
15
|
-
Gets
|
16
|
-
|
17
|
-
|
18
|
-
execution, allowing grouping of related scenario runs. This is useful
|
19
|
-
for tracking and reporting on batches of scenarios run together.
|
20
|
-
|
45
|
+
Gets the batch run ID. If it's not set, it will be generated.
|
46
|
+
It can be set via the SCENARIO_BATCH_RUN_ID environment variable.
|
47
|
+
|
21
48
|
Returns:
|
22
|
-
str:
|
23
|
-
|
24
|
-
Example:
|
25
|
-
```python
|
26
|
-
# All scenarios in same process will share this ID
|
27
|
-
batch_id = get_or_create_batch_run_id()
|
28
|
-
print(f"Running scenario in batch: {batch_id}")
|
29
|
-
```
|
30
|
-
"""
|
31
|
-
|
49
|
+
str: The batch run ID.
|
50
|
+
"""
|
32
51
|
# Check if batch ID already exists in environment
|
33
|
-
|
52
|
+
batch_run_id = os.environ.get("SCENARIO_BATCH_RUN_ID")
|
53
|
+
if not batch_run_id:
|
34
54
|
# Generate new batch ID if not set
|
35
|
-
|
36
|
-
|
37
|
-
return os.environ["SCENARIO_BATCH_ID"]
|
55
|
+
batch_run_id = f"scenariobatchrun_{uuid.uuid4()}"
|
56
|
+
os.environ["SCENARIO_BATCH_RUN_ID"] = batch_run_id
|
38
57
|
|
58
|
+
return batch_run_id
|
39
59
|
|
40
|
-
|
60
|
+
|
61
|
+
def generate_message_id() -> str:
|
62
|
+
"""
|
63
|
+
Generates a new message ID.
|
64
|
+
|
65
|
+
Returns:
|
66
|
+
str: A new message ID.
|
67
|
+
"""
|
68
|
+
return f"scenariomsg_{uuid.uuid4()}"
|
69
|
+
|
70
|
+
|
71
|
+
def safe_parse_uuid(id_str: str) -> bool:
|
41
72
|
"""
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
73
|
+
Safely parses a UUID string.
|
74
|
+
|
75
|
+
Args:
|
76
|
+
id_str: The UUID string to parse.
|
77
|
+
|
78
|
+
Returns:
|
79
|
+
bool: True if the UUID string is valid, false otherwise.
|
80
|
+
"""
|
81
|
+
try:
|
82
|
+
uuid.UUID(id_str)
|
83
|
+
return True
|
84
|
+
except (ValueError, TypeError):
|
85
|
+
return False
|
86
|
+
|
87
|
+
|
88
|
+
# Backward compatibility aliases
|
89
|
+
def get_or_create_batch_run_id() -> str:
|
90
|
+
"""
|
91
|
+
Backward compatibility alias for get_batch_run_id().
|
92
|
+
|
48
93
|
Returns:
|
49
|
-
str:
|
50
|
-
|
51
|
-
|
52
|
-
```python
|
53
|
-
# Each scenario gets its own unique ID
|
54
|
-
scenario_id = generate_scenario_run_id()
|
55
|
-
print(f"Running scenario with ID: {scenario_id}")
|
56
|
-
```
|
57
|
-
"""
|
58
|
-
return f"scenario-run-{uuid.uuid4()}"
|
94
|
+
str: The batch run ID.
|
95
|
+
"""
|
96
|
+
return get_batch_run_id()
|
scenario/scenario_executor.py
CHANGED
@@ -31,7 +31,7 @@ from scenario._utils import (
|
|
31
31
|
print_openai_messages,
|
32
32
|
show_spinner,
|
33
33
|
await_if_awaitable,
|
34
|
-
|
34
|
+
get_batch_run_id,
|
35
35
|
generate_scenario_run_id,
|
36
36
|
)
|
37
37
|
from openai.types.chat import (
|
@@ -105,6 +105,7 @@ class ScenarioExecutor:
|
|
105
105
|
event_bus: ScenarioEventBus
|
106
106
|
|
107
107
|
batch_run_id: str
|
108
|
+
scenario_set_id: str
|
108
109
|
|
109
110
|
def __init__(
|
110
111
|
self,
|
@@ -118,6 +119,7 @@ class ScenarioExecutor:
|
|
118
119
|
cache_key: Optional[str] = None,
|
119
120
|
debug: Optional[bool] = None,
|
120
121
|
event_bus: Optional[ScenarioEventBus] = None,
|
122
|
+
set_id: Optional[str] = None,
|
121
123
|
):
|
122
124
|
"""
|
123
125
|
Initialize a scenario executor.
|
@@ -139,6 +141,7 @@ class ScenarioExecutor:
|
|
139
141
|
debug: Whether to enable debug mode with step-by-step execution.
|
140
142
|
Overrides global configuration for this scenario.
|
141
143
|
event_bus: Optional event bus that will subscribe to this executor's events
|
144
|
+
set_id: Optional set identifier for grouping related scenarios
|
142
145
|
"""
|
143
146
|
self.name = name
|
144
147
|
self.description = description
|
@@ -162,7 +165,8 @@ class ScenarioExecutor:
|
|
162
165
|
self.event_bus = event_bus or ScenarioEventBus()
|
163
166
|
self.event_bus.subscribe_to_events(self._events)
|
164
167
|
|
165
|
-
self.batch_run_id =
|
168
|
+
self.batch_run_id = get_batch_run_id()
|
169
|
+
self.scenario_set_id = set_id or "default"
|
166
170
|
|
167
171
|
@property
|
168
172
|
def events(self) -> Observable:
|
@@ -702,12 +706,14 @@ class ScenarioExecutor:
|
|
702
706
|
batch_run_id: Unique identifier for the batch of scenario runs
|
703
707
|
scenario_run_id: Unique identifier for this specific scenario run
|
704
708
|
scenario_id: Human-readable name/identifier for the scenario
|
709
|
+
scenario_set_id: Set identifier for grouping related scenarios
|
705
710
|
timestamp: Unix timestamp in milliseconds when the event occurred
|
706
711
|
"""
|
707
712
|
|
708
713
|
batch_run_id: str
|
709
714
|
scenario_run_id: str
|
710
715
|
scenario_id: str
|
716
|
+
scenario_set_id: str
|
711
717
|
timestamp: int
|
712
718
|
|
713
719
|
def _create_common_event_fields(self, scenario_run_id: str) -> _CommonEventFields:
|
@@ -727,6 +733,7 @@ class ScenarioExecutor:
|
|
727
733
|
"batch_run_id": self.batch_run_id,
|
728
734
|
"scenario_run_id": scenario_run_id,
|
729
735
|
"scenario_id": self.name,
|
736
|
+
"scenario_set_id": self.scenario_set_id,
|
730
737
|
"timestamp": int(time.time() * 1000),
|
731
738
|
}
|
732
739
|
|
@@ -820,6 +827,7 @@ async def run(
|
|
820
827
|
cache_key: Optional[str] = None,
|
821
828
|
debug: Optional[bool] = None,
|
822
829
|
script: Optional[List[ScriptStep]] = None,
|
830
|
+
set_id: Optional[str] = None,
|
823
831
|
) -> ScenarioResult:
|
824
832
|
"""
|
825
833
|
High-level interface for running a scenario test.
|
@@ -837,6 +845,7 @@ async def run(
|
|
837
845
|
cache_key: Cache key for deterministic behavior
|
838
846
|
debug: Enable debug mode for step-by-step execution
|
839
847
|
script: Optional script steps to control scenario flow
|
848
|
+
set_id: Optional set identifier for grouping related scenarios
|
840
849
|
|
841
850
|
Returns:
|
842
851
|
ScenarioResult containing the test outcome, conversation history,
|
@@ -854,7 +863,8 @@ async def run(
|
|
854
863
|
my_agent,
|
855
864
|
scenario.UserSimulatorAgent(),
|
856
865
|
scenario.JudgeAgent(criteria=["Agent provides helpful response"])
|
857
|
-
]
|
866
|
+
],
|
867
|
+
set_id="customer-support-tests"
|
858
868
|
)
|
859
869
|
|
860
870
|
# Scripted scenario with custom evaluations
|
@@ -871,7 +881,8 @@ async def run(
|
|
871
881
|
scenario.agent(),
|
872
882
|
custom_eval,
|
873
883
|
scenario.succeed()
|
874
|
-
]
|
884
|
+
],
|
885
|
+
set_id="integration-tests"
|
875
886
|
)
|
876
887
|
|
877
888
|
# Results analysis
|
@@ -889,6 +900,7 @@ async def run(
|
|
889
900
|
cache_key=cache_key,
|
890
901
|
debug=debug,
|
891
902
|
script=script,
|
903
|
+
set_id=set_id,
|
892
904
|
)
|
893
905
|
|
894
906
|
# We'll use a thread pool to run the execution logic, we
|
scenario/scenario_state.py
CHANGED
@@ -6,7 +6,7 @@ of a scenario execution, including conversation history, turn tracking, and
|
|
6
6
|
utility methods for inspecting the conversation.
|
7
7
|
"""
|
8
8
|
|
9
|
-
from typing import List,
|
9
|
+
from typing import List, Optional, TYPE_CHECKING
|
10
10
|
from openai.types.chat import (
|
11
11
|
ChatCompletionMessageParam,
|
12
12
|
ChatCompletionMessageToolCallParam,
|
@@ -68,6 +68,7 @@ class ScenarioState(BaseModel):
|
|
68
68
|
)
|
69
69
|
```
|
70
70
|
"""
|
71
|
+
|
71
72
|
description: str
|
72
73
|
messages: List[ChatCompletionMessageParam]
|
73
74
|
thread_id: str
|
File without changes
|
File without changes
|
File without changes
|