langwatch-scenario 0.7.2__py3-none-any.whl → 0.7.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: langwatch-scenario
3
- Version: 0.7.2
3
+ Version: 0.7.3
4
4
  Summary: The end-to-end agent testing library
5
5
  Author-email: LangWatch Team <support@langwatch.ai>
6
6
  License: MIT
@@ -450,6 +450,48 @@ class MyAgent:
450
450
 
451
451
  This will cache every call to the functions you decorate when running the tests, making them repeatable. Cached results are keyed by a hash of the function arguments, the scenario being executed, and the `cache_key` you provided. You can exclude arguments that should not be hashed for the cache key by naming them in the `ignore` argument.
452
452
 
453
+ ## Grouping Your Sets and Batches
454
+
455
+ While optional, we strongly recommend setting stable identifiers for your scenarios, sets, and batches for better organization and tracking in LangWatch.
456
+
457
+ - **set_id**: Groups related scenarios into a test suite. This corresponds to the "Simulation Set" in the UI.
458
+ - **batch_run_id**: Groups all scenarios that were run together in a single execution (e.g., a single CI job). This is automatically generated but can be overridden.
459
+
460
+ ```python
461
+ import os
462
+
463
+ result = await scenario.run(
464
+ name="my first scenario",
465
+ description="A simple test to see if the agent responds.",
466
+ set_id="my-test-suite",
467
+ agents=[
468
+ scenario.Agent(my_agent),
469
+ scenario.UserSimulatorAgent(),
470
+ ]
471
+ )
472
+ ```
473
+
474
+ You can also set the `batch_run_id` using environment variables for CI/CD integration:
475
+
476
+ ```python
477
+ import os
478
+
479
+ # Set batch ID for CI/CD integration
480
+ os.environ["SCENARIO_BATCH_RUN_ID"] = os.environ.get("GITHUB_RUN_ID", "local-run")
481
+
482
+ result = await scenario.run(
483
+ name="my first scenario",
484
+ description="A simple test to see if the agent responds.",
485
+ set_id="my-test-suite",
486
+ agents=[
487
+ scenario.Agent(my_agent),
488
+ scenario.UserSimulatorAgent(),
489
+ ]
490
+ )
491
+ ```
492
+
493
+ The `batch_run_id` is automatically generated for each test run, but you can also set it globally using the `SCENARIO_BATCH_RUN_ID` environment variable.
494
+
453
495
  ## Disable Output
454
496
 
455
497
  You can remove the `-s` flag from pytest to hide the output during tests; the output will then only show up if a test fails. Alternatively, you can set `verbose=False` in the `Scenario.configure` method or in the specific scenario you are running.
@@ -5,8 +5,8 @@ scenario/cache.py,sha256=J6s6Sia_Ce6TrnsInlhfxm6SF8tygo3sH-_cQCRX1WA,6213
5
5
  scenario/config.py,sha256=xhUuXH-sThwPTmJNSuajKxX-WC_tcFwJ1jZc119DswA,6093
6
6
  scenario/judge_agent.py,sha256=d8vORsqpUPIA4yhlBTv5Yi4I2MdcfXselYBTFvfZx-4,16221
7
7
  scenario/pytest_plugin.py,sha256=DGrpgB6e71eq8QXWWxwLjAKNhiyYyzfzZ0L5Ax8iEmo,11317
8
- scenario/scenario_executor.py,sha256=EDRFgvyR7vUCX0fC6nMA5loJi3EUAvvyPWc-vCJSpII,32564
9
- scenario/scenario_state.py,sha256=dQDjazem-dn1c5mw6TwngEu6Tv_cHwEzemepsPBy2f0,7039
8
+ scenario/scenario_executor.py,sha256=2ZPy2cywwEMIbUfBP1jHN__Ffjf5WGB144MX2SNr5IM,33101
9
+ scenario/scenario_state.py,sha256=LWGqEQN-Yz0DIiC-TyMRHd-9rEiuBVUHKllMmKv-qGg,7029
10
10
  scenario/script.py,sha256=A0N5pP0l4FFn1xdKc78U_wkwWhEWH3EFeU_LRDtNyEI,12241
11
11
  scenario/types.py,sha256=qH5KFzJBDG1fEJB_qFRVtL3EZulxq3G1mztYczIzIAY,9613
12
12
  scenario/user_simulator_agent.py,sha256=fhwi8W44s343BGrjJXSJw960wcK7MgwTg-epxR1bqHo,9088
@@ -226,12 +226,12 @@ scenario/_generated/langwatch_api_client/lang_watch_api_client/models/search_req
226
226
  scenario/_generated/langwatch_api_client/lang_watch_api_client/models/search_response.py,sha256=zDYmJ8bFBSJyF9D3cEn_ffrey-ITIfwr-_7eu72zLyk,2832
227
227
  scenario/_generated/langwatch_api_client/lang_watch_api_client/models/timestamps.py,sha256=-nRKUPZTAJQNxiKz128xF7DKgZNbFo4G3mr5xNXrkaw,2173
228
228
  scenario/_generated/langwatch_api_client/lang_watch_api_client/models/trace.py,sha256=K9Lc_EQOrJ2dqMXx9EpiUXReT1_uYF7WRfYyhlfbi3I,7537
229
- scenario/_utils/__init__.py,sha256=5XkMVG8-g0D8PRtmcJ_PJakmPpUXdDX_gNf_jyILUXQ,999
230
- scenario/_utils/ids.py,sha256=K1iPuJgPh3gX9HCrDZGqK5lDgdwZXfOBF1YXVOWNHRg,1843
229
+ scenario/_utils/__init__.py,sha256=ptNVzmjhypznnozdNIiuBDHZ0NLqtp7xhio9kEDovWQ,1311
230
+ scenario/_utils/ids.py,sha256=v3JS8J7vrFuubK5bXJviU-BVZoLGWINCN1hUyAO9NZw,2074
231
231
  scenario/_utils/message_conversion.py,sha256=AWHn31E7J0mz9sBXWruVVAgtsrJz1R_xEf-dGbX6jjs,3636
232
232
  scenario/_utils/utils.py,sha256=msQgUWaLh3U9jIIHmxkEbOaklga63AF0KJzsaKa_mZc,14008
233
- langwatch_scenario-0.7.2.dist-info/METADATA,sha256=hHOIOIP9w51i6daij7jmQER_gMfGK_mHc8HrnO9GO90,18588
234
- langwatch_scenario-0.7.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
235
- langwatch_scenario-0.7.2.dist-info/entry_points.txt,sha256=WlEnJ_gku0i18bIa3DSuGqXRX-QDQLe_s0YmRzK45TI,45
236
- langwatch_scenario-0.7.2.dist-info/top_level.txt,sha256=45Mn28aedJsetnBMB5xSmrJ-yo701QLH89Zlz4r1clE,9
237
- langwatch_scenario-0.7.2.dist-info/RECORD,,
233
+ langwatch_scenario-0.7.3.dist-info/METADATA,sha256=5vyo2hMNsKaJKUbDBxUv7-YSD85ufDqczfLS6yp5b1Y,19959
234
+ langwatch_scenario-0.7.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
235
+ langwatch_scenario-0.7.3.dist-info/entry_points.txt,sha256=WlEnJ_gku0i18bIa3DSuGqXRX-QDQLe_s0YmRzK45TI,45
236
+ langwatch_scenario-0.7.3.dist-info/top_level.txt,sha256=45Mn28aedJsetnBMB5xSmrJ-yo701QLH89Zlz4r1clE,9
237
+ langwatch_scenario-0.7.3.dist-info/RECORD,,
@@ -7,7 +7,15 @@ for better user experience during scenario execution.
7
7
  """
8
8
 
9
9
  from .message_conversion import convert_agent_return_types_to_openai_messages
10
- from .ids import get_or_create_batch_run_id, generate_scenario_run_id
10
+ from .ids import (
11
+ get_batch_run_id,
12
+ get_or_create_batch_run_id, # Backward compatibility
13
+ generate_scenario_run_id,
14
+ generate_scenario_id,
15
+ generate_thread_id,
16
+ generate_message_id,
17
+ safe_parse_uuid,
18
+ )
11
19
  from .utils import (
12
20
  SerializableAndPydanticEncoder,
13
21
  SerializableWithStringFallback,
@@ -20,8 +28,13 @@ from .utils import (
20
28
 
21
29
  __all__ = [
22
30
  "convert_agent_return_types_to_openai_messages",
23
- "get_or_create_batch_run_id",
31
+ "get_batch_run_id",
32
+ "get_or_create_batch_run_id", # Backward compatibility
24
33
  "generate_scenario_run_id",
34
+ "generate_scenario_id",
35
+ "generate_thread_id",
36
+ "generate_message_id",
37
+ "safe_parse_uuid",
25
38
  "SerializableAndPydanticEncoder",
26
39
  "SerializableWithStringFallback",
27
40
  "print_openai_messages",
@@ -29,4 +42,4 @@ __all__ = [
29
42
  "check_valid_return_type",
30
43
  "reverse_roles",
31
44
  "await_if_awaitable",
32
- ]
45
+ ]
scenario/_utils/ids.py CHANGED
@@ -10,49 +10,87 @@ import os
10
10
  import uuid
11
11
 
12
12
 
13
- def get_or_create_batch_run_id() -> str:
13
+ def generate_thread_id() -> str:
14
+ """
15
+ Generates a new thread ID.
16
+
17
+ Returns:
18
+ str: A new thread ID.
19
+ """
20
+ return f"thread_{uuid.uuid4()}"
21
+
22
+
23
+ def generate_scenario_run_id() -> str:
24
+ """
25
+ Generates a new scenario run ID.
26
+
27
+ Returns:
28
+ str: A new scenario run ID.
29
+ """
30
+ return f"scenariorun_{uuid.uuid4()}"
31
+
32
+
33
+ def generate_scenario_id() -> str:
34
+ """
35
+ Generates a new scenario ID.
36
+
37
+ Returns:
38
+ str: A new scenario ID.
39
+ """
40
+ return f"scenario_{uuid.uuid4()}"
41
+
42
+
43
+ def get_batch_run_id() -> str:
14
44
  """
15
- Gets or creates a batch run ID for the current scenario execution.
16
-
17
- The batch run ID is consistent across all scenarios in the same process
18
- execution, allowing grouping of related scenario runs. This is useful
19
- for tracking and reporting on batches of scenarios run together.
20
-
45
+ Gets the batch run ID. If it's not set, it will be generated.
46
+ It can be set via the SCENARIO_BATCH_RUN_ID environment variable.
47
+
21
48
  Returns:
22
- str: A unique batch run ID that persists for the process lifetime
23
-
24
- Example:
25
- ```python
26
- # All scenarios in same process will share this ID
27
- batch_id = get_or_create_batch_run_id()
28
- print(f"Running scenario in batch: {batch_id}")
29
- ```
30
- """
31
-
49
+ str: The batch run ID.
50
+ """
32
51
  # Check if batch ID already exists in environment
33
- if not os.environ.get("SCENARIO_BATCH_ID"):
52
+ batch_run_id = os.environ.get("SCENARIO_BATCH_RUN_ID")
53
+ if not batch_run_id:
34
54
  # Generate new batch ID if not set
35
- os.environ["SCENARIO_BATCH_ID"] = f"batch-run-{uuid.uuid4()}"
36
-
37
- return os.environ["SCENARIO_BATCH_ID"]
55
+ batch_run_id = f"scenariobatchrun_{uuid.uuid4()}"
56
+ os.environ["SCENARIO_BATCH_RUN_ID"] = batch_run_id
38
57
 
58
+ return batch_run_id
39
59
 
40
- def generate_scenario_run_id() -> str:
60
+
61
+ def generate_message_id() -> str:
62
+ """
63
+ Generates a new message ID.
64
+
65
+ Returns:
66
+ str: A new message ID.
67
+ """
68
+ return f"scenariomsg_{uuid.uuid4()}"
69
+
70
+
71
+ def safe_parse_uuid(id_str: str) -> bool:
41
72
  """
42
- Generates a unique scenario run ID for a single scenario execution.
43
-
44
- Each scenario run gets a unique identifier that distinguishes it from
45
- other runs, even within the same batch. This is used for tracking
46
- individual scenario executions and correlating events.
47
-
73
+ Safely parses a UUID string.
74
+
75
+ Args:
76
+ id_str: The UUID string to parse.
77
+
78
+ Returns:
79
+ bool: True if the UUID string is valid, False otherwise.
80
+ """
81
+ try:
82
+ uuid.UUID(id_str)
83
+ return True
84
+ except (ValueError, TypeError):
85
+ return False
86
+
87
+
88
+ # Backward compatibility aliases
89
+ def get_or_create_batch_run_id() -> str:
90
+ """
91
+ Backward compatibility alias for get_batch_run_id().
92
+
48
93
  Returns:
49
- str: A unique scenario run ID
50
-
51
- Example:
52
- ```python
53
- # Each scenario gets its own unique ID
54
- scenario_id = generate_scenario_run_id()
55
- print(f"Running scenario with ID: {scenario_id}")
56
- ```
57
- """
58
- return f"scenario-run-{uuid.uuid4()}"
94
+ str: The batch run ID.
95
+ """
96
+ return get_batch_run_id()
@@ -31,7 +31,7 @@ from scenario._utils import (
31
31
  print_openai_messages,
32
32
  show_spinner,
33
33
  await_if_awaitable,
34
- get_or_create_batch_run_id,
34
+ get_batch_run_id,
35
35
  generate_scenario_run_id,
36
36
  )
37
37
  from openai.types.chat import (
@@ -105,6 +105,7 @@ class ScenarioExecutor:
105
105
  event_bus: ScenarioEventBus
106
106
 
107
107
  batch_run_id: str
108
+ scenario_set_id: str
108
109
 
109
110
  def __init__(
110
111
  self,
@@ -118,6 +119,7 @@ class ScenarioExecutor:
118
119
  cache_key: Optional[str] = None,
119
120
  debug: Optional[bool] = None,
120
121
  event_bus: Optional[ScenarioEventBus] = None,
122
+ set_id: Optional[str] = None,
121
123
  ):
122
124
  """
123
125
  Initialize a scenario executor.
@@ -139,6 +141,7 @@ class ScenarioExecutor:
139
141
  debug: Whether to enable debug mode with step-by-step execution.
140
142
  Overrides global configuration for this scenario.
141
143
  event_bus: Optional event bus that will subscribe to this executor's events
144
+ set_id: Optional set identifier for grouping related scenarios
142
145
  """
143
146
  self.name = name
144
147
  self.description = description
@@ -162,7 +165,8 @@ class ScenarioExecutor:
162
165
  self.event_bus = event_bus or ScenarioEventBus()
163
166
  self.event_bus.subscribe_to_events(self._events)
164
167
 
165
- self.batch_run_id = get_or_create_batch_run_id()
168
+ self.batch_run_id = get_batch_run_id()
169
+ self.scenario_set_id = set_id or "default"
166
170
 
167
171
  @property
168
172
  def events(self) -> Observable:
@@ -702,12 +706,14 @@ class ScenarioExecutor:
702
706
  batch_run_id: Unique identifier for the batch of scenario runs
703
707
  scenario_run_id: Unique identifier for this specific scenario run
704
708
  scenario_id: Human-readable name/identifier for the scenario
709
+ scenario_set_id: Set identifier for grouping related scenarios
705
710
  timestamp: Unix timestamp in milliseconds when the event occurred
706
711
  """
707
712
 
708
713
  batch_run_id: str
709
714
  scenario_run_id: str
710
715
  scenario_id: str
716
+ scenario_set_id: str
711
717
  timestamp: int
712
718
 
713
719
  def _create_common_event_fields(self, scenario_run_id: str) -> _CommonEventFields:
@@ -727,6 +733,7 @@ class ScenarioExecutor:
727
733
  "batch_run_id": self.batch_run_id,
728
734
  "scenario_run_id": scenario_run_id,
729
735
  "scenario_id": self.name,
736
+ "scenario_set_id": self.scenario_set_id,
730
737
  "timestamp": int(time.time() * 1000),
731
738
  }
732
739
 
@@ -820,6 +827,7 @@ async def run(
820
827
  cache_key: Optional[str] = None,
821
828
  debug: Optional[bool] = None,
822
829
  script: Optional[List[ScriptStep]] = None,
830
+ set_id: Optional[str] = None,
823
831
  ) -> ScenarioResult:
824
832
  """
825
833
  High-level interface for running a scenario test.
@@ -837,6 +845,7 @@ async def run(
837
845
  cache_key: Cache key for deterministic behavior
838
846
  debug: Enable debug mode for step-by-step execution
839
847
  script: Optional script steps to control scenario flow
848
+ set_id: Optional set identifier for grouping related scenarios
840
849
 
841
850
  Returns:
842
851
  ScenarioResult containing the test outcome, conversation history,
@@ -854,7 +863,8 @@ async def run(
854
863
  my_agent,
855
864
  scenario.UserSimulatorAgent(),
856
865
  scenario.JudgeAgent(criteria=["Agent provides helpful response"])
857
- ]
866
+ ],
867
+ set_id="customer-support-tests"
858
868
  )
859
869
 
860
870
  # Scripted scenario with custom evaluations
@@ -871,7 +881,8 @@ async def run(
871
881
  scenario.agent(),
872
882
  custom_eval,
873
883
  scenario.succeed()
874
- ]
884
+ ],
885
+ set_id="integration-tests"
875
886
  )
876
887
 
877
888
  # Results analysis
@@ -889,6 +900,7 @@ async def run(
889
900
  cache_key=cache_key,
890
901
  debug=debug,
891
902
  script=script,
903
+ set_id=set_id,
892
904
  )
893
905
 
894
906
  # We'll use a thread pool to run the execution logic, we
@@ -6,7 +6,7 @@ of a scenario execution, including conversation history, turn tracking, and
6
6
  utility methods for inspecting the conversation.
7
7
  """
8
8
 
9
- from typing import List, Dict, Any, Optional, TYPE_CHECKING
9
+ from typing import List, Optional, TYPE_CHECKING
10
10
  from openai.types.chat import (
11
11
  ChatCompletionMessageParam,
12
12
  ChatCompletionMessageToolCallParam,
@@ -68,6 +68,7 @@ class ScenarioState(BaseModel):
68
68
  )
69
69
  ```
70
70
  """
71
+
71
72
  description: str
72
73
  messages: List[ChatCompletionMessageParam]
73
74
  thread_id: str