langwatch-scenario 0.7.8__py3-none-any.whl → 0.7.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: langwatch-scenario
3
- Version: 0.7.8
3
+ Version: 0.7.9
4
4
  Summary: The end-to-end agent testing library
5
5
  Author-email: LangWatch Team <support@langwatch.ai>
6
6
  License: MIT
@@ -457,7 +457,7 @@ This will cache any function call you decorate when running the tests and make t
457
457
  While optional, we strongly recommend setting stable identifiers for your scenarios, sets, and batches for better organization and tracking in LangWatch.
458
458
 
459
459
  - **set_id**: Groups related scenarios into a test suite. This corresponds to the "Simulation Set" in the UI.
460
- - **batch_run_id**: Groups all scenarios that were run together in a single execution (e.g., a single CI job). This is automatically generated but can be overridden.
460
+ - **SCENARIO_BATCH_RUN_ID**: Env variable that groups all scenarios that were run together in a single execution (e.g., a single CI job). This is automatically generated but can be overridden.
461
461
 
462
462
  ```python
463
463
  import os
@@ -2,20 +2,21 @@ scenario/__init__.py,sha256=4WO8TjY8Lc0NhYL7b9LvaB1xCBqwUkLuI0uIA6PQP6c,4223
2
2
  scenario/_error_messages.py,sha256=QVFSbhzsVNGz2GOBOaoQFW6w6AOyZCWLTt0ySWPfnGw,3882
3
3
  scenario/agent_adapter.py,sha256=PoY2KQqYuqzIIb3-nhIU-MPXwHJc1vmwdweMy7ut-hk,4255
4
4
  scenario/cache.py,sha256=J6s6Sia_Ce6TrnsInlhfxm6SF8tygo3sH-_cQCRX1WA,6213
5
- scenario/judge_agent.py,sha256=gWRWzIfHBjAYBRXant6n5fL_E2P3A2IGNvIyp9nUb30,16728
6
- scenario/pytest_plugin.py,sha256=DGrpgB6e71eq8QXWWxwLjAKNhiyYyzfzZ0L5Ax8iEmo,11317
7
- scenario/scenario_executor.py,sha256=2ZPy2cywwEMIbUfBP1jHN__Ffjf5WGB144MX2SNr5IM,33101
5
+ scenario/judge_agent.py,sha256=TSwykEWhoBA9F__sUsSuUMpu7pOkT1lIJo8YlEj2eiA,16759
6
+ scenario/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
+ scenario/pytest_plugin.py,sha256=wRCuGD9uwrrLt2fY15zK6mnmY9W_dO_m0WalPJYE5II,11491
8
+ scenario/scenario_executor.py,sha256=_GRpFpw_WtgtaGpxWh0A0HNNf-aU78PdIiVdgEFm9MY,33136
8
9
  scenario/scenario_state.py,sha256=LWGqEQN-Yz0DIiC-TyMRHd-9rEiuBVUHKllMmKv-qGg,7029
9
10
  scenario/script.py,sha256=A0N5pP0l4FFn1xdKc78U_wkwWhEWH3EFeU_LRDtNyEI,12241
10
11
  scenario/types.py,sha256=qH5KFzJBDG1fEJB_qFRVtL3EZulxq3G1mztYczIzIAY,9613
11
12
  scenario/user_simulator_agent.py,sha256=kqnSd4_gytzEwtkc06r58UdE1EycZBzejRPzfORDjdo,9619
12
13
  scenario/_events/__init__.py,sha256=4cj6H9zuXzvWhT2P2JNdjWzeF1PUepTjqIDw85Vid9s,1500
13
- scenario/_events/event_alert_message_logger.py,sha256=K0Pu76Gd36lGEEYh8e8r7NMt7J-OQhbw0cZmiwutCOE,3591
14
- scenario/_events/event_bus.py,sha256=KFN0OxAQIQXIk_tVrorDoN_YLKVK9dos5SXFALstHgE,9809
15
- scenario/_events/event_reporter.py,sha256=4uND_kdPBXe-aUWCdSj4BLrMA33TDnbZzokAEOU3_08,3771
14
+ scenario/_events/event_alert_message_logger.py,sha256=n2W3uT8y4x6KKL3H9Ez6CfzJOFlvOfvjDKsdhHUJkxs,2787
15
+ scenario/_events/event_bus.py,sha256=IsKNsClF1JFYj728EcxX1hw_KbfDkfJq3Y2Kv4h94n4,9871
16
+ scenario/_events/event_reporter.py,sha256=-6NNbBMy_FYr1O-1FuZ6eIUnLuI8NGRMUr0pybLJrCI,3873
16
17
  scenario/_events/events.py,sha256=UtEGY-_1B0LrwpgsNKgrvJBZhRtxuj3K_i6ZBfF7E4Q,6387
17
18
  scenario/_events/messages.py,sha256=quwP2OkeaGasNOoaV8GUeosZVKc5XDsde08T0xx_YQo,2297
18
- scenario/_events/utils.py,sha256=SproqiwjhLWAW7p82EirCgawpxAo0ksW1pBB4mKkcEs,3436
19
+ scenario/_events/utils.py,sha256=KKqWFGkj4XtofKxM2yi-DBhBQp8wQOdls48iPHGCmUY,3473
19
20
  scenario/_generated/langwatch_api_client/README.md,sha256=Az5f2L4ChOnG_ZtrdBagzRVgeTCtBkbD_S5cIeAry2o,5424
20
21
  scenario/_generated/langwatch_api_client/pyproject.toml,sha256=Z8wxuGp4H9BJYVVJB8diW7rRU9XYxtPfw9mU4_wq4cA,560
21
22
  scenario/_generated/langwatch_api_client/lang_watch_api_client/__init__.py,sha256=vVrn17y-3l3fOqeJk8aN3GlStRm2fo0f313l_0LtJNs,368
@@ -226,16 +227,16 @@ scenario/_generated/langwatch_api_client/lang_watch_api_client/models/search_req
226
227
  scenario/_generated/langwatch_api_client/lang_watch_api_client/models/search_response.py,sha256=zDYmJ8bFBSJyF9D3cEn_ffrey-ITIfwr-_7eu72zLyk,2832
227
228
  scenario/_generated/langwatch_api_client/lang_watch_api_client/models/timestamps.py,sha256=-nRKUPZTAJQNxiKz128xF7DKgZNbFo4G3mr5xNXrkaw,2173
228
229
  scenario/_generated/langwatch_api_client/lang_watch_api_client/models/trace.py,sha256=K9Lc_EQOrJ2dqMXx9EpiUXReT1_uYF7WRfYyhlfbi3I,7537
229
- scenario/_utils/__init__.py,sha256=ptNVzmjhypznnozdNIiuBDHZ0NLqtp7xhio9kEDovWQ,1311
230
- scenario/_utils/ids.py,sha256=v3JS8J7vrFuubK5bXJviU-BVZoLGWINCN1hUyAO9NZw,2074
230
+ scenario/_utils/__init__.py,sha256=xPVjLXnHTTq9fuRFh5lsMvwtIpEeJ3jy1vf5yTUMPsc,1313
231
+ scenario/_utils/ids.py,sha256=W4tVMCf9ky0KLTDA_qOfErNhb4tCmxwa8zEuo1K1ZuY,2071
231
232
  scenario/_utils/message_conversion.py,sha256=AWHn31E7J0mz9sBXWruVVAgtsrJz1R_xEf-dGbX6jjs,3636
232
233
  scenario/_utils/utils.py,sha256=msQgUWaLh3U9jIIHmxkEbOaklga63AF0KJzsaKa_mZc,14008
233
234
  scenario/config/__init__.py,sha256=b2X_bqkIrd7jZY9dRrXk2wOqoPe87Nl_SRGuZhlolxA,1123
234
235
  scenario/config/langwatch.py,sha256=ijWchFbUsLbQooAZmwyTw4rxfRLQseZ1GoVSiPPbzpw,1677
235
236
  scenario/config/model.py,sha256=T4HYA79CW1NxXDkFlyftYR6JzZcowbtIx0H-ijxRyfg,1297
236
- scenario/config/scenario.py,sha256=tVVnsUgG6Z0hYZiTDX-GGZz8l8co1HhyTqJUJNPinBk,5184
237
- langwatch_scenario-0.7.8.dist-info/METADATA,sha256=q7Rk73qwl5ZzaRTEF9IWxLzgCBniCMO8Ku240jVyBLY,20003
238
- langwatch_scenario-0.7.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
239
- langwatch_scenario-0.7.8.dist-info/entry_points.txt,sha256=WlEnJ_gku0i18bIa3DSuGqXRX-QDQLe_s0YmRzK45TI,45
240
- langwatch_scenario-0.7.8.dist-info/top_level.txt,sha256=45Mn28aedJsetnBMB5xSmrJ-yo701QLH89Zlz4r1clE,9
241
- langwatch_scenario-0.7.8.dist-info/RECORD,,
237
+ scenario/config/scenario.py,sha256=6jrtcm0Fo7FpxQta7QIKdGMgl7cXrn374Inzx29hRuk,5406
238
+ langwatch_scenario-0.7.9.dist-info/METADATA,sha256=0s-yAn8iE1N-5dbqugYFpSl8btZrTyyDgWQDat8szxI,20030
239
+ langwatch_scenario-0.7.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
240
+ langwatch_scenario-0.7.9.dist-info/entry_points.txt,sha256=WlEnJ_gku0i18bIa3DSuGqXRX-QDQLe_s0YmRzK45TI,45
241
+ langwatch_scenario-0.7.9.dist-info/top_level.txt,sha256=45Mn28aedJsetnBMB5xSmrJ-yo701QLH89Zlz4r1clE,9
242
+ langwatch_scenario-0.7.9.dist-info/RECORD,,
@@ -1,5 +1,8 @@
1
1
  import os
2
+ import webbrowser
2
3
  from typing import Set
4
+
5
+ from ..config.scenario import ScenarioConfig
3
6
  from .._utils.ids import get_batch_run_id
4
7
 
5
8
 
@@ -49,35 +52,13 @@ class EventAlertMessageLogger:
49
52
 
50
53
  if not os.getenv("LANGWATCH_API_KEY"):
51
54
  print(f"\n{separator}")
52
- print("🚀 LangWatch Simulation Reporting")
55
+ print("🎭 Running Scenario Tests")
53
56
  print(f"{separator}")
54
- print("➡️ API key not configured")
57
+ print("➡️ LangWatch API key not configured")
55
58
  print(" Simulations will only output final results")
56
59
  print("")
57
60
  print("💡 To visualize conversations in real time:")
58
61
  print(" • Set LANGWATCH_API_KEY environment variable")
59
- print(" • Or configure apiKey in scenario.config.js")
60
- print("")
61
- print(f"📦 Batch Run ID: {batch_run_id}")
62
- print("")
63
- print("🔇 To disable these messages:")
64
- print(" • Set SCENARIO_DISABLE_SIMULATION_REPORT_INFO=true")
65
- print(f"{separator}\n")
66
- else:
67
- endpoint = os.getenv("LANGWATCH_ENDPOINT", "https://app.langwatch.ai")
68
- api_key = os.getenv("LANGWATCH_API_KEY", "")
69
-
70
- print(f"\n{separator}")
71
- print("🚀 LangWatch Simulation Reporting")
72
- print(f"{separator}")
73
- print("✅ Simulation reporting enabled")
74
- print(f" Endpoint: {endpoint}")
75
- print(f" API Key: {'Configured' if api_key else 'Not configured'}")
76
- print("")
77
- print(f"📦 Batch Run ID: {batch_run_id}")
78
- print("")
79
- print("🔇 To disable these messages:")
80
- print(" • Set SCENARIO_DISABLE_SIMULATION_REPORT_INFO=true")
81
62
  print(f"{separator}\n")
82
63
 
83
64
  def _display_watch_message(self, set_url: str) -> None:
@@ -86,10 +67,15 @@ class EventAlertMessageLogger:
86
67
  batch_url = f"{set_url}/{get_batch_run_id()}"
87
68
 
88
69
  print(f"\n{separator}")
89
- print("👀 Watch Your Simulation Live")
70
+ print("🎭 Running Scenario Tests")
90
71
  print(f"{separator}")
91
- print("🌐 Open in your browser:")
92
- print(f" Scenario Set: {set_url}")
93
- print(f" Batch Run: {batch_url}")
94
- print("")
72
+ print(f"Follow it live: {batch_url}")
95
73
  print(f"{separator}\n")
74
+
75
+ config = ScenarioConfig.default_config
76
+ if config and not config.headless:
77
+ # Open the URL in the default browser (cross-platform)
78
+ try:
79
+ webbrowser.open(batch_url)
80
+ except Exception:
81
+ pass
@@ -3,6 +3,7 @@ from typing import Optional, Any, Dict
3
3
  from .events import ScenarioEvent
4
4
  from .event_reporter import EventReporter
5
5
  from .event_alert_message_logger import EventAlertMessageLogger
6
+ from ..config.scenario import ScenarioConfig
6
7
 
7
8
  import asyncio
8
9
  import queue
@@ -35,7 +36,9 @@ class ScenarioEventBus:
35
36
  """
36
37
 
37
38
  def __init__(
38
- self, event_reporter: Optional[EventReporter] = None, max_retries: int = 3
39
+ self,
40
+ event_reporter: Optional[EventReporter] = None,
41
+ max_retries: int = 3,
39
42
  ):
40
43
  """
41
44
  Initialize the event bus with optional event reporter and retry configuration.
@@ -3,7 +3,7 @@ import httpx
3
3
  from typing import Optional, Dict, Any
4
4
  from .events import ScenarioEvent
5
5
  from .event_alert_message_logger import EventAlertMessageLogger
6
- from scenario.config import LangWatchSettings
6
+ from scenario.config import LangWatchSettings, ScenarioConfig
7
7
 
8
8
 
9
9
  class EventReporter:
@@ -26,7 +26,11 @@ class EventReporter:
26
26
  reporter = EventReporter(api_key="your-api-key")
27
27
  """
28
28
 
29
- def __init__(self, endpoint: Optional[str] = None, api_key: Optional[str] = None):
29
+ def __init__(
30
+ self,
31
+ endpoint: Optional[str] = None,
32
+ api_key: Optional[str] = None,
33
+ ):
30
34
  # Load settings from environment variables
31
35
  langwatch_settings = LangWatchSettings()
32
36
 
@@ -69,6 +73,7 @@ class EventReporter:
69
73
  "Content-Type": "application/json",
70
74
  "X-Auth-Token": self.api_key,
71
75
  },
76
+ timeout=httpx.Timeout(30.0),
72
77
  )
73
78
  self.logger.info(
74
79
  f"[{event_type}] POST response status: {response.status_code} ({event.scenario_run_id})"
@@ -92,7 +97,7 @@ class EventReporter:
92
97
  )
93
98
  except Exception as error:
94
99
  self.logger.error(
95
- f"[{event_type}] Event POST error: {error}, event={event}, endpoint={self.endpoint}"
100
+ f"[{event_type}] Event POST error: {repr(error)}, event={event}, endpoint={self.endpoint}"
96
101
  )
97
102
 
98
103
  return result
scenario/_events/utils.py CHANGED
@@ -10,7 +10,7 @@ from .messages import (
10
10
  FunctionCall,
11
11
  )
12
12
  from typing import List
13
- import uuid
13
+ from pksuid import PKSUID
14
14
 
15
15
  def convert_messages_to_api_client_messages(messages: list[ChatCompletionMessageParam]) -> list[MessageType]:
16
16
  """
@@ -33,7 +33,7 @@ def convert_messages_to_api_client_messages(messages: list[ChatCompletionMessage
33
33
 
34
34
  for i, message in enumerate(messages):
35
35
  # Generate unique ID for each message
36
- message_id = message.get("id") or str(uuid.uuid4())
36
+ message_id = message.get("id") or str(PKSUID('scenariomsg'))
37
37
 
38
38
  role = message.get("role")
39
39
  content = message.get("content")
@@ -54,7 +54,7 @@ def convert_messages_to_api_client_messages(messages: list[ChatCompletionMessage
54
54
  if tool_calls:
55
55
  for tool_call in tool_calls:
56
56
  api_tool_calls.append(ToolCall(
57
- id=tool_call.get("id", str(uuid.uuid4())),
57
+ id=tool_call.get("id", str(PKSUID('scenariotoolcall'))),
58
58
  type_="function",
59
59
  function=FunctionCall(
60
60
  name=tool_call["function"].get("name", "unknown"),
@@ -14,7 +14,7 @@ from .ids import (
14
14
  generate_scenario_id,
15
15
  generate_thread_id,
16
16
  generate_message_id,
17
- safe_parse_uuid,
17
+ safe_parse_ksuid,
18
18
  )
19
19
  from .utils import (
20
20
  SerializableAndPydanticEncoder,
@@ -34,7 +34,7 @@ __all__ = [
34
34
  "generate_scenario_id",
35
35
  "generate_thread_id",
36
36
  "generate_message_id",
37
- "safe_parse_uuid",
37
+ "safe_parse_ksuid",
38
38
  "SerializableAndPydanticEncoder",
39
39
  "SerializableWithStringFallback",
40
40
  "print_openai_messages",
scenario/_utils/ids.py CHANGED
@@ -7,7 +7,7 @@ and scenario tracking.
7
7
  """
8
8
 
9
9
  import os
10
- import uuid
10
+ from pksuid import PKSUID
11
11
 
12
12
 
13
13
  def generate_thread_id() -> str:
@@ -17,7 +17,7 @@ def generate_thread_id() -> str:
17
17
  Returns:
18
18
  str: A new thread ID.
19
19
  """
20
- return f"thread_{uuid.uuid4()}"
20
+ return f"{PKSUID('scenariothread')}"
21
21
 
22
22
 
23
23
  def generate_scenario_run_id() -> str:
@@ -27,7 +27,7 @@ def generate_scenario_run_id() -> str:
27
27
  Returns:
28
28
  str: A new scenario run ID.
29
29
  """
30
- return f"scenariorun_{uuid.uuid4()}"
30
+ return f"{PKSUID('scenariorun')}"
31
31
 
32
32
 
33
33
  def generate_scenario_id() -> str:
@@ -37,7 +37,7 @@ def generate_scenario_id() -> str:
37
37
  Returns:
38
38
  str: A new scenario ID.
39
39
  """
40
- return f"scenario_{uuid.uuid4()}"
40
+ return f"{PKSUID('scenario')}"
41
41
 
42
42
 
43
43
  def get_batch_run_id() -> str:
@@ -52,7 +52,7 @@ def get_batch_run_id() -> str:
52
52
  batch_run_id = os.environ.get("SCENARIO_BATCH_RUN_ID")
53
53
  if not batch_run_id:
54
54
  # Generate new batch ID if not set
55
- batch_run_id = f"scenariobatchrun_{uuid.uuid4()}"
55
+ batch_run_id = f"{PKSUID('scenariobatch')}"
56
56
  os.environ["SCENARIO_BATCH_RUN_ID"] = batch_run_id
57
57
 
58
58
  return batch_run_id
@@ -65,23 +65,23 @@ def generate_message_id() -> str:
65
65
  Returns:
66
66
  str: A new message ID.
67
67
  """
68
- return f"scenariomsg_{uuid.uuid4()}"
68
+ return f"{PKSUID('scenariomsg')}"
69
69
 
70
70
 
71
- def safe_parse_uuid(id_str: str) -> bool:
71
+ def safe_parse_ksuid(id_str: str) -> bool:
72
72
  """
73
- Safely parses a UUID string.
73
+ Safely parses a Ksuid string.
74
74
 
75
75
  Args:
76
- id_str: The UUID string to parse.
76
+ id_str: The Ksuid string to parse.
77
77
 
78
78
  Returns:
79
- bool: True if the UUID string is valid, false otherwise.
79
+ bool: True if the Ksuid string is valid, false otherwise.
80
80
  """
81
81
  try:
82
- uuid.UUID(id_str)
82
+ PKSUID.parse(id_str)
83
83
  return True
84
- except (ValueError, TypeError):
84
+ except Exception:
85
85
  return False
86
86
 
87
87
 
@@ -5,6 +5,7 @@ This module provides the main configuration class for customizing the behavior
5
5
  of the Scenario testing framework, including execution parameters and debugging options.
6
6
  """
7
7
 
8
+ import os
8
9
  from typing import Optional, Union, ClassVar
9
10
  from pydantic import BaseModel
10
11
 
@@ -53,6 +54,11 @@ class ScenarioConfig(BaseModel):
53
54
  verbose: Optional[Union[bool, int]] = True
54
55
  cache_key: Optional[str] = None
55
56
  debug: Optional[bool] = False
57
+ headless: Optional[bool] = os.getenv("SCENARIO_HEADLESS", "false").lower() not in [
58
+ "false",
59
+ "0",
60
+ "",
61
+ ]
56
62
 
57
63
  default_config: ClassVar[Optional["ScenarioConfig"]] = None
58
64
 
@@ -64,6 +70,7 @@ class ScenarioConfig(BaseModel):
64
70
  verbose: Optional[Union[bool, int]] = None,
65
71
  cache_key: Optional[str] = None,
66
72
  debug: Optional[bool] = None,
73
+ headless: Optional[bool] = None,
67
74
  ) -> None:
68
75
  """
69
76
  Set global configuration settings for all scenario executions.
@@ -107,6 +114,7 @@ class ScenarioConfig(BaseModel):
107
114
  verbose=verbose,
108
115
  cache_key=cache_key,
109
116
  debug=debug,
117
+ headless=headless,
110
118
  )
111
119
  )
112
120
 
scenario/judge_agent.py CHANGED
@@ -398,7 +398,7 @@ if you don't have enough information to make a verdict, say inconclusive with ma
398
398
  failed_criteria = [
399
399
  self.criteria[idx]
400
400
  for idx, criterion in enumerate(criteria.values())
401
- if criterion == False
401
+ if criterion == False or criterion == "inconclusive"
402
402
  ]
403
403
 
404
404
  # Return the appropriate ScenarioResult based on the verdict
scenario/py.typed ADDED
File without changes
scenario/pytest_plugin.py CHANGED
@@ -199,6 +199,8 @@ class ScenarioReporter:
199
199
  # Store the original run method
200
200
  original_run = ScenarioExecutor.run
201
201
 
202
+ def pytest_addoption(parser):
203
+ parser.addoption("--headless", action="store_true")
202
204
 
203
205
  @pytest.hookimpl(trylast=True)
204
206
  def pytest_configure(config):
@@ -240,6 +242,9 @@ def pytest_configure(config):
240
242
  print(colored("\nScenario debug mode enabled (--debug).", "yellow"))
241
243
  ScenarioConfig.configure(verbose=True, debug=True)
242
244
 
245
+ if config.getoption("--headless"):
246
+ ScenarioConfig.configure(headless=True)
247
+
243
248
  # Create a global reporter instance
244
249
  config._scenario_reporter = ScenarioReporter()
245
250
 
@@ -153,6 +153,7 @@ class ScenarioExecutor:
153
153
  verbose=verbose,
154
154
  cache_key=cache_key,
155
155
  debug=debug,
156
+ headless=None,
156
157
  )
157
158
  self.config = (ScenarioConfig.default_config or ScenarioConfig()).merge(config)
158
159
 
@@ -198,7 +199,7 @@ class ScenarioExecutor:
198
199
  self._state = ScenarioState(
199
200
  description=self.description,
200
201
  messages=[],
201
- thread_id=str(PKSUID("thread")),
202
+ thread_id=str(PKSUID("scenariothread")),
202
203
  current_turn=0,
203
204
  config=self.config,
204
205
  _executor=self,