PyPI - langwatch-scenario - Versions diffs - 0.7.2__py3-none-any.whl → 0.7.3__py3-none-any.whl - Mend

langwatch-scenario 0.7.2__py3-none-any.whl → 0.7.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

{langwatch_scenario-0.7.2.dist-info → langwatch_scenario-0.7.3.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: langwatch-scenario
-Version: 0.7.2
+Version: 0.7.3
 Summary: The end-to-end agent testing library
 Author-email: LangWatch Team <support@langwatch.ai>
 License: MIT
@@ -450,6 +450,48 @@ class MyAgent:
 This will cache any function call you decorate when running the tests and make them repeatable, hashed by the function arguments, the scenario being executed, and the `cache_key` you provided. You can exclude arguments that should not be hashed for the cache key by naming them in the `ignore` argument.
+## Grouping Your Sets and Batches
+While optional, we strongly recommend setting stable identifiers for your scenarios, sets, and batches for better organization and tracking in LangWatch.
+- **set_id**: Groups related scenarios into a test suite. This corresponds to the "Simulation Set" in the UI.
+- **batch_run_id**: Groups all scenarios that were run together in a single execution (e.g., a single CI job). This is automatically generated but can be overridden.
+```python
+import os
+result = await scenario.run(
+    name="my first scenario",
+    description="A simple test to see if the agent responds.",
+    set_id="my-test-suite",
+    agents=[
+        scenario.Agent(my_agent),
+        scenario.UserSimulatorAgent(),
+    ]
+)
+```
+You can also set the `batch_run_id` using environment variables for CI/CD integration:
+```python
+import os
+# Set batch ID for CI/CD integration
+os.environ["SCENARIO_BATCH_RUN_ID"] = os.environ.get("GITHUB_RUN_ID", "local-run")
+result = await scenario.run(
+    name="my first scenario",
+    description="A simple test to see if the agent responds.",
+    set_id="my-test-suite",
+    agents=[
+        scenario.Agent(my_agent),
+        scenario.UserSimulatorAgent(),
+    ]
+)
+```
+The `batch_run_id` is automatically generated for each test run, but you can also set it globally using the `SCENARIO_BATCH_RUN_ID` environment variable.
 ## Disable Output
 You can remove the `-s` flag from pytest to hide the output during test, which will only show up if the test fails. Alternatively, you can set `verbose=False` in the `Scenario.configure` method or in the specific scenario you are running.

{langwatch_scenario-0.7.2.dist-info → langwatch_scenario-0.7.3.dist-info}/RECORD RENAMED Viewed

@@ -5,8 +5,8 @@ scenario/cache.py,sha256=J6s6Sia_Ce6TrnsInlhfxm6SF8tygo3sH-_cQCRX1WA,6213
 scenario/config.py,sha256=xhUuXH-sThwPTmJNSuajKxX-WC_tcFwJ1jZc119DswA,6093
 scenario/judge_agent.py,sha256=d8vORsqpUPIA4yhlBTv5Yi4I2MdcfXselYBTFvfZx-4,16221
 scenario/pytest_plugin.py,sha256=DGrpgB6e71eq8QXWWxwLjAKNhiyYyzfzZ0L5Ax8iEmo,11317
-scenario/scenario_executor.py,sha256=EDRFgvyR7vUCX0fC6nMA5loJi3EUAvvyPWc-vCJSpII,32564
-scenario/scenario_state.py,sha256=dQDjazem-dn1c5mw6TwngEu6Tv_cHwEzemepsPBy2f0,7039
+scenario/scenario_executor.py,sha256=2ZPy2cywwEMIbUfBP1jHN__Ffjf5WGB144MX2SNr5IM,33101
+scenario/scenario_state.py,sha256=LWGqEQN-Yz0DIiC-TyMRHd-9rEiuBVUHKllMmKv-qGg,7029
 scenario/script.py,sha256=A0N5pP0l4FFn1xdKc78U_wkwWhEWH3EFeU_LRDtNyEI,12241
 scenario/types.py,sha256=qH5KFzJBDG1fEJB_qFRVtL3EZulxq3G1mztYczIzIAY,9613
 scenario/user_simulator_agent.py,sha256=fhwi8W44s343BGrjJXSJw960wcK7MgwTg-epxR1bqHo,9088
@@ -226,12 +226,12 @@ scenario/_generated/langwatch_api_client/lang_watch_api_client/models/search_req
 scenario/_generated/langwatch_api_client/lang_watch_api_client/models/search_response.py,sha256=zDYmJ8bFBSJyF9D3cEn_ffrey-ITIfwr-_7eu72zLyk,2832
 scenario/_generated/langwatch_api_client/lang_watch_api_client/models/timestamps.py,sha256=-nRKUPZTAJQNxiKz128xF7DKgZNbFo4G3mr5xNXrkaw,2173
 scenario/_generated/langwatch_api_client/lang_watch_api_client/models/trace.py,sha256=K9Lc_EQOrJ2dqMXx9EpiUXReT1_uYF7WRfYyhlfbi3I,7537
-scenario/_utils/__init__.py,sha256=5XkMVG8-g0D8PRtmcJ_PJakmPpUXdDX_gNf_jyILUXQ,999
-scenario/_utils/ids.py,sha256=K1iPuJgPh3gX9HCrDZGqK5lDgdwZXfOBF1YXVOWNHRg,1843
+scenario/_utils/__init__.py,sha256=ptNVzmjhypznnozdNIiuBDHZ0NLqtp7xhio9kEDovWQ,1311
+scenario/_utils/ids.py,sha256=v3JS8J7vrFuubK5bXJviU-BVZoLGWINCN1hUyAO9NZw,2074
 scenario/_utils/message_conversion.py,sha256=AWHn31E7J0mz9sBXWruVVAgtsrJz1R_xEf-dGbX6jjs,3636
 scenario/_utils/utils.py,sha256=msQgUWaLh3U9jIIHmxkEbOaklga63AF0KJzsaKa_mZc,14008
-langwatch_scenario-0.7.2.dist-info/METADATA,sha256=hHOIOIP9w51i6daij7jmQER_gMfGK_mHc8HrnO9GO90,18588
-langwatch_scenario-0.7.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-langwatch_scenario-0.7.2.dist-info/entry_points.txt,sha256=WlEnJ_gku0i18bIa3DSuGqXRX-QDQLe_s0YmRzK45TI,45
-langwatch_scenario-0.7.2.dist-info/top_level.txt,sha256=45Mn28aedJsetnBMB5xSmrJ-yo701QLH89Zlz4r1clE,9
-langwatch_scenario-0.7.2.dist-info/RECORD,,
+langwatch_scenario-0.7.3.dist-info/METADATA,sha256=5vyo2hMNsKaJKUbDBxUv7-YSD85ufDqczfLS6yp5b1Y,19959
+langwatch_scenario-0.7.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+langwatch_scenario-0.7.3.dist-info/entry_points.txt,sha256=WlEnJ_gku0i18bIa3DSuGqXRX-QDQLe_s0YmRzK45TI,45
+langwatch_scenario-0.7.3.dist-info/top_level.txt,sha256=45Mn28aedJsetnBMB5xSmrJ-yo701QLH89Zlz4r1clE,9
+langwatch_scenario-0.7.3.dist-info/RECORD,,

scenario/_utils/__init__.py CHANGED Viewed

@@ -7,7 +7,15 @@ for better user experience during scenario execution.
 """
 from .message_conversion import convert_agent_return_types_to_openai_messages
-from .ids import get_or_create_batch_run_id, generate_scenario_run_id
+from .ids import (
+    get_batch_run_id,
+    get_or_create_batch_run_id,  # Backward compatibility
+    generate_scenario_run_id,
+    generate_scenario_id,
+    generate_thread_id,
+    generate_message_id,
+    safe_parse_uuid,
+)
 from .utils import (
     SerializableAndPydanticEncoder,
     SerializableWithStringFallback,
@@ -20,8 +28,13 @@ from .utils import (
 __all__ = [
     "convert_agent_return_types_to_openai_messages",
-    "get_or_create_batch_run_id",
+    "get_batch_run_id",
+    "get_or_create_batch_run_id",  # Backward compatibility
     "generate_scenario_run_id",
+    "generate_scenario_id",
+    "generate_thread_id",
+    "generate_message_id",
+    "safe_parse_uuid",
     "SerializableAndPydanticEncoder",
     "SerializableWithStringFallback",
     "print_openai_messages",
@@ -29,4 +42,4 @@ __all__ = [
     "check_valid_return_type",
     "reverse_roles",
     "await_if_awaitable",
-]
+]

scenario/_utils/ids.py CHANGED Viewed

@@ -10,49 +10,87 @@ import os
 import uuid
-def get_or_create_batch_run_id() -> str:
+def generate_thread_id() -> str:
+    """
+    Generates a new thread ID.
+    Returns:
+        str: A new thread ID.
+    """
+    return f"thread_{uuid.uuid4()}"
+def generate_scenario_run_id() -> str:
+    """
+    Generates a new scenario run ID.
+    Returns:
+        str: A new scenario run ID.
+    """
+    return f"scenariorun_{uuid.uuid4()}"
+def generate_scenario_id() -> str:
+    """
+    Generates a new scenario ID.
+    Returns:
+        str: A new scenario ID.
+    """
+    return f"scenario_{uuid.uuid4()}"
+def get_batch_run_id() -> str:
     """
-    Gets or creates a batch run ID for the current scenario execution.
-    The batch run ID is consistent across all scenarios in the same process
-    execution, allowing grouping of related scenario runs. This is useful
-    for tracking and reporting on batches of scenarios run together.
+    Gets the batch run ID. If it's not set, it will be generated.
+    It can be set via the SCENARIO_BATCH_RUN_ID environment variable.
     Returns:
-        str: A unique batch run ID that persists for the process lifetime
-    Example:
-        ```python
-        # All scenarios in same process will share this ID
-        batch_id = get_or_create_batch_run_id()
-        print(f"Running scenario in batch: {batch_id}")
-        ```
-    """
+        str: The batch run ID.
+    """
     # Check if batch ID already exists in environment
-    if not os.environ.get("SCENARIO_BATCH_ID"):
+    batch_run_id = os.environ.get("SCENARIO_BATCH_RUN_ID")
+    if not batch_run_id:
         # Generate new batch ID if not set
-        os.environ["SCENARIO_BATCH_ID"] = f"batch-run-{uuid.uuid4()}"
-    return os.environ["SCENARIO_BATCH_ID"]
+        batch_run_id = f"scenariobatchrun_{uuid.uuid4()}"
+        os.environ["SCENARIO_BATCH_RUN_ID"] = batch_run_id
+    return batch_run_id
-def generate_scenario_run_id() -> str:
+def generate_message_id() -> str:
+    """
+    Generates a new message ID.
+    Returns:
+        str: A new message ID.
+    """
+    return f"scenariomsg_{uuid.uuid4()}"
+def safe_parse_uuid(id_str: str) -> bool:
     """
-    Generates a unique scenario run ID for a single scenario execution.
-    Each scenario run gets a unique identifier that distinguishes it from
-    other runs, even within the same batch. This is used for tracking
-    individual scenario executions and correlating events.
+    Safely parses a UUID string.
+    Args:
+        id_str: The UUID string to parse.
+    Returns:
+        bool: True if the UUID string is valid, false otherwise.
+    """
+    try:
+        uuid.UUID(id_str)
+        return True
+    except (ValueError, TypeError):
+        return False
+# Backward compatibility aliases
+def get_or_create_batch_run_id() -> str:
+    """
+    Backward compatibility alias for get_batch_run_id().
     Returns:
-        str: A unique scenario run ID
-    Example:
-        ```python
-        # Each scenario gets its own unique ID
-        scenario_id = generate_scenario_run_id()
-        print(f"Running scenario with ID: {scenario_id}")
-        ```
-    """
-    return f"scenario-run-{uuid.uuid4()}"
+        str: The batch run ID.
+    """
+    return get_batch_run_id()

scenario/scenario_executor.py CHANGED Viewed

@@ -31,7 +31,7 @@ from scenario._utils import (
     print_openai_messages,
     show_spinner,
     await_if_awaitable,
-    get_or_create_batch_run_id,
+    get_batch_run_id,
     generate_scenario_run_id,
 )
 from openai.types.chat import (
@@ -105,6 +105,7 @@ class ScenarioExecutor:
     event_bus: ScenarioEventBus
     batch_run_id: str
+    scenario_set_id: str
     def __init__(
         self,
@@ -118,6 +119,7 @@ class ScenarioExecutor:
         cache_key: Optional[str] = None,
         debug: Optional[bool] = None,
         event_bus: Optional[ScenarioEventBus] = None,
+        set_id: Optional[str] = None,
     ):
         """
         Initialize a scenario executor.
@@ -139,6 +141,7 @@ class ScenarioExecutor:
             debug: Whether to enable debug mode with step-by-step execution.
                   Overrides global configuration for this scenario.
             event_bus: Optional event bus that will subscribe to this executor's events
+            set_id: Optional set identifier for grouping related scenarios
         """
         self.name = name
         self.description = description
@@ -162,7 +165,8 @@ class ScenarioExecutor:
         self.event_bus = event_bus or ScenarioEventBus()
         self.event_bus.subscribe_to_events(self._events)
-        self.batch_run_id = get_or_create_batch_run_id()
+        self.batch_run_id = get_batch_run_id()
+        self.scenario_set_id = set_id or "default"
     @property
     def events(self) -> Observable:
@@ -702,12 +706,14 @@ class ScenarioExecutor:
             batch_run_id: Unique identifier for the batch of scenario runs
             scenario_run_id: Unique identifier for this specific scenario run
             scenario_id: Human-readable name/identifier for the scenario
+            scenario_set_id: Set identifier for grouping related scenarios
             timestamp: Unix timestamp in milliseconds when the event occurred
         """
         batch_run_id: str
         scenario_run_id: str
         scenario_id: str
+        scenario_set_id: str
         timestamp: int
     def _create_common_event_fields(self, scenario_run_id: str) -> _CommonEventFields:
@@ -727,6 +733,7 @@ class ScenarioExecutor:
             "batch_run_id": self.batch_run_id,
             "scenario_run_id": scenario_run_id,
             "scenario_id": self.name,
+            "scenario_set_id": self.scenario_set_id,
             "timestamp": int(time.time() * 1000),
         }
@@ -820,6 +827,7 @@ async def run(
     cache_key: Optional[str] = None,
     debug: Optional[bool] = None,
     script: Optional[List[ScriptStep]] = None,
+    set_id: Optional[str] = None,
 ) -> ScenarioResult:
     """
     High-level interface for running a scenario test.
@@ -837,6 +845,7 @@ async def run(
         cache_key: Cache key for deterministic behavior
         debug: Enable debug mode for step-by-step execution
         script: Optional script steps to control scenario flow
+        set_id: Optional set identifier for grouping related scenarios
     Returns:
         ScenarioResult containing the test outcome, conversation history,
@@ -854,7 +863,8 @@ async def run(
                my_agent,
                scenario.UserSimulatorAgent(),
                scenario.JudgeAgent(criteria=["Agent provides helpful response"])
-           ]
+           ],
+           set_id="customer-support-tests"
         )
         # Scripted scenario with custom evaluations
@@ -871,7 +881,8 @@ async def run(
                scenario.agent(),
                custom_eval,
                scenario.succeed()
-           ]
+           ],
+           set_id="integration-tests"
         )
         # Results analysis
@@ -889,6 +900,7 @@ async def run(
         cache_key=cache_key,
         debug=debug,
         script=script,
+        set_id=set_id,
     )
     # We'll use a thread pool to run the execution logic, we

scenario/scenario_state.py CHANGED Viewed

@@ -6,7 +6,7 @@ of a scenario execution, including conversation history, turn tracking, and
 utility methods for inspecting the conversation.
 """
-from typing import List, Dict, Any, Optional, TYPE_CHECKING
+from typing import List, Optional, TYPE_CHECKING
 from openai.types.chat import (
     ChatCompletionMessageParam,
     ChatCompletionMessageToolCallParam,
@@ -68,6 +68,7 @@ class ScenarioState(BaseModel):
         )
         ```
     """
     description: str
     messages: List[ChatCompletionMessageParam]
     thread_id: str

{langwatch_scenario-0.7.2.dist-info → langwatch_scenario-0.7.3.dist-info}/WHEEL RENAMED Viewed

File without changes

{langwatch_scenario-0.7.2.dist-info → langwatch_scenario-0.7.3.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{langwatch_scenario-0.7.2.dist-info → langwatch_scenario-0.7.3.dist-info}/top_level.txt RENAMED Viewed

File without changes

langwatch-scenario 0.7.2__py3-none-any.whl → 0.7.3__py3-none-any.whl

langwatch-scenario 0.7.2__py3-none-any.whl → 0.7.3__py3-none-any.whl