PyPI - langwatch-scenario - Versions diffs - 0.4.0__py3-none-any.whl → 0.7.1__py3-none-any.whl - Mend

langwatch-scenario 0.4.0__py3-none-any.whl → 0.7.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (238) hide show

scenario/scenario_state.py CHANGED Viewed

@@ -36,7 +36,7 @@ class ScenarioState(BaseModel):
         config: Configuration settings for this scenario execution
     Example:
-        ```python
+        ```
         def check_agent_behavior(state: ScenarioState) -> None:
             # Check if the agent called a specific tool
             if state.has_tool_call("get_weather"):
@@ -87,7 +87,7 @@ class ScenarioState(BaseModel):
             message: OpenAI-compatible message to add to the conversation
         Example:
-            ```python
+            ```
             def inject_system_message(state: ScenarioState) -> None:
                 state.add_message({
                     "role": "system",
@@ -108,7 +108,7 @@ class ScenarioState(BaseModel):
             ValueError: If no messages exist in the conversation
         Example:
-            ```python
+            ```
             def check_last_response(state: ScenarioState) -> None:
                 last = state.last_message()
                 if last["role"] == "assistant":
@@ -131,7 +131,7 @@ class ScenarioState(BaseModel):
             ValueError: If no user messages exist in the conversation
         Example:
-            ```python
+            ```
             def analyze_user_intent(state: ScenarioState) -> None:
                 user_msg = state.last_user_message()
                 content = user_msg["content"]
@@ -162,7 +162,7 @@ class ScenarioState(BaseModel):
             The tool call object if found, None otherwise
         Example:
-            ```python
+            ```
             def verify_weather_call(state: ScenarioState) -> None:
                 weather_call = state.last_tool_call("get_current_weather")
                 if weather_call:
@@ -192,7 +192,7 @@ class ScenarioState(BaseModel):
             True if the tool has been called, False otherwise
         Example:
-            ```python
+            ```
             def ensure_tool_usage(state: ScenarioState) -> None:
                 # Verify the agent used required tools
                 assert state.has_tool_call("search_database")

scenario/script.py CHANGED Viewed

@@ -32,7 +32,7 @@ def message(message: ChatCompletionMessageParam) -> ScriptStep:
         ScriptStep function that can be used in scenario scripts
     Example:
-        ```python
+        ```
         result = await scenario.run(
             name="tool response test",
             description="Testing tool call responses",
@@ -76,7 +76,7 @@ def user(
         ScriptStep function that can be used in scenario scripts
     Example:
-        ```python
+        ```
         result = await scenario.run(
             name="user interaction test",
             description="Testing specific user inputs",
@@ -95,7 +95,7 @@ def user(
                 scenario.agent(),
                 # Structured user message with multimodal content
-                scenario.user({
+                scenario.message({
                     "role": "user",
                     "content": [
                         {"type": "text", "text": "What's in this image?"},
@@ -128,7 +128,7 @@ def agent(
         ScriptStep function that can be used in scenario scripts
     Example:
-        ```python
+        ```
         result = await scenario.run(
             name="agent response test",
             description="Testing agent responses",
@@ -148,7 +148,7 @@ def agent(
                 scenario.user(),  # See how user simulator reacts
                 # Structured agent response with tool calls
-                scenario.agent({
+                scenario.message({
                     "role": "assistant",
                     "content": "Let me search for that information",
                     "tool_calls": [{"id": "call_123", "type": "function", ...}]
@@ -179,7 +179,7 @@ def judge(
         ScriptStep function that can be used in scenario scripts
     Example:
-        ```python
+        ```
         result = await scenario.run(
             name="judge evaluation test",
             description="Testing judge at specific points",
@@ -238,7 +238,7 @@ def proceed(
         ScriptStep function that can be used in scenario scripts
     Example:
-        ```python
+        ```
         def log_progress(state: ScenarioState) -> None:
             print(f"Turn {state.current_turn}: {len(state.messages)} messages")
@@ -288,7 +288,7 @@ def succeed(reasoning: Optional[str] = None) -> ScriptStep:
         ScriptStep function that can be used in scenario scripts
     Example:
-        ```python
+        ```
         def custom_success_check(state: ScenarioState) -> None:
             last_msg = state.last_message()
             if "solution" in last_msg.get("content", "").lower():
@@ -331,7 +331,7 @@ def fail(reasoning: Optional[str] = None) -> ScriptStep:
         ScriptStep function that can be used in scenario scripts
     Example:
-        ```python
+        ```
         def safety_check(state: ScenarioState) -> None:
             last_msg = state.last_message()
             content = last_msg.get("content", "")

scenario/types.py CHANGED Viewed

@@ -1,5 +1,5 @@
 from enum import Enum
-from pydantic import BaseModel, Field, SkipValidation
+from pydantic import BaseModel, SkipValidation
 from typing import (
     TYPE_CHECKING,
     Annotated,
@@ -35,6 +35,7 @@ class AgentRole(Enum):
         AGENT: Represents the agent under test that responds to user inputs
         JUDGE: Represents a judge agent that evaluates the conversation and determines success/failure
     """
     USER = "User"
     AGENT = "Agent"
     JUDGE = "Judge"
@@ -56,7 +57,7 @@ class AgentInput(BaseModel):
         scenario_state: Current state of the scenario execution
     Example:
-        ```python
+        ```
         class MyAgent(AgentAdapter):
             async def call(self, input: AgentInput) -> str:
                 # Get the latest user message
@@ -71,6 +72,7 @@ class AgentInput(BaseModel):
                 return response
         ```
     """
     thread_id: str
     # Prevent pydantic from validating/parsing the messages and causing issues: https://github.com/pydantic/pydantic/issues/9541
     messages: Annotated[List[ChatCompletionMessageParam], SkipValidation]
@@ -89,7 +91,7 @@ class AgentInput(BaseModel):
             ValueError: If no new user messages are found
         Example:
-            ```python
+            ```
             user_message = input.last_new_user_message()
             content = user_message["content"]
             ```
@@ -115,7 +117,7 @@ class AgentInput(BaseModel):
             ValueError: If no new user messages found or if the message content is not a string
         Example:
-            ```python
+            ```
             user_text = input.last_new_user_message_str()
             response = f"You said: {user_text}"
             ```
@@ -146,7 +148,7 @@ class ScenarioResult(BaseModel):
         agent_time: Time spent in agent calls in seconds (if measured)
     Example:
-        ```python
+        ```
         result = await scenario.run(
             name="weather query",
             description="User asks about weather",
@@ -168,7 +170,8 @@ class ScenarioResult(BaseModel):
     """
     success: bool
-    messages: List[ChatCompletionMessageParam]
+    # Prevent issues with slightly inconsistent message types for example when coming from Gemini right at the result level
+    messages: Annotated[List[ChatCompletionMessageParam], SkipValidation]
     reasoning: Optional[str] = None
     passed_criteria: List[str] = []
     failed_criteria: List[str] = []
@@ -193,13 +196,17 @@ AgentReturnTypes = Union[
 Union type representing all valid return types for agent adapter call methods.
 Agent adapters can return any of these types:
 - str: Simple text response
 - ChatCompletionMessageParam: Single OpenAI-compatible message
 - List[ChatCompletionMessageParam]: Multiple OpenAI-compatible messages (for multi-step responses)
 - ScenarioResult: Direct test result (typically used by judge agents to end scenarios)
 Example:
-    ```python
+    ```
     class MyAgent(AgentAdapter):
         async def call(self, input: AgentInput) -> AgentReturnTypes:
             # Can return a simple string
@@ -234,7 +241,7 @@ scenario state and can optionally return a result to end the scenario.
 The functions can be either synchronous or asynchronous.
 Example:
-    ```python
+    ```
     def check_tool_call(state: ScenarioState) -> None:
         assert state.has_tool_call("get_weather")

scenario/user_simulator_agent.py CHANGED Viewed

@@ -15,10 +15,10 @@ from litellm.files.main import ModelResponse
 from scenario.cache import scenario_cache
 from scenario.agent_adapter import AgentAdapter
-from scenario.utils import reverse_roles
+from scenario._utils.utils import reverse_roles
 from scenario.config import ModelConfig, ScenarioConfig
-from .error_messages import agent_not_configured_error_message
+from ._error_messages import agent_not_configured_error_message
 from .types import AgentInput, AgentReturnTypes, AgentRole
@@ -43,7 +43,7 @@ class UserSimulatorAgent(AgentAdapter):
         system_prompt: Custom system prompt to override default user simulation behavior
     Example:
-        ```python
+        ```
         import scenario
         # Basic user simulator with default behavior
@@ -112,7 +112,7 @@ class UserSimulatorAgent(AgentAdapter):
             Exception: If no model is configured either in parameters or global config
         Example:
-            ```python
+            ```
             # Basic user simulator
             user_sim = UserSimulatorAgent(model="openai/gpt-4.1-mini")
@@ -175,13 +175,6 @@ class UserSimulatorAgent(AgentAdapter):
         Returns:
             AgentReturnTypes: A user message in OpenAI format that continues the conversation
-        Example:
-            Given a scenario about seeking coding help and previous messages:
-            - Agent: "Hello! How can I help you today?"
-            The user simulator might generate:
-            - User: "hi, need help with python error"
         Note:
             - Messages are generated in a casual, human-like style
             - The simulator follows the scenario description to stay contextually relevant

langwatch_scenario-0.4.0.dist-info/RECORD DELETED Viewed

@@ -1,18 +0,0 @@
-scenario/__init__.py,sha256=oMh5le4c4sIN2K1Ylv2xnkyKHpcOzBeqvW58fTWAFlU,7794
-scenario/agent_adapter.py,sha256=pd3BdNUWna8h_9hykn1FvcyareMzUofQKKvXaAfQluY,4338
-scenario/cache.py,sha256=iPpMmjKruLnnxCeLnRiQjiH89LhcVIfQQXKH5etU_m4,6217
-scenario/config.py,sha256=AeDbKE-_Rrxkan64tDDDynaSNyijoIKHxWaRMqGd4oY,6121
-scenario/error_messages.py,sha256=6lEx3jBGMbPx0kG0eX5zoZE-ENVM3O_ZkIbVMlnidYs,3892
-scenario/judge_agent.py,sha256=7fKK_oevXzWKXDioBjHzgGSDpS0aby3oRcrc6oaip68,16973
-scenario/pytest_plugin.py,sha256=s2M2mll9JSCSWB5SKDQIWT5DOCvzZOo_8JCCfJzyy8k,12849
-scenario/scenario_executor.py,sha256=oz7Odv41HNLcNd_7sKUW-AKKdY-on_PyVLaxpvKjrGE,27211
-scenario/scenario_state.py,sha256=I_fWoY_LvNuKCBL-b62z5bQOAI25dx55FuZNWwtIeVs,7075
-scenario/script.py,sha256=7wsHZxdSgFaYLflkV6sysDxefkkag79mySR7yp7N3ug,12278
-scenario/types.py,sha256=CsexCupg2WUi4dToYF5RqFdNIHx1JhaRaRRBs78YVd0,9498
-scenario/user_simulator_agent.py,sha256=o8sZLMWOcTf7BKgPO_a5rPnC6GgdZQe3HujqwjPzjV8,9346
-scenario/utils.py,sha256=ryJYcMoSAjVzA_f5V6Mcga5GkipYbCzaYNNpBjAQI_g,16992
-langwatch_scenario-0.4.0.dist-info/METADATA,sha256=d9tNTNioHH5_1q8oIvIABaTgC6J9XmEJR4Tjim3sFks,13827
-langwatch_scenario-0.4.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-langwatch_scenario-0.4.0.dist-info/entry_points.txt,sha256=WlEnJ_gku0i18bIa3DSuGqXRX-QDQLe_s0YmRzK45TI,45
-langwatch_scenario-0.4.0.dist-info/top_level.txt,sha256=45Mn28aedJsetnBMB5xSmrJ-yo701QLH89Zlz4r1clE,9
-langwatch_scenario-0.4.0.dist-info/RECORD,,

{langwatch_scenario-0.4.0.dist-info → langwatch_scenario-0.7.1.dist-info}/WHEEL RENAMED Viewed

File without changes

{langwatch_scenario-0.4.0.dist-info → langwatch_scenario-0.7.1.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{langwatch_scenario-0.4.0.dist-info → langwatch_scenario-0.7.1.dist-info}/top_level.txt RENAMED Viewed

File without changes

/scenario/{error_messages.py → _error_messages.py} RENAMED Viewed

File without changes

langwatch-scenario 0.4.0__py3-none-any.whl → 0.7.1__py3-none-any.whl

langwatch-scenario 0.4.0__py3-none-any.whl → 0.7.1__py3-none-any.whl