inspect-ai 0.3.92__py3-none-any.whl → 0.3.94__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (149)
  1. inspect_ai/_cli/eval.py +27 -0
  2. inspect_ai/_display/textual/widgets/samples.py +3 -3
  3. inspect_ai/_display/textual/widgets/transcript.py +3 -29
  4. inspect_ai/_eval/eval.py +19 -2
  5. inspect_ai/_eval/evalset.py +4 -1
  6. inspect_ai/_eval/run.py +41 -0
  7. inspect_ai/_eval/task/generate.py +38 -44
  8. inspect_ai/_eval/task/log.py +26 -28
  9. inspect_ai/_eval/task/run.py +23 -27
  10. inspect_ai/_util/answer.py +26 -0
  11. inspect_ai/_util/constants.py +0 -1
  12. inspect_ai/_util/local_server.py +398 -0
  13. inspect_ai/_util/working.py +10 -4
  14. inspect_ai/_view/www/dist/assets/index.css +173 -159
  15. inspect_ai/_view/www/dist/assets/index.js +1417 -1142
  16. inspect_ai/_view/www/log-schema.json +379 -3
  17. inspect_ai/_view/www/package.json +1 -1
  18. inspect_ai/_view/www/src/@types/log.d.ts +93 -14
  19. inspect_ai/_view/www/src/app/content/MetaDataGrid.tsx +2 -2
  20. inspect_ai/_view/www/src/app/content/MetaDataView.module.css +1 -1
  21. inspect_ai/_view/www/src/app/content/MetadataGrid.module.css +1 -1
  22. inspect_ai/_view/www/src/app/content/RenderedContent.tsx +1 -1
  23. inspect_ai/_view/www/src/app/log-view/LogView.tsx +11 -0
  24. inspect_ai/_view/www/src/app/log-view/tabs/InfoTab.tsx +2 -9
  25. inspect_ai/_view/www/src/app/log-view/tabs/ModelsTab.tsx +51 -0
  26. inspect_ai/_view/www/src/app/log-view/tabs/TaskTab.module.css +6 -0
  27. inspect_ai/_view/www/src/app/log-view/tabs/TaskTab.tsx +143 -0
  28. inspect_ai/_view/www/src/app/plan/ModelCard.tsx +1 -2
  29. inspect_ai/_view/www/src/app/plan/PlanCard.tsx +29 -7
  30. inspect_ai/_view/www/src/app/plan/PlanDetailView.module.css +1 -1
  31. inspect_ai/_view/www/src/app/plan/PlanDetailView.tsx +1 -198
  32. inspect_ai/_view/www/src/app/samples/descriptor/score/NumericScoreDescriptor.tsx +2 -1
  33. inspect_ai/_view/www/src/app/samples/transcript/SandboxEventView.module.css +2 -1
  34. inspect_ai/_view/www/src/app/samples/transcript/SpanEventView.tsx +174 -0
  35. inspect_ai/_view/www/src/app/samples/transcript/ToolEventView.tsx +8 -8
  36. inspect_ai/_view/www/src/app/samples/transcript/TranscriptView.tsx +12 -2
  37. inspect_ai/_view/www/src/app/samples/transcript/TranscriptVirtualListComponent.module.css +1 -1
  38. inspect_ai/_view/www/src/app/samples/transcript/event/EventPanel.tsx +0 -3
  39. inspect_ai/_view/www/src/app/samples/transcript/transform/fixups.ts +87 -25
  40. inspect_ai/_view/www/src/app/samples/transcript/transform/treeify.ts +229 -17
  41. inspect_ai/_view/www/src/app/samples/transcript/transform/utils.ts +11 -0
  42. inspect_ai/_view/www/src/app/samples/transcript/types.ts +5 -1
  43. inspect_ai/_view/www/src/app/usage/ModelUsagePanel.tsx +3 -2
  44. inspect_ai/_view/www/src/app/usage/TokenTable.module.css +4 -1
  45. inspect_ai/_view/www/src/app/usage/TokenTable.tsx +2 -2
  46. inspect_ai/_view/www/src/app/usage/UsageCard.module.css +8 -3
  47. inspect_ai/_view/www/src/app/usage/UsageCard.tsx +1 -35
  48. inspect_ai/_view/www/src/components/Card.css +0 -1
  49. inspect_ai/_view/www/src/constants.ts +2 -0
  50. inspect_ai/_view/www/src/utils/numeric.ts +17 -0
  51. inspect_ai/agent/_agent.py +3 -3
  52. inspect_ai/agent/_as_solver.py +22 -12
  53. inspect_ai/agent/_as_tool.py +20 -6
  54. inspect_ai/agent/_handoff.py +12 -1
  55. inspect_ai/agent/_react.py +4 -3
  56. inspect_ai/agent/_run.py +16 -3
  57. inspect_ai/agent/_types.py +9 -0
  58. inspect_ai/dataset/_dataset.py +6 -3
  59. inspect_ai/log/__init__.py +14 -0
  60. inspect_ai/log/_convert.py +4 -9
  61. inspect_ai/log/_file.py +56 -0
  62. inspect_ai/log/_log.py +99 -0
  63. inspect_ai/log/_recorders/__init__.py +2 -0
  64. inspect_ai/log/_recorders/buffer/database.py +12 -11
  65. inspect_ai/log/_recorders/buffer/filestore.py +2 -2
  66. inspect_ai/log/_recorders/buffer/types.py +2 -2
  67. inspect_ai/log/_recorders/eval.py +20 -65
  68. inspect_ai/log/_recorders/file.py +28 -6
  69. inspect_ai/log/_recorders/recorder.py +7 -0
  70. inspect_ai/log/_recorders/types.py +1 -23
  71. inspect_ai/log/_samples.py +14 -25
  72. inspect_ai/log/_transcript.py +84 -36
  73. inspect_ai/log/_tree.py +118 -0
  74. inspect_ai/log/_util.py +52 -0
  75. inspect_ai/model/__init__.py +5 -1
  76. inspect_ai/model/_call_tools.py +72 -44
  77. inspect_ai/model/_generate_config.py +14 -8
  78. inspect_ai/model/_model.py +66 -88
  79. inspect_ai/model/_model_output.py +25 -0
  80. inspect_ai/model/_openai.py +2 -0
  81. inspect_ai/model/_providers/anthropic.py +13 -23
  82. inspect_ai/model/_providers/hf.py +27 -1
  83. inspect_ai/model/_providers/openai_o1.py +8 -2
  84. inspect_ai/model/_providers/providers.py +18 -4
  85. inspect_ai/model/_providers/sglang.py +247 -0
  86. inspect_ai/model/_providers/vllm.py +211 -400
  87. inspect_ai/scorer/_choice.py +1 -2
  88. inspect_ai/solver/__init__.py +7 -2
  89. inspect_ai/solver/_basic_agent.py +3 -10
  90. inspect_ai/solver/_chain.py +1 -1
  91. inspect_ai/solver/_fork.py +1 -1
  92. inspect_ai/solver/_multiple_choice.py +5 -22
  93. inspect_ai/solver/_plan.py +2 -2
  94. inspect_ai/solver/_task_state.py +26 -88
  95. inspect_ai/solver/_transcript.py +6 -7
  96. inspect_ai/tool/_json_rpc_helpers.py +45 -17
  97. inspect_ai/tool/_mcp/_mcp.py +8 -5
  98. inspect_ai/tool/_mcp/_sandbox.py +8 -2
  99. inspect_ai/tool/_mcp/server.py +3 -1
  100. inspect_ai/tool/_tool_call.py +4 -1
  101. inspect_ai/tool/_tool_support_helpers.py +51 -12
  102. inspect_ai/tool/_tools/_bash_session.py +190 -68
  103. inspect_ai/tool/_tools/_computer/_computer.py +25 -1
  104. inspect_ai/tool/_tools/_execute.py +4 -1
  105. inspect_ai/tool/_tools/_text_editor.py +4 -3
  106. inspect_ai/tool/_tools/_web_browser/_web_browser.py +10 -3
  107. inspect_ai/util/__init__.py +16 -0
  108. inspect_ai/util/_anyio.py +11 -0
  109. inspect_ai/util/_collect.py +50 -0
  110. inspect_ai/util/_limit.py +393 -0
  111. inspect_ai/util/_limited_conversation.py +57 -0
  112. inspect_ai/util/_span.py +58 -0
  113. inspect_ai/util/_subtask.py +27 -42
  114. {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.94.dist-info}/METADATA +1 -1
  115. {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.94.dist-info}/RECORD +120 -134
  116. {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.94.dist-info}/WHEEL +1 -1
  117. inspect_ai/_display/core/group.py +0 -79
  118. inspect_ai/solver/_limit.py +0 -39
  119. inspect_ai/tool/_tools/_computer/_resources/Dockerfile +0 -102
  120. inspect_ai/tool/_tools/_computer/_resources/README.md +0 -30
  121. inspect_ai/tool/_tools/_computer/_resources/entrypoint/entrypoint.sh +0 -18
  122. inspect_ai/tool/_tools/_computer/_resources/entrypoint/novnc_startup.sh +0 -20
  123. inspect_ai/tool/_tools/_computer/_resources/entrypoint/x11vnc_startup.sh +0 -48
  124. inspect_ai/tool/_tools/_computer/_resources/entrypoint/xfce_startup.sh +0 -13
  125. inspect_ai/tool/_tools/_computer/_resources/entrypoint/xvfb_startup.sh +0 -48
  126. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/Code/User/globalStorage/state.vscdb +0 -0
  127. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/Code/User/settings.json +0 -9
  128. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-panel.xml +0 -61
  129. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-screensaver.xml +0 -10
  130. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfwm4.xml +0 -91
  131. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +0 -10
  132. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/Desktop/Terminal.desktop +0 -10
  133. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +0 -10
  134. inspect_ai/tool/_tools/_computer/_resources/tool/.pylintrc +0 -8
  135. inspect_ai/tool/_tools/_computer/_resources/tool/.vscode/settings.json +0 -12
  136. inspect_ai/tool/_tools/_computer/_resources/tool/_args.py +0 -78
  137. inspect_ai/tool/_tools/_computer/_resources/tool/_constants.py +0 -22
  138. inspect_ai/tool/_tools/_computer/_resources/tool/_logger.py +0 -22
  139. inspect_ai/tool/_tools/_computer/_resources/tool/_run.py +0 -42
  140. inspect_ai/tool/_tools/_computer/_resources/tool/_tool_result.py +0 -33
  141. inspect_ai/tool/_tools/_computer/_resources/tool/_x11_client.py +0 -341
  142. inspect_ai/tool/_tools/_computer/_resources/tool/computer_tool.py +0 -141
  143. inspect_ai/tool/_tools/_computer/_resources/tool/pyproject.toml +0 -65
  144. inspect_ai/tool/_tools/_computer/_resources/tool/requirements.txt +0 -0
  145. inspect_ai/tool/_tools/_computer/test_args.py +0 -151
  146. /inspect_ai/{tool/_tools/_computer/_resources/tool/__init__.py → _view/www/src/app/log-view/tabs/ModelsTab.module.css} +0 -0
  147. {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.94.dist-info}/entry_points.txt +0 -0
  148. {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.94.dist-info}/licenses/LICENSE +0 -0
  149. {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.94.dist-info}/top_level.txt +0 -0

inspect_ai/log/_tree.py
@@ -0,0 +1,118 @@
+ from dataclasses import dataclass, field
+ from logging import getLogger
+ from typing import Iterable, Sequence, TypeAlias
+
+ from ._transcript import Event, SpanBeginEvent, SpanEndEvent
+
+ logger = getLogger(__name__)
+
+ EventNode: TypeAlias = "SpanNode" | Event
+ """Node in an event tree."""
+
+ EventTree: TypeAlias = list[EventNode]
+ """Tree of events (has individual events and event spans)."""
+
+
+ @dataclass
+ class SpanNode:
+     """Event tree node representing a span of events."""
+
+     id: str
+     """Span id."""
+
+     parent_id: str | None
+     """Parent span id."""
+
+     type: str | None
+     """Optional 'type' field for span."""
+
+     name: str
+     """Span name."""
+
+     begin: SpanBeginEvent
+     """Span begin event."""
+
+     end: SpanEndEvent | None = None
+     """Span end event (if any)."""
+
+     children: list[EventNode] = field(default_factory=list)
+     """Children in the span."""
+
+
+ def event_tree(events: Sequence[Event]) -> EventTree:
+     """Build a tree representation of a sequence of events.
+
+     Organize events hierarchically into event spans.
+
+     Args:
+         events: Sequence of `Event`.
+
+     Returns:
+         Event tree.
+     """
+     # Convert one flat list of (possibly interleaved) events into a *forest*
+     # (list of root-level items).
+
+     # Pre-create one node per span so we can attach events no matter when they
+     # arrive in the file. A single forward scan guarantees that the order of
+     # `children` inside every span reflects the order in which things appeared
+     # in the transcript.
+     nodes: dict[str, SpanNode] = {
+         ev.id: SpanNode(
+             id=ev.id, parent_id=ev.parent_id, type=ev.type, name=ev.name, begin=ev
+         )
+         for ev in events
+         if isinstance(ev, SpanBeginEvent)
+     }
+
+     roots: list[EventNode] = []
+
+     # Where should an event with `span_id` go?
+     def bucket(span_id: str | None) -> list[EventNode]:
+         if span_id and span_id in nodes:
+             return nodes[span_id].children
+         return roots  # root level
+
+     # Single pass in original order
+     for ev in events:
+         if isinstance(ev, SpanBeginEvent):  # span starts
+             bucket(ev.parent_id).append(nodes[ev.id])
+
+         elif isinstance(ev, SpanEndEvent):  # span ends
+             if n := nodes.get(ev.id):
+                 n.end = ev
+             else:
+                 logger.warning(f"Span end event (id: {ev.id}) with no span begin")
+
+         else:  # ordinary event
+             bucket(ev.span_id).append(ev)
+
+     return roots
+
+
+ def event_sequence(tree: EventTree) -> Iterable[Event]:
+     """Flatten a span forest back into a properly ordered sequence.
+
+     Args:
+         tree: Event tree
+
+     Returns:
+         Sequence of events.
+     """
+     for item in tree:
+         if isinstance(item, SpanNode):
+             yield item.begin
+             yield from event_sequence(item.children)
+             if item.end:
+                 yield item.end
+         else:
+             yield item
+
+
+ def _print_event_tree(tree: EventTree, indent: str = "") -> None:
+     for item in tree:
+         if isinstance(item, SpanNode):
+             print(f"{indent}span ({item.type}): {item.name}")
+             _print_event_tree(item.children, f"{indent}  ")
+         else:
+             print(f"{indent}{item.event}")
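
The two helpers above form a round trip: event_tree nests events under spans using parent_id/span_id, and event_sequence flattens the forest back into transcript order with each span's begin/end events wrapping its children. A minimal sketch of how they might be used to inspect the transcript of a completed eval (the log path is hypothetical, and importing from the private inspect_ai.log._tree module rather than a public export is an assumption made for illustration):

from inspect_ai.log import read_eval_log
from inspect_ai.log._tree import SpanNode, event_sequence, event_tree

# read a completed eval log (path is illustrative)
log = read_eval_log("logs/example.eval")

if log.samples:
    events = log.samples[0].events

    # group the flat event list into nested spans
    tree = event_tree(events)
    for node in tree:
        label = node.name if isinstance(node, SpanNode) else node.event
        print(label)

    # flatten back into a single ordered sequence of events
    flat = list(event_sequence(tree))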

inspect_ai/log/_util.py
@@ -0,0 +1,52 @@
+ import textwrap
+ from datetime import date, datetime, time
+ from typing import Any
+
+ from inspect_ai._util.content import (
+     ContentAudio,
+     ContentImage,
+     ContentReasoning,
+     ContentText,
+     ContentVideo,
+ )
+ from inspect_ai.model._chat_message import ChatMessage
+
+
+ def text_input_only(inputs: str | list[ChatMessage]) -> str | list[ChatMessage]:
+     # Clean the input of any images
+     if isinstance(inputs, list):
+         input: list[ChatMessage] = []
+         for message in inputs:
+             if not isinstance(message.content, str):
+                 filtered_content: list[
+                     ContentText
+                     | ContentReasoning
+                     | ContentImage
+                     | ContentAudio
+                     | ContentVideo
+                 ] = []
+                 for content in message.content:
+                     if content.type == "text":
+                         filtered_content.append(content)
+                     else:
+                         filtered_content.append(
+                             ContentText(text=f"({content.type.capitalize()})")
+                         )
+                 message.content = filtered_content
+                 input.append(message)
+             else:
+                 input.append(message)
+
+         return input
+     else:
+         return inputs
+
+
+ def thin_metadata(metadata: dict[str, Any]) -> dict[str, Any]:
+     thinned: dict[str, Any] = {}
+     for key, value in metadata.items():
+         if isinstance(value, int | float | bool | date | time | datetime):
+             thinned[key] = value
+         elif isinstance(value, str):
+             thinned[key] = textwrap.shorten(value, width=1024, placeholder="...")
+     return thinned
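
These helpers exist to slim down what gets recorded: text_input_only replaces non-text content with a short placeholder such as "(Image)", and thin_metadata keeps only scalar values, truncating strings to at most 1024 characters. A small sketch of the metadata behaviour (importing from the private inspect_ai.log._util module is an assumption made for illustration):

from datetime import datetime

from inspect_ai.log._util import thin_metadata

metadata = {
    "difficulty": 3,                             # kept: int
    "created": datetime(2025, 1, 1),             # kept: datetime
    "notes": "a very long description " * 200,   # shortened to <= 1024 chars
    "tags": ["easy", "math"],                    # dropped: lists are not retained
}

thinned = thin_metadata(metadata)
assert "tags" not in thinned
assert len(thinned["notes"]) <= 1024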

inspect_ai/model/__init__.py
@@ -28,7 +28,11 @@ from ._chat_message import (
      ChatMessageUser,
  )
  from ._conversation import ModelConversation
- from ._generate_config import GenerateConfig, GenerateConfigArgs, ResponseSchema
+ from ._generate_config import (
+     GenerateConfig,
+     GenerateConfigArgs,
+     ResponseSchema,
+ )
  from ._model import (
      Model,
      ModelAPI,

inspect_ai/model/_call_tools.py
@@ -60,6 +60,8 @@ from inspect_ai.tool._tool_info import parse_docstring
  from inspect_ai.tool._tool_params import ToolParams
  from inspect_ai.util import OutputLimitExceededError
  from inspect_ai.util._anyio import inner_exception
+ from inspect_ai.util._limit import LimitExceededError, apply_limits
+ from inspect_ai.util._span import span

  from ._chat_message import (
      ChatMessage,
@@ -108,26 +110,18 @@ async def execute_tools(
      """
      message = messages[-1]
      if isinstance(message, ChatMessageAssistant) and message.tool_calls:
-         from inspect_ai.log._transcript import (
-             ToolEvent,
-             Transcript,
-             init_transcript,
-             track_store_changes,
-             transcript,
-         )
+         from inspect_ai.log._transcript import ToolEvent, transcript

          tdefs = await tool_defs(tools)

          async def call_tool_task(
              call: ToolCall,
+             event: ToolEvent,
              conversation: list[ChatMessage],
              send_stream: MemoryObjectSendStream[
                  tuple[ExecuteToolsResult, ToolEvent, Exception | None]
              ],
          ) -> None:
-             # create a transript for this call
-             init_transcript(Transcript(name=call.function))
-
              result: ToolResult = ""
              messages: list[ChatMessage] = []
              output: ModelOutput | None = None
@@ -135,15 +129,14 @@ async def execute_tools(
              tool_error: ToolCallError | None = None
              tool_exception: Exception | None = None
              try:
-                 with track_store_changes():
-                     try:
-                         result, messages, output, agent = await call_tool(
-                             tdefs, message.text, call, conversation
-                         )
-                     # unwrap exception group
-                     except Exception as ex:
-                         inner_ex = inner_exception(ex)
-                         raise inner_ex.with_traceback(inner_ex.__traceback__)
+                 try:
+                     result, messages, output, agent = await call_tool(
+                         tdefs, message.text, call, event, conversation
+                     )
+                 # unwrap exception group
+                 except Exception as ex:
+                     inner_ex = inner_exception(ex)
+                     raise inner_ex.with_traceback(inner_ex.__traceback__)

              except TimeoutError:
                  tool_error = ToolCallError(
@@ -171,10 +164,15 @@ async def execute_tools(
                  tool_error = ToolCallError("is_a_directory", err)
              except OutputLimitExceededError as ex:
                  tool_error = ToolCallError(
-                     "output_limit",
-                     f"The tool output limit of {ex.limit_str} was exceeded.",
+                     "limit",
+                     f"The tool exceeded its output limit of {ex.limit_str}.",
                  )
                  result = ex.truncated_output or ""
+             except LimitExceededError as ex:
+                 tool_error = ToolCallError(
+                     "limit",
+                     f"The tool exceeded its {ex.type} limit of {ex.limit}.",
+                 )
              except ToolParsingError as ex:
                  tool_error = ToolCallError("parsing", ex.message)
              except ToolApprovalError as ex:
@@ -221,7 +219,6 @@ async def execute_tools(
                  truncated=truncated,
                  view=call.view,
                  error=tool_error,
-                 events=list(transcript().events),
                  agent=agent,
              )

@@ -264,7 +261,6 @@ async def execute_tools(
                  internal=call.internal,
                  pending=True,
              )
-             transcript()._event(event)

              # execute the tool call. if the operator cancels the
              # tool call then synthesize the appropriate message/event
@@ -274,7 +270,7 @@ async def execute_tools(

              result_exception = None
              async with anyio.create_task_group() as tg:
-                 tg.start_soon(call_tool_task, call, messages, send_stream)
+                 tg.start_soon(call_tool_task, call, event, messages, send_stream)
                  event._set_cancel_fn(tg.cancel_scope.cancel)
                  async with receive_stream:
                      (
@@ -300,7 +296,6 @@ async def execute_tools(
                      truncated=None,
                      view=call.view,
                      error=tool_message.error,
-                     events=[],
                  )
                  transcript().info(
                      f"Tool call '{call.function}' was cancelled by operator."
@@ -320,7 +315,6 @@ async def execute_tools(
                  result=result_event.result,
                  truncated=result_event.truncated,
                  error=result_event.error,
-                 events=result_event.events,
                  waiting_time=waiting_time_end - waiting_time_start,
                  agent=result_event.agent,
                  failed=True if result_exception else None,
@@ -341,18 +335,34 @@ async def execute_tools(


  async def call_tool(
-     tools: list[ToolDef], message: str, call: ToolCall, conversation: list[ChatMessage]
+     tools: list[ToolDef],
+     message: str,
+     call: ToolCall,
+     event: BaseModel,
+     conversation: list[ChatMessage],
  ) -> tuple[ToolResult, list[ChatMessage], ModelOutput | None, str | None]:
      from inspect_ai.agent._handoff import AgentTool
+     from inspect_ai.log._transcript import SampleLimitEvent, ToolEvent, transcript
+
+     # dodge circular import
+     assert isinstance(event, ToolEvent)
+
+     # this function is responsible for transcript events so that it can
+     # put them in the right enclosure (e.g. handoff/agent/tool). This
+     # means that if we throw early we need to do the enclosure when raising.
+     async def record_tool_parsing_error(error: str) -> Exception:
+         async with span(name=call.function, type="tool"):
+             transcript()._event(event)
+         return ToolParsingError(error)

      # if there was an error parsing the ToolCall, raise that
      if call.parse_error:
-         raise ToolParsingError(call.parse_error)
+         raise await record_tool_parsing_error(call.parse_error)

      # find the tool
      tool_def = next((tool for tool in tools if tool.name == call.function), None)
      if tool_def is None:
-         raise ToolParsingError(f"Tool {call.function} not found")
+         raise await record_tool_parsing_error(f"Tool {call.function} not found")

      # if we have a tool approver, apply it now
      from inspect_ai.approval._apply import apply_tool_approval
@@ -362,14 +372,11 @@ async def call_tool(
      )
      if not approved:
          if approval and approval.decision == "terminate":
-             from inspect_ai.solver._limit import SampleLimitExceededError
-
-             raise SampleLimitExceededError(
-                 "operator",
-                 value=1,
-                 limit=1,
-                 message="Tool call approver requested termination.",
+             message = "Tool call approver requested termination."
+             transcript()._event(
+                 SampleLimitEvent(type="operator", limit=1, message=message)
              )
+             raise LimitExceededError("operator", value=1, limit=1, message=message)
          else:
              raise ToolApprovalError(approval.explanation if approval else None)
      if approval and approval.modified:
@@ -378,7 +385,7 @@ async def call_tool(
      # validate the schema of the passed object
      validation_errors = validate_tool_input(call.arguments, tool_def.parameters)
      if validation_errors:
-         raise ToolParsingError(validation_errors)
+         raise await record_tool_parsing_error(validation_errors)

      # get arguments (with creation of dataclasses, pydantic objects, etc.)
      arguments = tool_params(call.arguments, tool_def.tool)
@@ -387,14 +394,18 @@ async def call_tool(
      with trace_action(
          logger, "Tool Call", format_function_call(tool_def.name, arguments, width=1000)
      ):
-         # agent tools get special handling
          if isinstance(tool_def.tool, AgentTool):
-             return await agent_handoff(tool_def, call, conversation)
+             async with span(tool_def.tool.name, type="handoff"):
+                 async with span(name=call.function, type="tool"):
+                     transcript()._event(event)
+                     return await agent_handoff(tool_def, call, conversation)

          # normal tool call
          else:
-             result: ToolResult = await tool_def.tool(**arguments)
-             return result, [], None, None
+             async with span(name=call.function, type="tool"):
+                 transcript()._event(event)
+                 result: ToolResult = await tool_def.tool(**arguments)
+                 return result, [], None, None


  async def agent_handoff(
@@ -454,9 +465,15 @@ async def agent_handoff(
      arguments = tool_params(arguments, agent_tool.agent)
      del arguments["state"]

-     # make the call
+     # run the agent with limits
+     limit_error: LimitExceededError | None = None
      agent_state = AgentState(messages=copy(agent_conversation))
-     agent_state = await agent_tool.agent(agent_state, **arguments)
+     try:
+         with apply_limits(agent_tool.limits):
+             async with span(name=agent_name, type="agent"):
+                 agent_state = await agent_tool.agent(agent_state, **arguments)
+     except LimitExceededError as ex:
+         limit_error = ex

      # determine which messages are new and return only those (but exclude new
      # system messages as they an internal matter for the handed off to agent.
@@ -474,9 +491,20 @@ async def agent_handoff(
      if agent_tool.output_filter is not None:
          agent_messages = await agent_tool.output_filter(agent_messages)

+     if limit_error is not None:
+         agent_messages.append(
+             ChatMessageUser(
+                 content=(
+                     f"The {agent_name} exceeded its {limit_error.type} limit of "
+                     f"{limit_error.limit}."
+                 )
+             )
+         )
      # if we end with an assistant message then add a user message
      # so that the calling agent carries on
-     if len(agent_messages) == 0 or isinstance(agent_messages[-1], ChatMessageAssistant):
+     elif len(agent_messages) == 0 or isinstance(
+         agent_messages[-1], ChatMessageAssistant
+     ):
          agent_messages.append(
              ChatMessageUser(content=f"The {agent_name} agent has completed its work.")
          )
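
The upshot of these _call_tools.py changes: tool events are now recorded inside explicit spans (with agent handoffs additionally wrapped in "handoff" and "agent" spans), and a LimitExceededError raised while a handed-off agent runs is caught and reported back to the calling model as a message rather than aborting the sample. A rough sketch of the apply_limits/span pattern the diff introduces, with agent_name, limits, and run_agent as illustrative placeholders rather than inspect_ai APIs:

from inspect_ai.util._limit import LimitExceededError, apply_limits
from inspect_ai.util._span import span


async def run_with_limits(agent_name, limits, run_agent):
    # mirrors the agent_handoff() pattern above: run the agent inside an
    # "agent" span, enforce the configured limits, and convert a limit
    # violation into an explanatory message instead of an exception
    limit_error: LimitExceededError | None = None
    result = None
    try:
        with apply_limits(limits):
            async with span(name=agent_name, type="agent"):
                result = await run_agent()
    except LimitExceededError as ex:
        limit_error = ex

    if limit_error is not None:
        return f"The {agent_name} exceeded its {limit_error.type} limit of {limit_error.limit}."
    return result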

inspect_ai/model/_generate_config.py
@@ -106,6 +106,9 @@ class GenerateConfigArgs(TypedDict, total=False):
      response_schema: ResponseSchema | None
      """Request a response format as JSONSchema (output should still be validated). OpenAI, Google, and Mistral only."""

+     extra_body: dict[str, Any] | None
+     """Extra body to be sent with requests to OpenAI compatible servers. OpenAI, vLLM, and SGLang only."""
+

  class GenerateConfig(BaseModel):
      """Model generation options."""
@@ -138,28 +141,28 @@ class GenerateConfig(BaseModel):
      """Generates best_of completions server-side and returns the 'best' (the one with the highest log probability per token). vLLM only."""

      frequency_penalty: float | None = Field(default=None)
-     """Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. OpenAI, Google, Grok, Groq, and vLLM only."""
+     """Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. OpenAI, Google, Grok, Groq, vLLM, and SGLang only."""

      presence_penalty: float | None = Field(default=None)
-     """Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics. OpenAI, Google, Grok, Groq, and vLLM only."""
+     """Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics. OpenAI, Google, Grok, Groq, vLLM, and SGLang only."""

      logit_bias: dict[int, float] | None = Field(default=None)
-     """Map token Ids to an associated bias value from -100 to 100 (e.g. "42=10,43=-10"). OpenAI, Grok, and Grok only."""
+     """Map token Ids to an associated bias value from -100 to 100 (e.g. "42=10,43=-10"). OpenAI, Grok, Grok, and vLLM only."""

      seed: int | None = Field(default=None)
      """Random seed. OpenAI, Google, Mistral, Groq, HuggingFace, and vLLM only."""

      top_k: int | None = Field(default=None)
-     """Randomly sample the next word from the top_k most likely next words. Anthropic, Google, HuggingFace, and vLLM only."""
+     """Randomly sample the next word from the top_k most likely next words. Anthropic, Google, HuggingFace, vLLM, and SGLang only."""

      num_choices: int | None = Field(default=None)
-     """How many chat completion choices to generate for each input message. OpenAI, Grok, Google, TogetherAI, and vLLM only."""
+     """How many chat completion choices to generate for each input message. OpenAI, Grok, Google, TogetherAI, vLLM, and SGLang only."""

      logprobs: bool | None = Field(default=None)
-     """Return log probabilities of the output tokens. OpenAI, Grok, TogetherAI, Huggingface, llama-cpp-python, and vLLM only."""
+     """Return log probabilities of the output tokens. OpenAI, Grok, TogetherAI, Huggingface, llama-cpp-python, vLLM, and SGLang only."""

      top_logprobs: int | None = Field(default=None)
-     """Number of most likely tokens (0-20) to return at each token position, each with an associated log probability. OpenAI, Grok, Huggingface, and vLLM only."""
+     """Number of most likely tokens (0-20) to return at each token position, each with an associated log probability. OpenAI, Grok, Huggingface, vLLM, and SGLang only."""

      parallel_tool_calls: bool | None = Field(default=None)
      """Whether to enable parallel function calling during tool use (defaults to True). OpenAI and Groq only."""
@@ -190,7 +193,10 @@ class GenerateConfig(BaseModel):
      """Include reasoning in chat message history sent to generate."""

      response_schema: ResponseSchema | None = Field(default=None)
-     """Request a response format as JSONSchema (output should still be validated). OpenAI, Google, and Mistral only."""
+     """Request a response format as JSONSchema (output should still be validated). OpenAI, Google, Mistral, vLLM, and SGLang only."""
+
+     extra_body: dict[str, Any] | None = Field(default=None)
+     """Extra body to be sent with requests to OpenAI compatible servers. OpenAI, vLLM, and SGLang only."""

      # migrate reasoning_history as a bool
      @model_validator(mode="before")
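
The new extra_body option is a pass-through: whatever dictionary is supplied is sent along with requests to OpenAI-compatible servers, which is how server-specific features can be reached without dedicated config fields. A hedged sketch of how it might be used (the vLLM guided_regex field and the model name are illustrative assumptions, not part of this diff):

from inspect_ai.model import GenerateConfig, get_model

config = GenerateConfig(
    temperature=0.0,
    # forwarded verbatim in the request body to the OpenAI-compatible server
    extra_body={"guided_regex": r"[A-D]"},
)

model = get_model("vllm/meta-llama/Llama-3.1-8B-Instruct", config=config)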