PyPI - inspect-ai - Versions diffs - 0.3.98__py3-none-any.whl → 0.3.100__py3-none-any.whl - Mend

inspect-ai 0.3.98__py3-none-any.whl → 0.3.100__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (131)

inspect_ai/__init__.py +2 -0
inspect_ai/_cli/log.py +1 -1
inspect_ai/_display/core/config.py +11 -5
inspect_ai/_display/core/panel.py +66 -2
inspect_ai/_display/core/textual.py +5 -2
inspect_ai/_display/plain/display.py +1 -0
inspect_ai/_display/rich/display.py +2 -2
inspect_ai/_display/textual/widgets/transcript.py +41 -1
inspect_ai/_eval/run.py +12 -4
inspect_ai/_eval/score.py +2 -4
inspect_ai/_eval/task/log.py +1 -1
inspect_ai/_eval/task/run.py +59 -81
inspect_ai/_eval/task/task.py +1 -1
inspect_ai/_util/_async.py +1 -1
inspect_ai/_util/content.py +11 -6
inspect_ai/_util/interrupt.py +2 -2
inspect_ai/_util/text.py +7 -0
inspect_ai/_util/working.py +8 -37
inspect_ai/_view/__init__.py +0 -0
inspect_ai/_view/schema.py +3 -1
inspect_ai/_view/view.py +14 -0
inspect_ai/_view/www/CLAUDE.md +15 -0
inspect_ai/_view/www/dist/assets/index.css +273 -169
inspect_ai/_view/www/dist/assets/index.js +20079 -17019
inspect_ai/_view/www/log-schema.json +122 -8
inspect_ai/_view/www/package.json +5 -1
inspect_ai/_view/www/src/@types/log.d.ts +20 -2
inspect_ai/_view/www/src/app/App.tsx +1 -15
inspect_ai/_view/www/src/app/appearance/icons.ts +4 -1
inspect_ai/_view/www/src/app/content/MetaDataGrid.tsx +24 -6
inspect_ai/_view/www/src/app/content/MetadataGrid.module.css +0 -5
inspect_ai/_view/www/src/app/content/RenderedContent.tsx +221 -205
inspect_ai/_view/www/src/app/log-view/LogViewContainer.tsx +2 -1
inspect_ai/_view/www/src/app/log-view/tabs/SamplesTab.tsx +5 -0
inspect_ai/_view/www/src/app/routing/url.ts +84 -4
inspect_ai/_view/www/src/app/samples/InlineSampleDisplay.module.css +0 -5
inspect_ai/_view/www/src/app/samples/SampleDialog.module.css +1 -1
inspect_ai/_view/www/src/app/samples/SampleDisplay.module.css +7 -0
inspect_ai/_view/www/src/app/samples/SampleDisplay.tsx +26 -19
inspect_ai/_view/www/src/app/samples/SampleSummaryView.module.css +1 -2
inspect_ai/_view/www/src/app/samples/chat/ChatMessage.tsx +8 -6
inspect_ai/_view/www/src/app/samples/chat/ChatMessageRow.tsx +0 -4
inspect_ai/_view/www/src/app/samples/chat/ChatViewVirtualList.tsx +3 -2
inspect_ai/_view/www/src/app/samples/chat/MessageContent.tsx +2 -0
inspect_ai/_view/www/src/app/samples/chat/MessageContents.tsx +2 -0
inspect_ai/_view/www/src/app/samples/chat/messages.ts +1 -0
inspect_ai/_view/www/src/app/samples/chat/tools/ToolCallView.tsx +1 -0
inspect_ai/_view/www/src/app/samples/list/SampleRow.tsx +1 -1
inspect_ai/_view/www/src/app/samples/scores/SampleScoresGrid.module.css +2 -2
inspect_ai/_view/www/src/app/samples/transcript/ErrorEventView.tsx +2 -3
inspect_ai/_view/www/src/app/samples/transcript/InfoEventView.tsx +1 -1
inspect_ai/_view/www/src/app/samples/transcript/InputEventView.tsx +1 -2
inspect_ai/_view/www/src/app/samples/transcript/ModelEventView.module.css +1 -1
inspect_ai/_view/www/src/app/samples/transcript/ModelEventView.tsx +1 -1
inspect_ai/_view/www/src/app/samples/transcript/SampleInitEventView.tsx +1 -1
inspect_ai/_view/www/src/app/samples/transcript/SampleLimitEventView.tsx +3 -2
inspect_ai/_view/www/src/app/samples/transcript/SandboxEventView.tsx +4 -5
inspect_ai/_view/www/src/app/samples/transcript/ScoreEventView.tsx +1 -1
inspect_ai/_view/www/src/app/samples/transcript/SpanEventView.tsx +1 -2
inspect_ai/_view/www/src/app/samples/transcript/StepEventView.tsx +1 -3
inspect_ai/_view/www/src/app/samples/transcript/SubtaskEventView.tsx +1 -2
inspect_ai/_view/www/src/app/samples/transcript/ToolEventView.tsx +3 -4
inspect_ai/_view/www/src/app/samples/transcript/TranscriptPanel.module.css +42 -0
inspect_ai/_view/www/src/app/samples/transcript/TranscriptPanel.tsx +77 -0
inspect_ai/_view/www/src/app/samples/transcript/TranscriptVirtualList.tsx +27 -71
inspect_ai/_view/www/src/app/samples/transcript/TranscriptVirtualListComponent.module.css +13 -3
inspect_ai/_view/www/src/app/samples/transcript/TranscriptVirtualListComponent.tsx +27 -2
inspect_ai/_view/www/src/app/samples/transcript/event/EventPanel.module.css +1 -0
inspect_ai/_view/www/src/app/samples/transcript/event/EventPanel.tsx +21 -22
inspect_ai/_view/www/src/app/samples/transcript/outline/OutlineRow.module.css +45 -0
inspect_ai/_view/www/src/app/samples/transcript/outline/OutlineRow.tsx +223 -0
inspect_ai/_view/www/src/app/samples/transcript/outline/TranscriptOutline.module.css +10 -0
inspect_ai/_view/www/src/app/samples/transcript/outline/TranscriptOutline.tsx +258 -0
inspect_ai/_view/www/src/app/samples/transcript/outline/tree-visitors.ts +187 -0
inspect_ai/_view/www/src/app/samples/transcript/state/StateEventRenderers.tsx +8 -1
inspect_ai/_view/www/src/app/samples/transcript/state/StateEventView.tsx +3 -4
inspect_ai/_view/www/src/app/samples/transcript/transform/hooks.ts +78 -0
inspect_ai/_view/www/src/app/samples/transcript/transform/treeify.ts +340 -135
inspect_ai/_view/www/src/app/samples/transcript/transform/utils.ts +3 -0
inspect_ai/_view/www/src/app/samples/transcript/types.ts +2 -0
inspect_ai/_view/www/src/app/types.ts +5 -1
inspect_ai/_view/www/src/client/api/api-browser.ts +2 -2
inspect_ai/_view/www/src/components/LiveVirtualList.tsx +6 -1
inspect_ai/_view/www/src/components/MarkdownDiv.tsx +1 -1
inspect_ai/_view/www/src/components/PopOver.tsx +422 -0
inspect_ai/_view/www/src/components/PulsingDots.module.css +9 -9
inspect_ai/_view/www/src/components/PulsingDots.tsx +4 -1
inspect_ai/_view/www/src/components/StickyScroll.tsx +183 -0
inspect_ai/_view/www/src/components/TabSet.tsx +4 -0
inspect_ai/_view/www/src/state/hooks.ts +52 -2
inspect_ai/_view/www/src/state/logSlice.ts +4 -3
inspect_ai/_view/www/src/state/samplePolling.ts +8 -0
inspect_ai/_view/www/src/state/sampleSlice.ts +53 -9
inspect_ai/_view/www/src/state/scrolling.ts +152 -0
inspect_ai/_view/www/src/utils/attachments.ts +7 -0
inspect_ai/_view/www/src/utils/python.ts +18 -0
inspect_ai/_view/www/yarn.lock +269 -6
inspect_ai/agent/_react.py +12 -7
inspect_ai/agent/_run.py +46 -11
inspect_ai/analysis/beta/_dataframe/samples/table.py +19 -18
inspect_ai/log/_bundle.py +5 -3
inspect_ai/log/_log.py +3 -3
inspect_ai/log/_recorders/file.py +2 -9
inspect_ai/log/_transcript.py +1 -1
inspect_ai/model/_call_tools.py +6 -2
inspect_ai/model/_openai.py +1 -1
inspect_ai/model/_openai_responses.py +78 -39
inspect_ai/model/_openai_web_search.py +31 -0
inspect_ai/model/_providers/anthropic.py +3 -6
inspect_ai/model/_providers/azureai.py +72 -3
inspect_ai/model/_providers/openai.py +2 -1
inspect_ai/model/_providers/providers.py +1 -1
inspect_ai/scorer/_metric.py +1 -2
inspect_ai/solver/_task_state.py +2 -2
inspect_ai/tool/_tool.py +6 -2
inspect_ai/tool/_tool_def.py +27 -4
inspect_ai/tool/_tool_info.py +2 -0
inspect_ai/tool/_tools/_web_search/_google.py +15 -4
inspect_ai/tool/_tools/_web_search/_tavily.py +35 -12
inspect_ai/tool/_tools/_web_search/_web_search.py +214 -45
inspect_ai/util/__init__.py +6 -0
inspect_ai/util/_json.py +3 -0
inspect_ai/util/_limit.py +374 -141
inspect_ai/util/_sandbox/docker/compose.py +20 -11
inspect_ai/util/_span.py +1 -1
{inspect_ai-0.3.98.dist-info → inspect_ai-0.3.100.dist-info}/METADATA +3 -3
{inspect_ai-0.3.98.dist-info → inspect_ai-0.3.100.dist-info}/RECORD +131 -117
{inspect_ai-0.3.98.dist-info → inspect_ai-0.3.100.dist-info}/WHEEL +1 -1
{inspect_ai-0.3.98.dist-info → inspect_ai-0.3.100.dist-info}/entry_points.txt +0 -0
{inspect_ai-0.3.98.dist-info → inspect_ai-0.3.100.dist-info}/licenses/LICENSE +0 -0
{inspect_ai-0.3.98.dist-info → inspect_ai-0.3.100.dist-info}/top_level.txt +0 -0

inspect_ai/model/_call_tools.py CHANGED Viewed

@@ -4,6 +4,7 @@ import types
 from copy import copy
 from dataclasses import is_dataclass
 from datetime import date, datetime, time
+from enum import EnumMeta
 from logging import getLogger
 from textwrap import dedent
 from types import UnionType
@@ -172,7 +173,7 @@ async def execute_tools(
             except LimitExceededError as ex:
                 tool_error = ToolCallError(
                     "limit",
-                    f"The tool exceeded its {ex.type} limit of {ex.limit}.",
+                    f"The tool exceeded its {ex.type} limit of {ex.limit_str}.",
                 )
             except ToolParsingError as ex:
                 tool_error = ToolCallError("parsing", ex.message)
@@ -497,7 +498,7 @@ async def agent_handoff(
             ChatMessageUser(
                 content=(
                     f"The {agent_name} exceeded its {limit_error.type} limit of "
-                    f"{limit_error.limit}."
+                    f"{limit_error.limit_str}."
                 )
             )
         )
@@ -548,6 +549,7 @@ def tools_info(
                     name=tool.name,
                     description=tool.description,
                     parameters=tool.parameters,
+                    options=tool.options,
                 )
             )
     return tools_info
@@ -652,6 +654,8 @@ def tool_param(type_hint: Type[Any], input: Any) -> Any:
             return type_hint(**dataclass_data)
         elif issubclass(type_hint, BaseModel):
             return type_hint(**input)
+        elif isinstance(type_hint, EnumMeta):
+            return type_hint(input)
         else:
             return input
     elif origin is list or origin is List:

inspect_ai/model/_openai.py CHANGED Viewed

@@ -594,7 +594,7 @@ def chat_choices_from_openai(
             stop_reason=as_stop_reason(choice.finish_reason),
             logprobs=(
                 Logprobs(**choice.logprobs.model_dump())
-                if choice.logprobs is not None
+                if choice.logprobs and choice.logprobs.content is not None
                 else None
             ),
         )

inspect_ai/model/_openai_responses.py CHANGED Viewed

@@ -1,6 +1,5 @@
 import json
-from itertools import chain
-from typing import TypedDict, cast
+from typing import Sequence, TypedDict, cast
 from openai.types.responses import (
     FunctionToolParam,
@@ -8,6 +7,8 @@ from openai.types.responses import (
     ResponseComputerToolCallParam,
     ResponseFunctionToolCall,
     ResponseFunctionToolCallParam,
+    ResponseFunctionWebSearch,
+    ResponseFunctionWebSearchParam,
     ResponseInputContentParam,
     ResponseInputImageParam,
     ResponseInputItemParam,
@@ -51,6 +52,7 @@ from inspect_ai.model._openai_computer_use import (
     maybe_computer_use_preview_tool,
     tool_call_from_openai_computer_tool_call,
 )
+from inspect_ai.model._openai_web_search import maybe_web_search_tool
 from inspect_ai.tool._tool_call import ToolCall
 from inspect_ai.tool._tool_choice import ToolChoice
 from inspect_ai.tool._tool_info import ToolInfo
@@ -174,6 +176,12 @@ def openai_responses_chat_choices(
     return [ChatCompletionChoice(message=message, stop_reason=stop_reason)]
+def is_native_tool_configured(
+    tools: Sequence[ToolInfo], config: GenerateConfig
+) -> bool:
+    return any(_maybe_native_tool_param(tool, config) is not None for tool in tools)
 # The next two function perform transformations between OpenAI types an Inspect
 # ChatMessageAssistant. Here is a diagram that helps visualize the transforms.
 # ┌───────────────────────────┐    ┌───────────────────────────┐    ┌───────────────────────────┐
@@ -207,7 +215,6 @@ def openai_responses_chat_choices(
 class _AssistantInternal(TypedDict):
-    output_message_id: str | None
     tool_message_ids: dict[str, str]
@@ -237,17 +244,17 @@ def _chat_message_assistant_from_openai_response(
     # collect output and tool calls
     message_content: list[Content] = []
     tool_calls: list[ToolCall] = []
-    internal = _AssistantInternal(output_message_id=None, tool_message_ids={})
+    internal = _AssistantInternal(tool_message_ids={})
     for output in response.output:
         match output:
             case ResponseOutputMessage(content=content, id=id):
-                assert internal["output_message_id"] is None, "Multiple message outputs"
-                internal["output_message_id"] = id
                 message_content.extend(
                     [
-                        ContentText(text=c.text)
+                        ContentText(text=c.text, internal={"id": id})
                         if isinstance(c, ResponseOutputText)
-                        else ContentText(text=c.refusal, refusal=True)
+                        else ContentText(
+                            text=c.refusal, refusal=True, internal={"id": id}
+                        )
                         for c in content
                     ]
                 )
@@ -277,6 +284,13 @@ def _chat_message_assistant_from_openai_response(
                         tool_calls.append(
                             tool_call_from_openai_computer_tool_call(output)
                         )
+                    case ResponseFunctionWebSearch():
+                        # We don't currently capture this since the model did the
+                        # "tool call" internally. It's conceivable that could be
+                        # forced to include it in `.internal` in the future, but
+                        # for now we just ignore it.
+                        # {"id":"ws_682cdcec3fa88198bc10b38fafefbd5e077e89e31fd4a3d5","status":"completed","type":"web_search_call"}
+                        pass
                     case _:
                         raise ValueError(f"Unexpected output type: {output.__class__}")
@@ -304,25 +318,39 @@ def _openai_input_items_from_chat_message_assistant(
     field of the `ChatMessageAssistant` to help it provide the proper id's the
     items in the returned list.
     """
-    (output_message_id, tool_message_ids) = _ids_from_assistant_internal(message)
+    tool_message_ids = _ids_from_assistant_internal(message)
     # we want to prevent yielding output messages in the case where we have an
     # 'internal' field (so the message came from the model API as opposed to
-    # being user synthesized) AND there is no output_message_id (indicating that
-    # when reading the message from the server we didn't find output). this could
-    # happen e.g. when a react() agent sets the output.completion in response
+    # being user synthesized) AND there are no ContentText items with message IDs
+    # (indicating that when reading the message from the server we didn't find output).
+    # this could happen e.g. when a react() agent sets the output.completion in response
     # to a submit() tool call
-    suppress_output_message = message.internal is not None and output_message_id is None
+    content_items: list[ContentText | ContentReasoning] = (
+        [ContentText(text=message.content)]
+        if isinstance(message.content, str)
+        else [
+            c for c in message.content if isinstance(c, ContentText | ContentReasoning)
+        ]
+    )
+    has_content_with_ids = any(
+        isinstance(c, ContentText)
+        and isinstance(c.internal, dict)
+        and "id" in c.internal
+        for c in content_items
+    )
+    suppress_output_message = message.internal is not None and not has_content_with_ids
     # if we are not storing messages on the server then blank these out
     if not store:
-        output_message_id = None
         tool_message_ids = {}
-    # items to return -- ensure we use a single output message (and just chain
-    # additional content on to it)
+    # items to return
     items: list[ResponseInputItemParam] = []
-    output_message: ResponseOutputMessageParam | None = None
+    # group content by message ID
+    messages_by_id: dict[
+        str | None, list[ResponseOutputTextParam | ResponseOutputRefusalParam]
+    ] = {}
     for content in (
         list[ContentText | ContentReasoning]([ContentText(text=message.content)])
@@ -352,6 +380,14 @@ def _openai_input_items_from_chat_message_assistant(
                 if suppress_output_message:
                     continue
+                # get the message ID from ContentText.modelJson
+                content_message_id: str | None = None
+                if isinstance(content.internal, dict) and "id" in content.internal:
+                    id_value = content.internal["id"]
+                    content_message_id = id_value if isinstance(id_value, str) else None
+                else:
+                    content_message_id = None
                 new_content = (
                     ResponseOutputRefusalParam(type="refusal", refusal=text)
                     if refusal
@@ -359,22 +395,24 @@ def _openai_input_items_from_chat_message_assistant(
                         type="output_text", text=text, annotations=[]
                     )
                 )
-                if output_message is None:
-                    output_message = ResponseOutputMessageParam(
-                        type="message",
-                        role="assistant",
-                        # this actually can be `None`, and it will in fact be `None` when the
-                        # assistant message is synthesized by the scaffold as opposed to being
-                        # replayed from the model (or when store=False)
-                        id=output_message_id,  # type: ignore[typeddict-item]
-                        content=[new_content],
-                        status="completed",
-                    )
-                    items.append(output_message)
-                else:
-                    output_message["content"] = chain(
-                        output_message["content"], [new_content]
-                    )
+                if content_message_id not in messages_by_id:
+                    messages_by_id[content_message_id] = []
+                messages_by_id[content_message_id].append(new_content)
+    # create ResponseOutputMessage for each unique ID
+    for msg_id, content_list in messages_by_id.items():
+        output_message = ResponseOutputMessageParam(
+            type="message",
+            role="assistant",
+            # this actually can be `None`, and it will in fact be `None` when the
+            # assistant message is synthesized by the scaffold as opposed to being
+            # replayed from the model (or when store=False)
+            id=msg_id,  # type: ignore[typeddict-item]
+            content=content_list,
+            status="completed",
+        )
+        items.append(output_message)
     return items + _tool_call_items_from_assistant_message(message, tool_message_ids)
@@ -399,7 +437,7 @@ def _maybe_native_tool_param(
 ) -> ToolParam | None:
     return (
         (
-            maybe_computer_use_preview_tool(tool)
+            maybe_computer_use_preview_tool(tool) or maybe_web_search_tool(tool)
             # or self.text_editor_tool_param(tool)
             # or self.bash_tool_param(tool)
         )
@@ -442,22 +480,23 @@ def _tool_call_items_from_assistant_message(
 def _ids_from_assistant_internal(
     message: ChatMessageAssistant,
-) -> tuple[str | None, dict[str, str]]:
+) -> dict[str, str]:
     if message.internal is not None:
         assert isinstance(message.internal, dict), (
             "OpenAI ChatMessageAssistant internal must be an _AssistantInternal"
         )
         internal = cast(_AssistantInternal, message.internal)
-        return (internal["output_message_id"], internal["tool_message_ids"])
+        return internal["tool_message_ids"]
     else:
-        return None, {}
+        return {}
 _ResponseToolCallParam = (
-    ResponseFunctionToolCallParam | ResponseComputerToolCallParam
+    ResponseFunctionToolCallParam
+    | ResponseComputerToolCallParam
+    | ResponseFunctionWebSearchParam
     # | ResponseFileSearchToolCallParam
     # | ResponseFunctionToolCallParam
-    # | ResponseFunctionWebSearchParam
 )

inspect_ai/model/_openai_web_search.py ADDED Viewed

@@ -0,0 +1,31 @@
+from typing import cast
+from openai.types.responses import WebSearchTool, WebSearchToolParam
+from inspect_ai.tool._tool_info import ToolInfo
+def maybe_web_search_tool(tool: ToolInfo) -> WebSearchToolParam | None:
+    return (
+        _web_search_tool(tool.options["openai"])
+        if tool.name == "web_search" and tool.options and "openai" in tool.options
+        else None
+    )
+def _web_search_tool(maybe_openai_options: object) -> WebSearchToolParam:
+    if maybe_openai_options is None:
+        maybe_openai_options = {}
+    elif not isinstance(maybe_openai_options, dict):
+        raise TypeError(
+            f"Expected a dictionary for openai_options, got {type(maybe_openai_options)}"
+        )
+    openai_options = (
+        WebSearchTool.model_validate(
+            {"type": "web_search_preview", **maybe_openai_options}
+        )
+        if maybe_openai_options
+        else WebSearchTool(type="web_search_preview")
+    )
+    return cast(WebSearchToolParam, openai_options.model_dump(exclude_none=True))

inspect_ai/model/_providers/anthropic.py CHANGED Viewed

@@ -356,12 +356,9 @@ class AnthropicAPI(ModelAPI):
         if isinstance(ex, APIStatusError):
             # for unknown reasons, anthropic does not always set status_code == 529
             # for "overloaded_error" so we check for it explicitly
-            if (
-                isinstance(ex.body, dict)
-                and isinstance(ex.body.get("error", {}), dict)
-                and ex.body.get("error", {}).get("type", "") == "overloaded_error"
-            ):
-                return True
+            if isinstance(ex.body, dict):
+                if "overloaded_error" in str(ex.body):
+                    return True
             # standard http status code checking
             return is_retryable_http_status(ex.status_code)

inspect_ai/model/_providers/azureai.py CHANGED Viewed

@@ -1,3 +1,4 @@
+import functools
 import json
 import os
 from copy import copy
@@ -151,7 +152,7 @@ class AzureAIAPI(ModelAPI):
         # prepare request
         request = dict(
-            messages=await chat_request_messages(input, handler),
+            messages=await chat_request_messages(input, handler, self.is_mistral()),
             **self.completion_params(config),
         )
         # newer versions of vllm reject requests with tools or tool_choice if the
@@ -280,9 +281,77 @@ class AzureAIAPI(ModelAPI):
 async def chat_request_messages(
-    messages: list[ChatMessage], handler: ChatAPIHandler | None
+    messages: list[ChatMessage],
+    handler: ChatAPIHandler | None,
+    is_mistral: bool = False,
+) -> list[ChatRequestMessage]:
+    chat_messages = [
+        await chat_request_message(message, handler) for message in messages
+    ]
+    if is_mistral:
+        chat_messages = functools.reduce(mistral_message_reducer, chat_messages, [])
+    return chat_messages
+def mistral_message_reducer(
+    messages: list[ChatRequestMessage],
+    message: ChatRequestMessage,
 ) -> list[ChatRequestMessage]:
-    return [await chat_request_message(message, handler) for message in messages]
+    """Fold any user messages found immediately after tool messages into the last tool message."""
+    if (
+        len(messages) > 0
+        and isinstance(messages[-1], ToolMessage)
+        and isinstance(message, UserMessage)
+    ):
+        messages[-1] = fold_user_message_into_tool_message(messages[-1], message)
+    else:
+        messages.append(message)
+    return messages
+def fold_user_message_into_tool_message(
+    tool_message: ToolMessage,
+    user_message: UserMessage,
+) -> ToolMessage:
+    def convert_content_items_to_string(list_content: list[ContentItem]) -> str:
+        if not all(
+            isinstance(item, (TextContentItem | ImageContentItem))
+            for item in list_content
+        ):
+            raise TypeError(
+                "Expected all items to be TextContentItem or ImageContentItem"
+            )
+        parts = []
+        for item in list_content:
+            if isinstance(item, TextContentItem):
+                parts.append(item.text)
+            elif isinstance(item, ImageContentItem):
+                parts.append(f"[Image: {item.image_url.url}]")
+            else:
+                raise ValueError("Unexpected content item type")
+        return "".join(parts)
+    def normalise_content(
+        content: str | list[ContentItem] | None,
+    ) -> str | None:
+        return (
+            None
+            if content is None
+            else convert_content_items_to_string(content)
+            if isinstance(content, list)
+            else content
+        )
+    tool_content = normalise_content(tool_message.content)
+    user_content = normalise_content(user_message.content)
+    return ToolMessage(
+        content=(tool_content or "") + (user_content or ""),
+        tool_call_id=tool_message.tool_call_id,
+    )
 async def chat_request_message(

inspect_ai/model/_providers/openai.py CHANGED Viewed

@@ -42,6 +42,7 @@ from .._openai import (
     openai_media_filter,
     openai_should_retry,
 )
+from .._openai_responses import is_native_tool_configured
 from .openai_o1 import generate_o1
 from .util import environment_prerequisite_error, model_base_url
@@ -241,7 +242,7 @@ class OpenAIAPI(ModelAPI):
                 tools=tools,
                 **self.completion_params(config, False),
             )
-        elif self.responses_api:
+        elif self.responses_api or is_native_tool_configured(tools, config):
             return await generate_responses(
                 client=self.client,
                 http_hooks=self._http_hooks,

inspect_ai/model/_providers/providers.py CHANGED Viewed

@@ -105,7 +105,7 @@ def vertex() -> type[ModelAPI]:
 def google() -> type[ModelAPI]:
     FEATURE = "Google API"
     PACKAGE = "google-genai"
-    MIN_VERSION = "1.12.1"
+    MIN_VERSION = "1.16.1"
     # verify we have the package
     try:

inspect_ai/scorer/_metric.py CHANGED Viewed

@@ -7,7 +7,6 @@ from typing import (
     Protocol,
     Type,
     Union,
-    cast,
     overload,
     runtime_checkable,
 )
@@ -356,7 +355,7 @@ def metric(
             )
             return metric
-        return metric_register(cast(Callable[P, Metric], metric_wrapper), metric_name)
+        return metric_register(metric_wrapper, metric_name)
     # for decorators with an explicit name, one more wrapper for the name
     if isinstance(name, str):

inspect_ai/solver/_task_state.py CHANGED Viewed

@@ -290,7 +290,7 @@ class TaskState:
         return self._tools
     @tools.setter
-    def tools(self, tools: list[Tool | ToolDef]) -> None:
+    def tools(self, tools: Sequence[Tool | ToolDef]) -> None:
         self._tools.clear()
         for tool in tools:
             self._tools.append(tool if isinstance(tool, Tool) else tool.as_tool())
@@ -353,7 +353,7 @@ class TaskState:
     def completed(self) -> bool:
         """Is the task completed.
-        Additionally, checks message and token limits and raises if they are exceeded, and also checks for an operator interrupt of the sample.
+        Additionally, checks for an operator interrupt of the sample.
         """
         from inspect_ai.log._samples import set_active_sample_total_messages

inspect_ai/tool/_tool.py CHANGED Viewed

@@ -224,13 +224,15 @@ def tool(
             tool_parallel = parallel
             tool_viewer = viewer
             tool_model_input = model_input
+            tool_options: dict[str, object] | None = None
             if is_registry_object(tool):
-                _, _, reg_parallel, reg_viewer, reg_model_input = tool_registry_info(
-                    tool
+                _, _, reg_parallel, reg_viewer, reg_model_input, options = (
+                    tool_registry_info(tool)
                 )
                 tool_parallel = parallel and reg_parallel
                 tool_viewer = viewer or reg_viewer
                 tool_model_input = model_input or reg_model_input
+                tool_options = options
             # tag the object
             registry_tag(
@@ -247,6 +249,7 @@ def tool(
                             tool_model_input
                             or getattr(tool, TOOL_INIT_MODEL_INPUT, None)
                         ),
+                        TOOL_OPTIONS: tool_options,
                     },
                 ),
                 *args,
@@ -267,6 +270,7 @@ TOOL_PROMPT = "prompt"
 TOOL_PARALLEL = "parallel"
 TOOL_VIEWER = "viewer"
 TOOL_MODEL_INPUT = "model_input"
+TOOL_OPTIONS = "options"
 TOOL_INIT_MODEL_INPUT = "__TOOL_INIT_MODEL_INPUT__"

inspect_ai/tool/_tool_def.py CHANGED Viewed

@@ -16,6 +16,7 @@ from inspect_ai._util.registry import (
 from ._tool import (
     TOOL_MODEL_INPUT,
+    TOOL_OPTIONS,
     TOOL_PARALLEL,
     TOOL_PROMPT,
     TOOL_VIEWER,
@@ -44,6 +45,7 @@ class ToolDef:
         parallel: bool | None = None,
         viewer: ToolCallViewer | None = None,
         model_input: ToolCallModelInput | None = None,
+        options: dict[str, object] | None = None,
     ) -> None:
         """Create a tool definition.
@@ -59,6 +61,8 @@ class ToolDef:
           viewer: Optional tool call viewer implementation.
           model_input: Optional function that determines how
               tool call results are played back as model input.
+          options: Optional property bag that can be used by the model provider
+              to customize the implementation of the tool
         Returns:
           Tool definition.
@@ -82,6 +86,7 @@ class ToolDef:
             self.parallel = parallel if parallel is not None else tdef.parallel
             self.viewer = viewer or tdef.viewer
             self.model_input = model_input or tdef.model_input
+            self.options = options or tdef.options
         # if its not a tool then extract tool_info if all fields have not
         # been provided explicitly
@@ -112,6 +117,7 @@ class ToolDef:
             self.parallel = parallel is not False
             self.viewer = viewer
             self.model_input = model_input
+            self.options = options
     tool: Callable[..., Any]
     """Callable to execute tool."""
@@ -134,13 +140,20 @@ class ToolDef:
     model_input: ToolCallModelInput | None
     """Custom model input presenter for tool calls."""
+    options: dict[str, object] | None = None
+    """Optional property bag that can be used by the model provider to customize the implementation of the tool"""
     def as_tool(self) -> Tool:
         """Convert a ToolDef to a Tool."""
         tool = self.tool
         info = RegistryInfo(
             type="tool",
             name=self.name,
-            metadata={TOOL_PARALLEL: self.parallel, TOOL_VIEWER: self.viewer},
+            metadata={
+                TOOL_PARALLEL: self.parallel,
+                TOOL_VIEWER: self.viewer,
+                TOOL_OPTIONS: self.options,
+            },
         )
         set_registry_info(tool, info)
         set_registry_params(tool, {})
@@ -189,11 +202,12 @@ class ToolDefFields(NamedTuple):
     parallel: bool
     viewer: ToolCallViewer | None
     model_input: ToolCallModelInput | None
+    options: dict[str, object] | None
 def tool_def_fields(tool: Tool) -> ToolDefFields:
     # get tool_info
-    name, prompt, parallel, viewer, model_input = tool_registry_info(tool)
+    name, prompt, parallel, viewer, model_input, options = tool_registry_info(tool)
     tool_info = parse_tool_info(tool)
     # if there is a description then append any prompt to the
@@ -234,19 +248,28 @@ def tool_def_fields(tool: Tool) -> ToolDefFields:
         parallel=parallel,
         viewer=viewer,
         model_input=model_input,
+        options=options,
     )
 def tool_registry_info(
     tool: Tool,
-) -> tuple[str, str | None, bool, ToolCallViewer | None, ToolCallModelInput | None]:
+) -> tuple[
+    str,
+    str | None,
+    bool,
+    ToolCallViewer | None,
+    ToolCallModelInput | None,
+    dict[str, object] | None,
+]:
     info = registry_info(tool)
     name = info.name.split("/")[-1]
     prompt = info.metadata.get(TOOL_PROMPT, None)
     parallel = info.metadata.get(TOOL_PARALLEL, True)
     viewer = info.metadata.get(TOOL_VIEWER, None)
     model_input = info.metadata.get(TOOL_MODEL_INPUT, None)
-    return name, prompt, parallel, viewer, model_input
+    options = info.metadata.get(TOOL_OPTIONS, None)
+    return name, prompt, parallel, viewer, model_input, options
 def validate_tool_parameters(tool_name: str, parameters: dict[str, ToolParam]) -> None:

inspect_ai/tool/_tool_info.py CHANGED Viewed

@@ -49,6 +49,8 @@ class ToolInfo(BaseModel):
     """Short description of tool."""
     parameters: ToolParams = Field(default_factory=ToolParams)
     """JSON Schema of tool parameters object."""
+    options: dict[str, object] | None = Field(default=None)
+    """Optional property bag that can be used by the model provider to customize the implementation of the tool"""
 def parse_tool_info(func: Callable[..., Any]) -> ToolInfo:

inspect_ai/tool/_tools/_web_search/_google.py CHANGED Viewed

@@ -4,6 +4,7 @@ from typing import Awaitable, Callable
 import anyio
 import httpx
 from bs4 import BeautifulSoup, NavigableString
+from pydantic import BaseModel
 from tenacity import (
     retry,
     retry_if_exception,
@@ -23,6 +24,13 @@ Page Content: {text}
 """
+class GoogleOptions(BaseModel):
+    num_results: int | None = None
+    max_provider_calls: int | None = None
+    max_connections: int | None = None
+    model: str | None = None
 class SearchLink:
     def __init__(self, url: str, snippet: str) -> None:
         self.url = url
@@ -42,11 +50,14 @@ def maybe_get_google_api_keys() -> tuple[str, str] | None:
 def google_search_provider(
-    num_results: int,
-    max_provider_calls: int,
-    max_connections: int,
-    model: str | None,
+    in_options: dict[str, object] | None = None,
 ) -> Callable[[str], Awaitable[str | None]]:
+    options = GoogleOptions.model_validate(in_options) if in_options else None
+    num_results = (options.num_results if options else None) or 3
+    max_provider_calls = (options.max_provider_calls if options else None) or 3
+    max_connections = (options.max_connections if options else None) or 10
+    model = options.model if options else None
     keys = maybe_get_google_api_keys()
     if not keys:
         raise PrerequisiteError(

inspect-ai 0.3.98__py3-none-any.whl → 0.3.100__py3-none-any.whl

inspect-ai 0.3.98__py3-none-any.whl → 0.3.100__py3-none-any.whl