inspect-ai 0.3.103__py3-none-any.whl → 0.3.104__py3-none-any.whl

This diff represents the changes between publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (110)
  1. inspect_ai/_cli/common.py +2 -1
  2. inspect_ai/_cli/eval.py +2 -2
  3. inspect_ai/_display/core/active.py +3 -0
  4. inspect_ai/_display/core/config.py +1 -0
  5. inspect_ai/_display/core/panel.py +21 -13
  6. inspect_ai/_display/core/results.py +3 -7
  7. inspect_ai/_display/core/rich.py +3 -5
  8. inspect_ai/_display/log/__init__.py +0 -0
  9. inspect_ai/_display/log/display.py +173 -0
  10. inspect_ai/_display/plain/display.py +2 -2
  11. inspect_ai/_display/rich/display.py +2 -4
  12. inspect_ai/_display/textual/app.py +1 -6
  13. inspect_ai/_display/textual/widgets/task_detail.py +3 -14
  14. inspect_ai/_display/textual/widgets/tasks.py +1 -1
  15. inspect_ai/_eval/eval.py +1 -1
  16. inspect_ai/_eval/evalset.py +2 -2
  17. inspect_ai/_eval/registry.py +6 -1
  18. inspect_ai/_eval/run.py +5 -1
  19. inspect_ai/_eval/task/constants.py +1 -0
  20. inspect_ai/_eval/task/log.py +2 -0
  21. inspect_ai/_eval/task/run.py +1 -1
  22. inspect_ai/_util/citation.py +88 -0
  23. inspect_ai/_util/content.py +24 -2
  24. inspect_ai/_util/json.py +17 -2
  25. inspect_ai/_util/registry.py +19 -4
  26. inspect_ai/_view/schema.py +0 -6
  27. inspect_ai/_view/www/dist/assets/index.css +82 -24
  28. inspect_ai/_view/www/dist/assets/index.js +10124 -9808
  29. inspect_ai/_view/www/log-schema.json +418 -1
  30. inspect_ai/_view/www/node_modules/flatted/python/flatted.py +149 -0
  31. inspect_ai/_view/www/node_modules/katex/src/fonts/generate_fonts.py +58 -0
  32. inspect_ai/_view/www/node_modules/katex/src/metrics/extract_tfms.py +114 -0
  33. inspect_ai/_view/www/node_modules/katex/src/metrics/extract_ttfs.py +122 -0
  34. inspect_ai/_view/www/node_modules/katex/src/metrics/format_json.py +28 -0
  35. inspect_ai/_view/www/node_modules/katex/src/metrics/parse_tfm.py +211 -0
  36. inspect_ai/_view/www/package.json +2 -2
  37. inspect_ai/_view/www/src/@types/log.d.ts +140 -39
  38. inspect_ai/_view/www/src/app/content/RecordTree.tsx +13 -0
  39. inspect_ai/_view/www/src/app/log-view/LogView.tsx +1 -1
  40. inspect_ai/_view/www/src/app/routing/logNavigation.ts +31 -0
  41. inspect_ai/_view/www/src/app/routing/{navigationHooks.ts → sampleNavigation.ts} +39 -86
  42. inspect_ai/_view/www/src/app/samples/SampleDialog.tsx +1 -1
  43. inspect_ai/_view/www/src/app/samples/SampleDisplay.tsx +1 -1
  44. inspect_ai/_view/www/src/app/samples/chat/MessageCitations.module.css +16 -0
  45. inspect_ai/_view/www/src/app/samples/chat/MessageCitations.tsx +63 -0
  46. inspect_ai/_view/www/src/app/samples/chat/MessageContent.module.css +6 -0
  47. inspect_ai/_view/www/src/app/samples/chat/MessageContent.tsx +174 -25
  48. inspect_ai/_view/www/src/app/samples/chat/MessageContents.tsx +21 -3
  49. inspect_ai/_view/www/src/app/samples/chat/content-data/ContentDataView.module.css +7 -0
  50. inspect_ai/_view/www/src/app/samples/chat/content-data/ContentDataView.tsx +111 -0
  51. inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearch.module.css +10 -0
  52. inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearch.tsx +14 -0
  53. inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearchResults.module.css +19 -0
  54. inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearchResults.tsx +49 -0
  55. inspect_ai/_view/www/src/app/samples/chat/messages.ts +7 -1
  56. inspect_ai/_view/www/src/app/samples/chat/tools/ToolCallView.tsx +12 -2
  57. inspect_ai/_view/www/src/app/samples/chat/types.ts +4 -0
  58. inspect_ai/_view/www/src/app/samples/list/SampleList.tsx +1 -1
  59. inspect_ai/_view/www/src/app/samples/sampleLimit.ts +2 -2
  60. inspect_ai/_view/www/src/app/samples/transcript/ModelEventView.tsx +1 -1
  61. inspect_ai/_view/www/src/app/samples/transcript/SampleLimitEventView.tsx +4 -4
  62. inspect_ai/_view/www/src/app/samples/transcript/outline/TranscriptOutline.tsx +1 -1
  63. inspect_ai/_view/www/src/components/MarkdownDiv.tsx +15 -2
  64. inspect_ai/_view/www/src/tests/README.md +2 -2
  65. inspect_ai/_view/www/src/utils/git.ts +3 -1
  66. inspect_ai/_view/www/src/utils/html.ts +6 -0
  67. inspect_ai/agent/_handoff.py +3 -3
  68. inspect_ai/log/_condense.py +5 -0
  69. inspect_ai/log/_file.py +4 -1
  70. inspect_ai/log/_log.py +9 -4
  71. inspect_ai/log/_recorders/json.py +4 -2
  72. inspect_ai/log/_util.py +2 -0
  73. inspect_ai/model/__init__.py +14 -0
  74. inspect_ai/model/_call_tools.py +13 -4
  75. inspect_ai/model/_chat_message.py +3 -0
  76. inspect_ai/model/_openai_responses.py +80 -34
  77. inspect_ai/model/_providers/_anthropic_citations.py +158 -0
  78. inspect_ai/model/_providers/_google_citations.py +100 -0
  79. inspect_ai/model/_providers/anthropic.py +196 -34
  80. inspect_ai/model/_providers/google.py +94 -22
  81. inspect_ai/model/_providers/mistral.py +20 -7
  82. inspect_ai/model/_providers/openai.py +11 -10
  83. inspect_ai/model/_providers/openai_compatible.py +3 -2
  84. inspect_ai/model/_providers/openai_responses.py +2 -5
  85. inspect_ai/model/_providers/perplexity.py +123 -0
  86. inspect_ai/model/_providers/providers.py +13 -2
  87. inspect_ai/model/_providers/vertex.py +3 -0
  88. inspect_ai/model/_trim.py +5 -0
  89. inspect_ai/tool/__init__.py +14 -0
  90. inspect_ai/tool/_mcp/_mcp.py +5 -2
  91. inspect_ai/tool/_mcp/sampling.py +19 -3
  92. inspect_ai/tool/_mcp/server.py +1 -1
  93. inspect_ai/tool/_tool.py +10 -1
  94. inspect_ai/tool/_tools/_web_search/_base_http_provider.py +104 -0
  95. inspect_ai/tool/_tools/_web_search/_exa.py +78 -0
  96. inspect_ai/tool/_tools/_web_search/_google.py +22 -25
  97. inspect_ai/tool/_tools/_web_search/_tavily.py +47 -65
  98. inspect_ai/tool/_tools/_web_search/_web_search.py +83 -36
  99. inspect_ai/tool/_tools/_web_search/_web_search_provider.py +7 -0
  100. inspect_ai/util/_display.py +11 -2
  101. inspect_ai/util/_sandbox/docker/compose.py +2 -2
  102. inspect_ai/util/_span.py +12 -1
  103. {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.104.dist-info}/METADATA +2 -2
  104. {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.104.dist-info}/RECORD +110 -86
  105. /inspect_ai/model/{_openai_computer_use.py → _providers/_openai_computer_use.py} +0 -0
  106. /inspect_ai/model/{_openai_web_search.py → _providers/_openai_web_search.py} +0 -0
  107. {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.104.dist-info}/WHEEL +0 -0
  108. {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.104.dist-info}/entry_points.txt +0 -0
  109. {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.104.dist-info}/licenses/LICENSE +0 -0
  110. {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.104.dist-info}/top_level.txt +0 -0
inspect_ai/model/_providers/anthropic.py

@@ -22,6 +22,8 @@ from anthropic.types import (
     MessageParam,
     RedactedThinkingBlock,
     RedactedThinkingBlockParam,
+    ServerToolUseBlock,
+    ServerToolUseBlockParam,
     TextBlock,
     TextBlockParam,
     ThinkingBlock,
@@ -31,6 +33,9 @@ from anthropic.types import (
     ToolTextEditor20250124Param,
     ToolUseBlock,
     ToolUseBlockParam,
+    WebSearchTool20250305Param,
+    WebSearchToolResultBlock,
+    WebSearchToolResultBlockParam,
     message_create_params,
 )
 from anthropic.types.beta import (
@@ -43,6 +48,7 @@ from typing_extensions import override
 from inspect_ai._util.constants import BASE_64_DATA_REMOVED, NO_CONTENT
 from inspect_ai._util.content import (
     Content,
+    ContentData,
     ContentImage,
     ContentReasoning,
     ContentText,
@@ -61,6 +67,10 @@ from .._generate_config import GenerateConfig
 from .._model import ModelAPI
 from .._model_call import ModelCall
 from .._model_output import ChatCompletionChoice, ModelOutput, ModelUsage, StopReason
+from .._providers._anthropic_citations import (
+    to_anthropic_citation,
+    to_inspect_citation,
+)
 from .util import environment_prerequisite_error, model_base_url
 from .util.hooks import HttpxHooks
 
@@ -70,6 +80,14 @@ ANTHROPIC_API_KEY = "ANTHROPIC_API_KEY"
 
 INTERNAL_COMPUTER_TOOL_NAME = "computer"
 
+WEB_SEARCH_COMPATIBLE_MODELS = [
+    "claude-opus-4-20250514",
+    "claude-sonnet-4-20250514",
+    "claude-3-7-sonnet-20250219",
+    "claude-3-5-sonnet-latest",
+    "claude-3-5-haiku-latest",
+]
+
 
 class AnthropicAPI(ModelAPI):
     def __init__(
@@ -232,27 +250,19 @@ class AnthropicAPI(ModelAPI):
             if self.extra_body is not None:
                 request["extra_body"] = self.extra_body
 
-            # make request (unless overrideen, stream if we are using reasoning)
+            # make request (unless overridden, stream if we are using reasoning)
             streaming = (
                 self.is_using_thinking(config)
                 if self.streaming == "auto"
                 else self.streaming
             )
-            if streaming:
-                async with self.client.messages.stream(**request) as stream:
-                    message = await stream.get_final_message()
-            else:
-                message = await self.client.messages.create(**request, stream=False)
 
-            # set response for ModelCall
-            response = message.model_dump()
-
-            # extract output
-            output = await model_output_from_message(
-                self.client, self.service_model_name(), message, tools
+            message, output = await self._perform_request_and_continuations(
+                request, streaming, tools
             )
 
-            # return output and call
+            response = message.model_dump()
+
             return output, model_call()
 
         except BadRequestError as ex:
@@ -269,6 +279,50 @@ class AnthropicAPI(ModelAPI):
             else:
                 raise ex
 
+    async def _perform_request_and_continuations(
+        self,
+        request: dict[str, Any],
+        streaming: bool,
+        tools: list[ToolInfo],
+    ) -> tuple[Message, ModelOutput]:
+        """
+        This helper function is split out so that it can easily call itself recursively in cases where the model requires a continuation.
+
+        It considers the result from the initial request the "head" and the result
+        from the continuation the "tail".
+        """
+        if streaming:
+            async with self.client.messages.stream(**request) as stream:
+                head_message = await stream.get_final_message()
+        else:
+            head_message = await self.client.messages.create(**request, stream=False)
+
+        head_model_output, continuation_required = await model_output_from_message(
+            self.client, self.service_model_name(), head_message, tools
+        )
+
+        if continuation_required:
+            tail_request = dict(request)
+            tail_request["messages"] = request["messages"] + [
+                MessageParam(role=head_message.role, content=head_message.content)
+            ]
+            _, tail_model_output = await self._perform_request_and_continuations(
+                tail_request, streaming, tools
+            )
+
+            head_content = _content_list(head_model_output.message.content)
+            tail_content = _content_list(tail_model_output.message.content)
+            tail_model_output.message.content = head_content + tail_content
+
+            # TODO:
+            # It looks weird to return the head message with the tail output, but
+            # the contract for this function is that it returns the head message
+            # even when it has needed to recurse. This is because model_call()
+            # above doesn't currently support multiple requests
+            return head_message, tail_model_output
+
+        return head_message, head_model_output
+
     def completion_config(
         self, config: GenerateConfig
     ) -> tuple[dict[str, Any], dict[str, str], list[str]]:
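Note on the continuation contract: Anthropic returns a `pause_turn` stop reason when a server tool turn (for example, web search) is paused mid-flight, and expects the client to send the partial assistant turn back to resume it, which is what the recursion above implements. A minimal sketch of the same pattern against the public `anthropic` SDK, written as a loop instead of recursion (model name and prompt are placeholders):

```python
import anthropic

client = anthropic.Anthropic()

request: dict = {
    "model": "claude-3-5-sonnet-latest",
    "max_tokens": 1024,
    "tools": [{"type": "web_search_20250305", "name": "web_search"}],
    "messages": [{"role": "user", "content": "Summarize today's AI news."}],
}

message = client.messages.create(**request)
while message.stop_reason == "pause_turn":
    # echo the paused assistant turn back so the server resumes where it left off
    request["messages"] = request["messages"] + [
        {"role": message.role, "content": message.content}
    ]
    message = client.messages.create(**request)
```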
@@ -521,7 +575,11 @@ class AnthropicAPI(ModelAPI):
         self, tool: ToolInfo, config: GenerateConfig
     ) -> Optional["ToolParamDef"]:
         return (
-            (self.computer_use_tool_param(tool) or self.text_editor_tool_param(tool))
+            (
+                self.computer_use_tool_param(tool)
+                or self.text_editor_tool_param(tool)
+                or self.web_search_tool_param(tool)
+            )
             if config.internal_tools is not False
             else None
         )
@@ -598,6 +656,49 @@ class AnthropicAPI(ModelAPI):
         else:
             return None
 
+    def web_search_tool_param(
+        self, tool: ToolInfo
+    ) -> WebSearchTool20250305Param | None:
+        if (
+            tool.name == "web_search"
+            and tool.options
+            and "anthropic" in tool.options
+            and self.model_name in WEB_SEARCH_COMPATIBLE_MODELS
+        ):
+            return _web_search_tool_param(tool.options["anthropic"])
+        else:
+            return None
+
+
+def _web_search_tool_param(
+    maybe_anthropic_options: object,
+) -> WebSearchTool20250305Param:
+    if maybe_anthropic_options is not None and not isinstance(
+        maybe_anthropic_options, dict
+    ):
+        raise TypeError(
+            f"Expected a dictionary for anthropic_options, got {type(maybe_anthropic_options)}"
+        )
+
+    result = WebSearchTool20250305Param(
+        name="web_search",
+        type="web_search_20250305",
+    )
+
+    if maybe_anthropic_options:
+        if "allowed_domains" in maybe_anthropic_options:
+            result["allowed_domains"] = maybe_anthropic_options["allowed_domains"]
+        if "blocked_domains" in maybe_anthropic_options:
+            result["blocked_domains"] = maybe_anthropic_options["blocked_domains"]
+        if "cache_control" in maybe_anthropic_options:
+            result["cache_control"] = maybe_anthropic_options["cache_control"]
+        if "max_uses" in maybe_anthropic_options:
+            result["max_uses"] = maybe_anthropic_options["max_uses"]
+        if "user_location" in maybe_anthropic_options:
+            result["user_location"] = maybe_anthropic_options["user_location"]
+
+    return result
+
 
 # tools can be either a stock tool param or a special Anthropic native use tool param
 ToolParamDef = (
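`_web_search_tool_param` only copies the option keys listed above; anything else in the provider options dict is ignored. A hypothetical eval-side sketch of how Anthropic options would flow into `tool.options["anthropic"]` (the exact shape of the `web_search()` argument is assumed from how this diff reads `tool.options`):

```python
from inspect_ai import Task, task
from inspect_ai.dataset import Sample
from inspect_ai.solver import generate, use_tools
from inspect_ai.tool import web_search


@task
def search_news() -> Task:
    return Task(
        dataset=[Sample(input="What changed in inspect_ai 0.3.104?")],
        solver=[
            use_tools(
                web_search(
                    # assumed shape: provider name -> provider-specific options
                    {"anthropic": {"max_uses": 3, "blocked_domains": ["example.com"]}}
                )
            ),
            generate(),
        ],
    )
```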
@@ -605,6 +706,7 @@ ToolParamDef = (
     | BetaToolComputerUse20250124Param
     | ToolTextEditor20250124Param
     | BetaToolTextEditor20241022Param
+    | WebSearchTool20250305Param
 )
 
 
@@ -614,6 +716,7 @@ def add_cache_control(
     | BetaToolComputerUse20250124Param
     | ToolTextEditor20250124Param
     | BetaToolTextEditor20241022Param
+    | WebSearchTool20250305Param
     | dict[str, Any],
 ) -> None:
     cast(dict[str, Any], param)["cache_control"] = {"type": "ephemeral"}
@@ -698,6 +801,8 @@ async def message_param(message: ChatMessage) -> MessageParam:
             | ImageBlockParam
             | ThinkingBlockParam
             | RedactedThinkingBlockParam
+            | ServerToolUseBlockParam
+            | WebSearchToolResultBlockParam
         ]
     ) = message.error.message
     # anthropic requires that content be populated when
@@ -735,6 +840,8 @@ async def message_param(message: ChatMessage) -> MessageParam:
         | RedactedThinkingBlockParam
         | ImageBlockParam
         | ToolUseBlockParam
+        | ServerToolUseBlockParam
+        | WebSearchToolResultBlockParam
     ] = (
         [TextBlockParam(type="text", text=message.content or NO_CONTENT)]
         if isinstance(message.content, str)
@@ -785,7 +892,7 @@ async def model_output_from_message(
     model: str,
     message: Message,
     tools: list[ToolInfo],
-) -> ModelOutput:
+) -> tuple[ModelOutput, bool]:
     # extract content and tool calls
     content: list[Content] = []
     reasoning_tokens = 0
@@ -800,7 +907,20 @@ async def model_output_from_message(
             content_text = content_text.replace("<result>", "").replace(
                 "</result>", ""
             )
-            content.append(ContentText(type="text", text=content_text))
+            content.append(
+                ContentText(
+                    type="text",
+                    text=content_text,
+                    citations=(
+                        [
+                            to_inspect_citation(citation)
+                            for citation in content_block.citations
+                        ]
+                        if content_block.citations
+                        else None
+                    ),
+                )
+            )
         elif isinstance(content_block, ToolUseBlock):
             tool_calls = tool_calls or []
             (tool_name, internal_name) = _names_for_tool_call(content_block.name, tools)
@@ -812,6 +932,10 @@ async def model_output_from_message(
                     internal=internal_name,
                 )
             )
+        elif isinstance(content_block, ServerToolUseBlock):
+            content.append(ContentData(data=content_block.model_dump()))
+        elif isinstance(content_block, WebSearchToolResultBlock):
+            content.append(ContentData(data=content_block.model_dump()))
         elif isinstance(content_block, RedactedThinkingBlock):
             content.append(
                 ContentReasoning(reasoning=content_block.data, redacted=True)
@@ -827,11 +951,12 @@ async def model_output_from_message(
     )
 
     # resolve choice
+    stop_reason, pause_turn = message_stop_reason(message)
     choice = ChatCompletionChoice(
         message=ChatMessageAssistant(
             content=content, tool_calls=tool_calls, model=model, source="generate"
         ),
-        stop_reason=message_stop_reason(message),
+        stop_reason=stop_reason,
     )
 
     # return ModelOutput
@@ -844,17 +969,20 @@ async def model_output_from_message(
         + (input_tokens_cache_read or 0)
         + message.usage.output_tokens  # includes reasoning tokens
     )
-    return ModelOutput(
-        model=message.model,
-        choices=[choice],
-        usage=ModelUsage(
-            input_tokens=message.usage.input_tokens,
-            output_tokens=message.usage.output_tokens,
-            total_tokens=total_tokens,
-            input_tokens_cache_write=input_tokens_cache_write,
-            input_tokens_cache_read=input_tokens_cache_read,
-            reasoning_tokens=reasoning_tokens if reasoning_tokens > 0 else None,
+    return (
+        ModelOutput(
+            model=message.model,
+            choices=[choice],
+            usage=ModelUsage(
+                input_tokens=message.usage.input_tokens,
+                output_tokens=message.usage.output_tokens,
+                total_tokens=total_tokens,
+                input_tokens_cache_write=input_tokens_cache_write,
+                input_tokens_cache_read=input_tokens_cache_read,
+                reasoning_tokens=reasoning_tokens if reasoning_tokens > 0 else None,
+            ),
         ),
+        pause_turn,
     )
 
 
@@ -893,16 +1021,18 @@ def _names_for_tool_call(
     )
 
 
-def message_stop_reason(message: Message) -> StopReason:
+def message_stop_reason(message: Message) -> tuple[StopReason, bool]:
     match message.stop_reason:
         case "end_turn" | "stop_sequence":
-            return "stop"
+            return "stop", False
         case "tool_use":
-            return "tool_calls"
+            return "tool_calls", False
         case "max_tokens":
-            return message.stop_reason
+            return message.stop_reason, False
+        case "refusal":
+            return "content_filter", False
         case _:
-            return "unknown"
+            return "unknown", message.stop_reason == "pause_turn"
 
 
 def split_system_messages(
  def split_system_messages(
@@ -918,9 +1048,24 @@ def split_system_messages(
918
1048
 
919
1049
  async def message_param_content(
920
1050
  content: Content,
921
- ) -> TextBlockParam | ImageBlockParam | ThinkingBlockParam | RedactedThinkingBlockParam:
1051
+ ) -> (
1052
+ TextBlockParam
1053
+ | ImageBlockParam
1054
+ | ThinkingBlockParam
1055
+ | RedactedThinkingBlockParam
1056
+ | ServerToolUseBlockParam
1057
+ | WebSearchToolResultBlockParam
1058
+ ):
922
1059
  if isinstance(content, ContentText):
923
- return TextBlockParam(type="text", text=content.text or NO_CONTENT)
1060
+ citations = (
1061
+ [to_anthropic_citation(citation) for citation in content.citations]
1062
+ if content.citations
1063
+ else None
1064
+ )
1065
+
1066
+ return TextBlockParam(
1067
+ type="text", text=content.text or NO_CONTENT, citations=citations
1068
+ )
924
1069
  elif isinstance(content, ContentImage):
925
1070
  # resolve to url
926
1071
  image = await file_as_data_uri(content.image)
@@ -948,6 +1093,19 @@ async def message_param_content(
948
1093
  return ThinkingBlockParam(
949
1094
  type="thinking", thinking=content.reasoning, signature=content.signature
950
1095
  )
1096
+ elif isinstance(content, ContentData):
1097
+ match content.data.get("type", None):
1098
+ case "server_tool_use":
1099
+ return cast(
1100
+ ServerToolUseBlockParam,
1101
+ ServerToolUseBlock.model_validate(content.data).model_dump(),
1102
+ )
1103
+ case "web_search_tool_result":
1104
+ return cast(
1105
+ WebSearchToolResultBlockParam,
1106
+ WebSearchToolResultBlock.model_validate(content.data).model_dump(),
1107
+ )
1108
+ raise NotImplementedError()
951
1109
  else:
952
1110
  raise RuntimeError(
953
1111
  "Anthropic models do not currently support audio or video inputs."
@@ -990,3 +1148,7 @@ def model_call_filter(key: JsonValue | None, value: JsonValue) -> JsonValue:
         value = copy(value)
         value.update(data=BASE_64_DATA_REMOVED)
     return value
+
+
+def _content_list(input: str | list[Content]) -> list[Content]:
+    return [ContentText(text=input)] if isinstance(input, str) else input
inspect_ai/model/_providers/google.py

@@ -26,6 +26,7 @@ from google.genai.types import (
     GenerateContentResponse,
     GenerateContentResponsePromptFeedback,
     GenerateContentResponseUsageMetadata,
+    GoogleSearch,
     HarmBlockThreshold,
     HarmCategory,
     HttpOptions,
@@ -48,6 +49,7 @@ from inspect_ai._util.content import (
 )
 from inspect_ai._util.content import (
     ContentAudio,
+    ContentData,
     ContentImage,
     ContentReasoning,
     ContentText,
@@ -74,6 +76,7 @@ from inspect_ai.model import (
     TopLogprob,
 )
 from inspect_ai.model._model_call import ModelCall
+from inspect_ai.model._providers._google_citations import get_candidate_citations
 from inspect_ai.tool import (
     ToolCall,
     ToolChoice,
@@ -247,7 +250,7 @@ class GoogleGenAIAPI(ModelAPI):
 
         # Create google-genai types.
         gemini_contents = await as_chat_messages(client, input)
-        gemini_tools = chat_tools(tools) if len(tools) > 0 else None
+        gemini_tools = self.chat_tools(tools) if len(tools) > 0 else None
         gemini_tool_config = chat_tool_config(tool_choice) if len(tools) > 0 else None
         parameters = GenerateContentConfig(
             http_options=HttpOptions(headers={HttpHooks.REQUEST_ID_HEADER: request_id}),
@@ -362,6 +365,61 @@ class GoogleGenAIAPI(ModelAPI):
         else:
             return None
 
+    def _use_native_search(self, tool: ToolInfo) -> bool:
+        return (
+            tool.name == "web_search"
+            and tool.options is not None
+            and "gemini" in tool.options
+            # Support "starts with" Gemini 2.0
+            and (self.is_gemini() and not self.is_gemini_1_5())
+        )
+
+    def _categorize_tool(
+        self, acc: tuple[bool, list[FunctionDeclaration]], tool: ToolInfo
+    ) -> tuple[bool, list[FunctionDeclaration]]:
+        """Reducer function that categorizes tools into native search vs function declarations.
+
+        Returns:
+            Tuple of (has_native_search, function_declarations) where has_native_search
+            is True if any tool uses native search, and function_declarations contains
+            all non-native-search tools converted to FunctionDeclaration objects.
+        """
+        return (
+            (True, acc[1])
+            if self._use_native_search(tool)
+            else (
+                acc[0],
+                acc[1]
+                + [
+                    FunctionDeclaration(
+                        name=tool.name,
+                        description=tool.description,
+                        parameters=schema_from_param(tool.parameters)
+                        if len(tool.parameters.properties) > 0
+                        else None,
+                    )
+                ],
+            )
+        )
+
+    def chat_tools(self, tools: list[ToolInfo]) -> ToolListUnion:
+        has_native_search, function_declarations = functools.reduce(
+            self._categorize_tool, tools, (False, list[FunctionDeclaration]())
+        )
+
+        # TODO: Google doesn't yet support native search concurrently with other tools.
+        # Revisit this from time to time to adapt when they fix it.
+        if has_native_search and function_declarations:
+            raise ValueError(
+                "Gemini does not yet support native search concurrently with other tools."
+            )
+
+        return (
+            [Tool(google_search=GoogleSearch())]
+            if has_native_search
+            else [Tool(function_declarations=function_declarations)]
+        )
+
 
 def safety_settings_to_list(
     safety_settings: list[SafetySettingDict],
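The reducer threads a `(has_native_search, declarations)` accumulator through `functools.reduce`, so a single pass both detects native search and collects the remaining function declarations. A self-contained illustration of the same pattern, with plain strings standing in for `ToolInfo`:

```python
import functools


def categorize(acc: tuple[bool, list[str]], tool: str) -> tuple[bool, list[str]]:
    # "web_search" stands in for a tool that maps to Gemini's native search
    return (True, acc[1]) if tool == "web_search" else (acc[0], acc[1] + [tool])


has_search, declarations = functools.reduce(
    categorize, ["web_search", "bash", "python"], (False, [])
)
assert has_search is True
assert declarations == ["bash", "python"]  # a mixed list like this makes chat_tools() raise
```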
@@ -500,6 +558,8 @@ async def content_part(client: Client, content: InspectContent | str) -> Part:
         return Part.from_text(text=content.text or NO_CONTENT)
     elif isinstance(content, ContentReasoning):
         return Part.from_text(text=content.reasoning or NO_CONTENT)
+    elif isinstance(content, ContentData):
+        assert False, "Google provider should never encounter ContentData"
     else:
         return await chat_content_to_part(client, content)
 
@@ -538,20 +598,6 @@ async def extract_system_message_as_parts(
     return system_parts or None
 
 
-def chat_tools(tools: list[ToolInfo]) -> ToolListUnion:
-    declarations = [
-        FunctionDeclaration(
-            name=tool.name,
-            description=tool.description,
-            parameters=schema_from_param(tool.parameters)
-            if len(tool.parameters.properties) > 0
-            else None,
-        )
-        for tool in tools
-    ]
-    return [Tool(function_declarations=declarations)]
-
-
 # https://ai.google.dev/gemini-api/tutorials/extract_structured_data#define_the_schema
 def schema_from_param(
     param: ToolParam | ToolParams, nullable: bool | None = False
@@ -656,19 +702,36 @@ def completion_choice_from_candidate(
             | ContentImage
             | ContentAudio
             | ContentVideo
+            | ContentData
         ]
     ) = ""
     # content.parts can be None when the finish_reason is MALFORMED_FUNCTION_CALL
     elif candidate.content.parts is None:
         content = ""
     else:
-        content = []
-        for part in candidate.content.parts:
-            if part.text is not None:
-                if part.thought is True:
-                    content.append(ContentReasoning(reasoning=part.text))
-                else:
-                    content.append(ContentText(text=part.text))
+        # Google's grounded search metadata provides start/end indices for cited
+        # text based on the joining of all separate text parts (despite the doc
+        # suggesting that they provide part_index). Thankfully, the doc also says:
+        #
+        #   Exactly one field within a Part should be set, representing the specific type
+        #   of content being conveyed. Using multiple fields within the same `Part`
+        #   instance is considered invalid.
+        #
+        # That means that we can safely collapse adjacent parts with a `text` field
+        # and not fear that we're breaking other types of content parts
+        parts = functools.reduce(
+            _combine_text_parts, candidate.content.parts, list[Part]()
+        )
+
+        content = [
+            ContentReasoning(reasoning=part.text)
+            if part.thought is True
+            else ContentText(
+                text=part.text, citations=get_candidate_citations(candidate)
+            )
+            for part in parts
+            if part.text is not None
+        ]
 
     # now tool calls
     tool_calls: list[ToolCall] = []
@@ -922,3 +985,12 @@ async def file_for_content(
         files_db.put(content_sha256, str(upload.name))
     # return the file
     return upload
+
+
+def _combine_text_parts(acc: list[Part], part: Part) -> list[Part]:
+    """Combine adjacent text parts into a single part."""
+    return (
+        acc + [part]
+        if part.text is None or len(acc) == 0 or acc[-1].text is None
+        else acc[:-1] + [Part(text=acc[-1].text + part.text)]
+    )
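To see what `_combine_text_parts` does, here is a small sketch (assuming the function above is in scope): adjacent text parts merge, while a non-text part breaks the run, so citation offsets computed over the joined text stay valid.

```python
import functools

from google.genai.types import Blob, Part

parts = [
    Part(text="Hello, "),
    Part(text="world."),
    Part(inline_data=Blob(mime_type="image/png", data=b"...")),  # non-text part
    Part(text="Bye."),
]

combined = functools.reduce(_combine_text_parts, parts, list[Part]())
assert [p.text for p in combined] == ["Hello, world.", None, "Bye."]
```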
inspect_ai/model/_providers/mistral.py

@@ -44,9 +44,15 @@ from typing_extensions import override
 # TODO: Migration guide:
 # https://github.com/mistralai/client-python/blob/main/MIGRATION.md
 from inspect_ai._util.constants import NO_CONTENT
-from inspect_ai._util.content import Content, ContentImage, ContentText
+from inspect_ai._util.content import (
+    Content,
+    ContentImage,
+    ContentReasoning,
+    ContentText,
+)
 from inspect_ai._util.http import is_retryable_http_status
 from inspect_ai._util.images import file_as_data_uri
+from inspect_ai.model._reasoning import parse_content_with_reasoning
 from inspect_ai.tool import ToolCall, ToolChoice, ToolFunction, ToolInfo
 
 from ..._util.httpx import httpx_should_retry
@@ -481,26 +487,33 @@ def completion_content(content: str | list[ContentChunk]) -> str | list[Content]
     if isinstance(content, str):
         return content
     else:
-        return [completion_content_chunk(c) for c in content]
+        return [item for c in content for item in completion_content_chunks(c)]
 
 
-def completion_content_chunk(content: ContentChunk) -> Content:
+def completion_content_chunks(content: ContentChunk) -> list[Content]:
     if isinstance(content, ReferenceChunk):
         raise TypeError("ReferenceChunk content is not supported by Inspect.")
     elif isinstance(content, TextChunk):
-        return ContentText(text=content.text)
+        parsed = parse_content_with_reasoning(content.text)
+        if parsed:
+            return [
+                ContentReasoning(reasoning=parsed.reasoning),
+                ContentText(text=parsed.content),
+            ]
+        else:
+            return [ContentText(text=content.text)]
     elif isinstance(content, DocumentURLChunk):
-        return ContentText(text=content.document_url)
+        return [ContentText(text=content.document_url)]
     else:
         if isinstance(content.image_url, str):
-            return ContentImage(image=content.image_url)
+            return [ContentImage(image=content.image_url)]
         else:
            match content.image_url.detail:
                case "low" | "high":
                    detail: Literal["auto", "low", "high"] = content.image_url.detail
                case _:
                    detail = "auto"
-            return ContentImage(image=content.image_url.url, detail=detail)
+            return [ContentImage(image=content.image_url.url, detail=detail)]
 
 
 def completion_choices_from_response(
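The net effect is that a Mistral `TextChunk` carrying inline reasoning is now split into `ContentReasoning` plus `ContentText` instead of a single text block. A sketch, assuming the `<think>` tag format that `parse_content_with_reasoning` targets:

```python
from inspect_ai.model._reasoning import parse_content_with_reasoning

# assumed input format: reasoning wrapped in <think> tags
parsed = parse_content_with_reasoning("<think>Recall 2 + 2.</think>The answer is 4.")
if parsed is not None:
    print(parsed.reasoning)  # "Recall 2 + 2."
    print(parsed.content)    # "The answer is 4."
```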
inspect_ai/model/_providers/openai.py

@@ -13,6 +13,7 @@ from openai._types import NOT_GIVEN
 from openai.types.chat import ChatCompletion
 from typing_extensions import override
 
+from inspect_ai._util.deprecation import deprecation_warning
 from inspect_ai._util.error import PrerequisiteError
 from inspect_ai._util.logger import warn_once
 from inspect_ai.model._openai import chat_choices_from_openai
@@ -64,6 +65,8 @@ class OpenAIAPI(ModelAPI):
         api_key: str | None = None,
         config: GenerateConfig = GenerateConfig(),
         responses_api: bool | None = None,
+        # Can't use the XxxDeprecatedArgs approach since this already has a **param
+        # but responses_store is deprecated and should not be used.
         responses_store: Literal["auto"] | bool = "auto",
         service_tier: str | None = None,
         client_timeout: float | None = None,
@@ -88,19 +91,18 @@ class OpenAIAPI(ModelAPI):
         )
 
         # is this a model we use responses api by default for?
-        responses_model = (
-            (self.is_o_series() and not self.is_o1_early())
-            or self.is_computer_use_preview()
-            or self.is_codex()
-        )
+        responses_preferred = (
+            self.is_o_series() and not self.is_o1_early()
+        ) or self.is_codex()
 
         # resolve whether we are forcing the responses api
-        self.responses_api = responses_api or responses_model
+        self.responses_api = self.is_computer_use_preview() or (
+            responses_api if responses_api is not None else responses_preferred
+        )
 
         # resolve whether we are using the responses store
-        self.responses_store = (
-            responses_store if isinstance(responses_store, bool) else responses_model
-        )
+        if isinstance(responses_store, bool):
+            deprecation_warning("`responses_store` is no longer supported.")
 
         # set service tier if specified
         self.service_tier = service_tier
@@ -260,7 +262,6 @@ class OpenAIAPI(ModelAPI):
                 tool_choice=tool_choice,
                 config=config,
                 service_tier=self.service_tier,
-                store=self.responses_store,
             )
 
             # allocate request_id (so we can see it from ModelCall)
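Net behavior change for callers: `responses_store` is now ignored apart from a deprecation warning, and `store` is no longer sent on Responses API requests. A sketch of what now triggers the warning (the model name is illustrative):

```python
from inspect_ai.model import get_model

# passing an explicit bool now emits a deprecation warning instead of
# configuring response storage
model = get_model("openai/gpt-4o", responses_store=False)
```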