inspect-ai 0.3.58__py3-none-any.whl → 0.3.60__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
Files changed (166)
  1. inspect_ai/_cli/common.py +3 -1
  2. inspect_ai/_cli/eval.py +15 -9
  3. inspect_ai/_display/core/active.py +4 -1
  4. inspect_ai/_display/core/config.py +3 -3
  5. inspect_ai/_display/core/panel.py +7 -3
  6. inspect_ai/_display/plain/__init__.py +0 -0
  7. inspect_ai/_display/plain/display.py +203 -0
  8. inspect_ai/_display/rich/display.py +0 -5
  9. inspect_ai/_display/textual/widgets/port_mappings.py +110 -0
  10. inspect_ai/_display/textual/widgets/samples.py +79 -12
  11. inspect_ai/_display/textual/widgets/sandbox.py +37 -0
  12. inspect_ai/_eval/eval.py +10 -1
  13. inspect_ai/_eval/loader.py +79 -19
  14. inspect_ai/_eval/registry.py +6 -0
  15. inspect_ai/_eval/score.py +3 -1
  16. inspect_ai/_eval/task/results.py +51 -22
  17. inspect_ai/_eval/task/run.py +47 -13
  18. inspect_ai/_eval/task/sandbox.py +10 -5
  19. inspect_ai/_util/constants.py +1 -0
  20. inspect_ai/_util/port_names.py +61 -0
  21. inspect_ai/_util/text.py +23 -0
  22. inspect_ai/_view/www/App.css +31 -1
  23. inspect_ai/_view/www/dist/assets/index.css +31 -1
  24. inspect_ai/_view/www/dist/assets/index.js +25498 -2044
  25. inspect_ai/_view/www/log-schema.json +32 -2
  26. inspect_ai/_view/www/package.json +2 -0
  27. inspect_ai/_view/www/src/App.mjs +14 -16
  28. inspect_ai/_view/www/src/Types.mjs +1 -2
  29. inspect_ai/_view/www/src/api/Types.ts +133 -0
  30. inspect_ai/_view/www/src/api/{api-browser.mjs → api-browser.ts} +25 -13
  31. inspect_ai/_view/www/src/api/api-http.ts +219 -0
  32. inspect_ai/_view/www/src/api/api-shared.ts +47 -0
  33. inspect_ai/_view/www/src/api/{api-vscode.mjs → api-vscode.ts} +22 -19
  34. inspect_ai/_view/www/src/api/{client-api.mjs → client-api.ts} +93 -53
  35. inspect_ai/_view/www/src/api/index.ts +51 -0
  36. inspect_ai/_view/www/src/api/jsonrpc.ts +225 -0
  37. inspect_ai/_view/www/src/components/ChatView.mjs +133 -43
  38. inspect_ai/_view/www/src/components/DownloadButton.mjs +1 -1
  39. inspect_ai/_view/www/src/components/ExpandablePanel.mjs +0 -4
  40. inspect_ai/_view/www/src/components/LargeModal.mjs +19 -20
  41. inspect_ai/_view/www/src/components/TabSet.mjs +3 -1
  42. inspect_ai/_view/www/src/components/VirtualList.mjs +266 -84
  43. inspect_ai/_view/www/src/index.js +77 -4
  44. inspect_ai/_view/www/src/log/{remoteLogFile.mjs → remoteLogFile.ts} +62 -46
  45. inspect_ai/_view/www/src/navbar/Navbar.mjs +4 -1
  46. inspect_ai/_view/www/src/navbar/SecondaryBar.mjs +19 -10
  47. inspect_ai/_view/www/src/samples/SampleDialog.mjs +5 -1
  48. inspect_ai/_view/www/src/samples/SampleDisplay.mjs +23 -15
  49. inspect_ai/_view/www/src/samples/SampleList.mjs +19 -49
  50. inspect_ai/_view/www/src/samples/SampleScores.mjs +1 -1
  51. inspect_ai/_view/www/src/samples/SampleTranscript.mjs +8 -3
  52. inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +38 -26
  53. inspect_ai/_view/www/src/samples/SamplesTab.mjs +14 -11
  54. inspect_ai/_view/www/src/samples/SamplesTools.mjs +8 -8
  55. inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +712 -89
  56. inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +2 -2
  57. inspect_ai/_view/www/src/samples/tools/filters.mjs +260 -87
  58. inspect_ai/_view/www/src/samples/transcript/ErrorEventView.mjs +24 -2
  59. inspect_ai/_view/www/src/samples/transcript/EventPanel.mjs +29 -24
  60. inspect_ai/_view/www/src/samples/transcript/EventRow.mjs +1 -1
  61. inspect_ai/_view/www/src/samples/transcript/InfoEventView.mjs +24 -2
  62. inspect_ai/_view/www/src/samples/transcript/InputEventView.mjs +24 -2
  63. inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +31 -10
  64. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.mjs +24 -2
  65. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.mjs +23 -2
  66. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.mjs +24 -2
  67. inspect_ai/_view/www/src/samples/transcript/StepEventView.mjs +33 -3
  68. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.mjs +25 -2
  69. inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +25 -2
  70. inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +193 -11
  71. inspect_ai/_view/www/src/samples/transcript/Types.mjs +10 -0
  72. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +26 -2
  73. inspect_ai/_view/www/src/types/log.d.ts +13 -2
  74. inspect_ai/_view/www/src/utils/Format.mjs +10 -3
  75. inspect_ai/_view/www/src/utils/{Json.mjs → json-worker.ts} +13 -9
  76. inspect_ai/_view/www/src/utils/vscode.ts +36 -0
  77. inspect_ai/_view/www/src/workspace/WorkSpace.mjs +11 -5
  78. inspect_ai/_view/www/vite.config.js +7 -0
  79. inspect_ai/_view/www/yarn.lock +116 -0
  80. inspect_ai/approval/_human/__init__.py +0 -0
  81. inspect_ai/approval/_human/manager.py +1 -1
  82. inspect_ai/approval/_policy.py +12 -6
  83. inspect_ai/log/_log.py +1 -1
  84. inspect_ai/log/_samples.py +16 -0
  85. inspect_ai/log/_transcript.py +4 -1
  86. inspect_ai/model/_call_tools.py +59 -0
  87. inspect_ai/model/_conversation.py +16 -7
  88. inspect_ai/model/_generate_config.py +12 -12
  89. inspect_ai/model/_model.py +117 -18
  90. inspect_ai/model/_model_output.py +22 -2
  91. inspect_ai/model/_openai.py +383 -0
  92. inspect_ai/model/_providers/anthropic.py +152 -55
  93. inspect_ai/model/_providers/azureai.py +21 -21
  94. inspect_ai/model/_providers/bedrock.py +37 -40
  95. inspect_ai/model/_providers/goodfire.py +248 -0
  96. inspect_ai/model/_providers/google.py +46 -54
  97. inspect_ai/model/_providers/groq.py +7 -3
  98. inspect_ai/model/_providers/hf.py +6 -0
  99. inspect_ai/model/_providers/mistral.py +13 -12
  100. inspect_ai/model/_providers/openai.py +51 -218
  101. inspect_ai/model/_providers/openai_o1.py +11 -12
  102. inspect_ai/model/_providers/providers.py +23 -1
  103. inspect_ai/model/_providers/together.py +12 -12
  104. inspect_ai/model/_providers/util/__init__.py +2 -3
  105. inspect_ai/model/_providers/util/hf_handler.py +1 -1
  106. inspect_ai/model/_providers/util/llama31.py +1 -1
  107. inspect_ai/model/_providers/util/util.py +0 -76
  108. inspect_ai/model/_providers/vertex.py +1 -4
  109. inspect_ai/scorer/_metric.py +3 -0
  110. inspect_ai/scorer/_reducer/reducer.py +1 -1
  111. inspect_ai/scorer/_scorer.py +4 -3
  112. inspect_ai/solver/__init__.py +4 -5
  113. inspect_ai/solver/_basic_agent.py +1 -1
  114. inspect_ai/solver/_bridge/__init__.py +3 -0
  115. inspect_ai/solver/_bridge/bridge.py +100 -0
  116. inspect_ai/solver/_bridge/patch.py +170 -0
  117. inspect_ai/solver/_prompt.py +35 -5
  118. inspect_ai/solver/_solver.py +6 -0
  119. inspect_ai/solver/_task_state.py +80 -38
  120. inspect_ai/tool/__init__.py +2 -0
  121. inspect_ai/tool/_tool.py +12 -1
  122. inspect_ai/tool/_tool_call.py +10 -0
  123. inspect_ai/tool/_tool_def.py +16 -5
  124. inspect_ai/tool/_tool_with.py +21 -4
  125. inspect_ai/tool/beta/__init__.py +5 -0
  126. inspect_ai/tool/beta/_computer/__init__.py +3 -0
  127. inspect_ai/tool/beta/_computer/_common.py +133 -0
  128. inspect_ai/tool/beta/_computer/_computer.py +155 -0
  129. inspect_ai/tool/beta/_computer/_computer_split.py +198 -0
  130. inspect_ai/tool/beta/_computer/_resources/Dockerfile +100 -0
  131. inspect_ai/tool/beta/_computer/_resources/README.md +30 -0
  132. inspect_ai/tool/beta/_computer/_resources/entrypoint/entrypoint.sh +18 -0
  133. inspect_ai/tool/beta/_computer/_resources/entrypoint/novnc_startup.sh +20 -0
  134. inspect_ai/tool/beta/_computer/_resources/entrypoint/x11vnc_startup.sh +48 -0
  135. inspect_ai/tool/beta/_computer/_resources/entrypoint/xfce_startup.sh +13 -0
  136. inspect_ai/tool/beta/_computer/_resources/entrypoint/xvfb_startup.sh +48 -0
  137. inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +10 -0
  138. inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +10 -0
  139. inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/XPaint.desktop +10 -0
  140. inspect_ai/tool/beta/_computer/_resources/tool/__init__.py +0 -0
  141. inspect_ai/tool/beta/_computer/_resources/tool/_logger.py +22 -0
  142. inspect_ai/tool/beta/_computer/_resources/tool/_run.py +42 -0
  143. inspect_ai/tool/beta/_computer/_resources/tool/_tool_result.py +33 -0
  144. inspect_ai/tool/beta/_computer/_resources/tool/_x11_client.py +262 -0
  145. inspect_ai/tool/beta/_computer/_resources/tool/computer_tool.py +85 -0
  146. inspect_ai/tool/beta/_computer/_resources/tool/requirements.txt +0 -0
  147. inspect_ai/util/__init__.py +2 -0
  148. inspect_ai/util/_display.py +5 -0
  149. inspect_ai/util/_limit.py +26 -0
  150. inspect_ai/util/_sandbox/docker/docker.py +64 -1
  151. inspect_ai/util/_sandbox/docker/internal.py +3 -1
  152. inspect_ai/util/_sandbox/docker/prereqs.py +1 -1
  153. inspect_ai/util/_sandbox/environment.py +14 -0
  154. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/METADATA +3 -2
  155. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/RECORD +159 -126
  156. inspect_ai/_view/www/src/api/Types.mjs +0 -117
  157. inspect_ai/_view/www/src/api/api-http.mjs +0 -300
  158. inspect_ai/_view/www/src/api/api-shared.mjs +0 -10
  159. inspect_ai/_view/www/src/api/index.mjs +0 -49
  160. inspect_ai/_view/www/src/api/jsonrpc.mjs +0 -208
  161. inspect_ai/_view/www/src/samples/transcript/TranscriptState.mjs +0 -70
  162. inspect_ai/_view/www/src/utils/vscode.mjs +0 -16
  163. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/LICENSE +0 -0
  164. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/WHEEL +0 -0
  165. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/entry_points.txt +0 -0
  166. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/top_level.txt +0 -0
inspect_ai/model/_providers/anthropic.py

@@ -1,17 +1,26 @@
 import functools
 import os
+import sys
 from copy import copy
 from logging import getLogger
-from typing import Any, Literal, Tuple, cast
+from typing import Any, Literal, Tuple, TypedDict, cast
+
+if sys.version_info >= (3, 11):
+    from typing import NotRequired
+else:
+    from typing_extensions import NotRequired
 
 from anthropic import (
     APIConnectionError,
     AsyncAnthropic,
     AsyncAnthropicBedrock,
+    AsyncAnthropicVertex,
     BadRequestError,
     InternalServerError,
+    NotGiven,
     RateLimitError,
 )
+from anthropic._types import Body
 from anthropic.types import (
     ImageBlockParam,
     Message,
@@ -27,7 +36,11 @@ from anthropic.types import (
 from pydantic import JsonValue
 from typing_extensions import override
 
-from inspect_ai._util.constants import BASE_64_DATA_REMOVED, DEFAULT_MAX_RETRIES
+from inspect_ai._util.constants import (
+    BASE_64_DATA_REMOVED,
+    DEFAULT_MAX_RETRIES,
+    NO_CONTENT,
+)
 from inspect_ai._util.content import Content, ContentImage, ContentText
 from inspect_ai._util.error import exception_message
 from inspect_ai._util.images import file_as_data_uri
@@ -35,20 +48,11 @@ from inspect_ai._util.logger import warn_once
 from inspect_ai._util.url import data_uri_mime_type, data_uri_to_base64
 from inspect_ai.tool import ToolCall, ToolChoice, ToolFunction, ToolInfo
 
-from .._chat_message import (
-    ChatMessage,
-    ChatMessageAssistant,
-    ChatMessageSystem,
-)
+from .._chat_message import ChatMessage, ChatMessageAssistant, ChatMessageSystem
 from .._generate_config import GenerateConfig
 from .._model import ModelAPI
 from .._model_call import ModelCall
-from .._model_output import (
-    ChatCompletionChoice,
-    ModelOutput,
-    ModelUsage,
-    StopReason,
-)
+from .._model_output import ChatCompletionChoice, ModelOutput, ModelUsage, StopReason
 from .util import environment_prerequisite_error, model_base_url
 
 logger = getLogger(__name__)
@@ -63,15 +67,25 @@ class AnthropicAPI(ModelAPI):
         base_url: str | None = None,
         api_key: str | None = None,
         config: GenerateConfig = GenerateConfig(),
-        bedrock: bool = False,
         **model_args: Any,
     ):
         # extract any service prefix from model name
         parts = model_name.split("/")
         if len(parts) > 1:
-            service = parts[0]
-            bedrock = service == "bedrock"
+            self.service: str | None = parts[0]
             model_name = "/".join(parts[1:])
+        else:
+            self.service = None
+
+        # collect generate model_args (then delete them so we can pass the rest on)
+        def collect_model_arg(name: str) -> Any | None:
+            nonlocal model_args
+            value = model_args.get(name, None)
+            if value is not None:
+                model_args.pop(name)
+            return value
+
+        self.extra_body: Body | None = collect_model_arg("extra_body")
 
         # call super
         super().__init__(
@@ -83,7 +97,7 @@ class AnthropicAPI(ModelAPI):
         )
 
         # create client
-        if bedrock:
+        if self.is_bedrock():
             base_url = model_base_url(
                 base_url, ["ANTHROPIC_BEDROCK_BASE_URL", "BEDROCK_ANTHROPIC_BASE_URL"]
             )
@@ -94,7 +108,9 @@ class AnthropicAPI(ModelAPI):
             if base_region is None:
                 aws_region = os.environ.get("AWS_DEFAULT_REGION", None)
 
-            self.client: AsyncAnthropic | AsyncAnthropicBedrock = AsyncAnthropicBedrock(
+            self.client: (
+                AsyncAnthropic | AsyncAnthropicBedrock | AsyncAnthropicVertex
+            ) = AsyncAnthropicBedrock(
                 base_url=base_url,
                 max_retries=(
                     config.max_retries if config.max_retries else DEFAULT_MAX_RETRIES
@@ -102,6 +118,21 @@ class AnthropicAPI(ModelAPI):
                 aws_region=aws_region,
                 **model_args,
             )
+        elif self.is_vertex():
+            base_url = model_base_url(
+                base_url, ["ANTHROPIC_VERTEX_BASE_URL", "VERTEX_ANTHROPIC_BASE_URL"]
+            )
+            region = os.environ.get("ANTHROPIC_VERTEX_REGION", NotGiven())
+            project_id = os.environ.get("ANTHROPIC_VERTEX_PROJECT_ID", NotGiven())
+            self.client = AsyncAnthropicVertex(
+                region=region,
+                project_id=project_id,
+                base_url=base_url,
+                max_retries=(
+                    config.max_retries if config.max_retries else DEFAULT_MAX_RETRIES
+                ),
+                **model_args,
+            )
         else:
             # resolve api_key
             if not self.api_key:
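With the hardcoded bedrock flag replaced by a parsed service prefix, the one provider now routes to three backends. A minimal sketch of selecting each backend through get_model (the specific model names here are illustrative, not taken from the diff):

    from inspect_ai.model import get_model

    # direct Anthropic API
    model = get_model("anthropic/claude-3-5-sonnet-20241022")

    # AWS Bedrock via the "bedrock" service prefix
    bedrock = get_model("anthropic/bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0")

    # Google Vertex via the new "vertex" service prefix (region and project
    # are read from ANTHROPIC_VERTEX_REGION / ANTHROPIC_VERTEX_PROJECT_ID)
    vertex = get_model("anthropic/vertex/claude-3-5-sonnet-v2@20241022")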
@@ -118,13 +149,19 @@ class AnthropicAPI(ModelAPI):
                 **model_args,
             )
 
+    def is_bedrock(self) -> bool:
+        return self.service == "bedrock"
+
+    def is_vertex(self) -> bool:
+        return self.service == "vertex"
+
     async def generate(
         self,
         input: list[ChatMessage],
         tools: list[ToolInfo],
         tool_choice: ToolChoice,
         config: GenerateConfig,
-    ) -> ModelOutput | tuple[ModelOutput, ModelCall]:
+    ) -> ModelOutput | tuple[ModelOutput | Exception, ModelCall]:
         # setup request and response for ModelCall
         request: dict[str, Any] = {}
         response: dict[str, Any] = {}
@@ -142,7 +179,7 @@ class AnthropicAPI(ModelAPI):
                 system_param,
                 tools_param,
                 messages,
-                cache_prompt,
+                computer_use,
             ) = await resolve_chat_input(self.model_name, input, tools, config)
 
             # prepare request params (assembled this way so we can log the raw model call)
@@ -158,13 +195,15 @@ class AnthropicAPI(ModelAPI):
             # additional options
             request = request | self.completion_params(config)
 
-            # caching header
-            if cache_prompt:
-                request["extra_headers"] = {
-                    "anthropic-beta": "prompt-caching-2024-07-31"
-                }
+            # computer use beta
+            if computer_use:
+                request["extra_headers"] = {"anthropic-beta": "computer-use-2024-10-22"}
 
-            # call model
+            # extra_body
+            if self.extra_body is not None:
+                request["extra_body"] = self.extra_body
+
+            # make request
             message = await self.client.messages.create(**request, stream=False)
 
             # set response for ModelCall
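Because extra_body is popped from model_args in the constructor and attached to every request here, provider-specific body fields pass straight through to client.messages.create(). A hedged sketch of supplying it (the payload shown is purely illustrative):

    from inspect_ai.model import get_model

    # extra_body rides along on each generate call; any other unrecognized
    # model_args are still forwarded to the client constructor
    model = get_model(
        "anthropic/claude-3-5-sonnet-20241022",
        extra_body={"metadata": {"user_id": "example-user"}},
    )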
@@ -177,11 +216,7 @@ class AnthropicAPI(ModelAPI):
             return output, model_call()
 
         except BadRequestError as ex:
-            error_output = self.handle_bad_request(ex)
-            if error_output is not None:
-                return error_output, model_call()
-            else:
-                raise ex
+            return self.handle_bad_request(ex), model_call()
 
     def completion_params(self, config: GenerateConfig) -> dict[str, Any]:
         params = dict(model=self.model_name, max_tokens=cast(int, config.max_tokens))
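A recurring change across the providers in this release: generate() may now return an Exception (paired with the ModelCall for logging) instead of raising or returning None on handled request errors. A hedged sketch of consuming that contract directly against a ModelAPI instance (names and the surrounding scaffolding are assumed, not part of the diff):

    async def generate_or_raise(api, input, tools, tool_choice, config):
        # ModelAPI.generate may return a bare ModelOutput or a
        # (ModelOutput | Exception, ModelCall) tuple as of this release
        result = await api.generate(input, tools, tool_choice, config)
        if isinstance(result, tuple):
            output, call = result
            if isinstance(output, Exception):
                raise output  # the ModelCall is still available for logging
            return output, call
        return result, None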
@@ -234,7 +269,7 @@ class AnthropicAPI(ModelAPI):
         return True
 
     # convert some common BadRequestError states into 'refusal' model output
-    def handle_bad_request(self, ex: BadRequestError) -> ModelOutput | None:
+    def handle_bad_request(self, ex: BadRequestError) -> ModelOutput | Exception:
         error = exception_message(ex).lower()
         content: str | None = None
         stop_reason: StopReason | None = None
@@ -256,6 +291,9 @@ class AnthropicAPI(ModelAPI):
         elif "content filtering" in error:
             content = "Sorry, but I am unable to help with that request."
             stop_reason = "content_filter"
+        else:
+            content = error
+            stop_reason = "unknown"
 
         if content and stop_reason:
             return ModelOutput.from_content(
@@ -265,7 +303,21 @@ class AnthropicAPI(ModelAPI):
                 error=error,
             )
         else:
-            return None
+            return ex
+
+
+# native anthropic tool definitions for computer use beta
+# https://docs.anthropic.com/en/docs/build-with-claude/computer-use
+class ComputerUseToolParam(TypedDict):
+    type: str
+    name: str
+    display_width_px: NotRequired[int]
+    display_height_px: NotRequired[int]
+    display_number: NotRequired[int]
+
+
+# tools can be either a stock tool param or a special computer use tool param
+ToolParamDef = ToolParam | ComputerUseToolParam
 
 
 async def resolve_chat_input(
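Since the display fields are NotRequired, both a fully specified computer-use param and a bare {type, name} dict satisfy the TypedDict. A small illustrative sketch:

    # fully specified, as computer_use_tool_param() constructs it below
    full: ComputerUseToolParam = {
        "type": "computer_20241022",
        "name": "computer",
        "display_width_px": 1366,
        "display_height_px": 768,
        "display_number": 1,
    }

    # the display fields may be omitted and the dict still type-checks
    minimal: ComputerUseToolParam = {"type": "computer_20241022", "name": "computer"}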
@@ -273,7 +325,7 @@ async def resolve_chat_input(
     input: list[ChatMessage],
     tools: list[ToolInfo],
     config: GenerateConfig,
-) -> Tuple[list[TextBlockParam] | None, list[ToolParam], list[MessageParam], bool]:
+) -> Tuple[list[TextBlockParam] | None, list[ToolParamDef], list[MessageParam], bool]:
     # extract system message
     system_messages, messages = split_system_messages(input, config)
 
@@ -286,14 +338,7 @@ async def resolve_chat_input(
     )
 
     # tools
-    tools_params = [
-        ToolParam(
-            name=tool.name,
-            description=tool.description,
-            input_schema=tool.parameters.model_dump(exclude_none=True),
-        )
-        for tool in tools
-    ]
+    tools_params, computer_use = tool_params_for_tools(tools, config)
 
     # system messages
     if len(system_messages) > 0:
@@ -343,10 +388,66 @@ async def resolve_chat_input(
         add_cache_control(cast(dict[str, Any], content[-1]))
 
     # return chat input
-    return system_param, tools_params, message_params, cache_prompt
+    return system_param, tools_params, message_params, computer_use
+
+
+def tool_params_for_tools(
+    tools: list[ToolInfo], config: GenerateConfig
+) -> tuple[list[ToolParamDef], bool]:
+    # tool params and computer_use bit to return
+    tool_params: list[ToolParamDef] = []
+    computer_use = False
+
+    # for each tool, check if it has a native computer use implementation and use that
+    # when available (noting that we need to set the computer use request header)
+    for tool in tools:
+        computer_use_tool = (
+            computer_use_tool_param(tool)
+            if config.internal_tools is not False
+            else None
+        )
+        if computer_use_tool:
+            tool_params.append(computer_use_tool)
+            computer_use = True
+        else:
+            tool_params.append(
+                ToolParam(
+                    name=tool.name,
+                    description=tool.description,
+                    input_schema=tool.parameters.model_dump(exclude_none=True),
+                )
+            )
+
+    return tool_params, computer_use
 
 
-def add_cache_control(param: TextBlockParam | ToolParam | dict[str, Any]) -> None:
+def computer_use_tool_param(tool: ToolInfo) -> ComputerUseToolParam | None:
+    # check for compatible 'computer' tool
+    if tool.name == "computer" and (
+        sorted(tool.parameters.properties.keys())
+        == sorted(["action", "coordinate", "text"])
+    ):
+        return ComputerUseToolParam(
+            type="computer_20241022",
+            name="computer",
+            # Note: The dimensions passed here for display_width_px and display_height_px should
+            # match the dimensions of screenshots returned by the tool.
+            # Those dimensions will always be one of the values in MAX_SCALING_TARGETS
+            # in _x11_client.py.
+            # TODO: enhance this code to calculate the dimensions based on the scaled screen
+            # size used by the container.
+            display_width_px=1366,
+            display_height_px=768,
+            display_number=1,
+        )
+    # not a computer_use tool
+    else:
+        return None
+
+
+def add_cache_control(
+    param: TextBlockParam | ToolParam | ComputerUseToolParam | dict[str, Any],
+) -> None:
     cast(dict[str, Any], param)["cache_control"] = {"type": "ephemeral"}
 
 
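The config.internal_tools check above gives callers an opt-out: when it is False, the native computer-use mapping is skipped and every tool is sent as a stock ToolParam (so no beta header is added). A hedged sketch of setting it (the model name is illustrative):

    from inspect_ai.model import GenerateConfig, get_model

    model = get_model(
        "anthropic/claude-3-5-sonnet-20241022",
        config=GenerateConfig(internal_tools=False),
    )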
@@ -404,12 +505,13 @@ def message_tool_choice(tool_choice: ToolChoice) -> message_create_params.ToolCh
         return {"type": "auto"}
 
 
-# text we insert when there is no content passed
-# (as this will result in an Anthropic API error)
-NO_CONTENT = "(no content)"
-
-
 async def message_param(message: ChatMessage) -> MessageParam:
+    # if content is empty that is going to result in an error when we replay
+    # this message to claude, so in that case insert a NO_CONTENT message
+    if isinstance(message.content, list) and len(message.content) == 0:
+        message = message.model_copy()
+        message.content = [ContentText(text=NO_CONTENT)]
+
     # no system role for anthropic (this is more like an assertion,
     # as these should have already been filtered out)
     if message.role == "system":
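The empty-content guard moved here from model_output_from_message (removed below), so the placeholder is now injected when replaying any message with an empty content list, not only assistant output. A standalone sketch of the same logic, using the imports this file confirms (constructing the message this way is illustrative):

    from inspect_ai._util.constants import NO_CONTENT
    from inspect_ai._util.content import ContentText
    from inspect_ai.model import ChatMessageAssistant

    # a message replayed with an empty content list...
    message = ChatMessageAssistant(content=[])
    if isinstance(message.content, list) and len(message.content) == 0:
        message = message.model_copy()
        # ...now carries the "(no content)" placeholder instead
        message.content = [ContentText(text=NO_CONTENT)]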
@@ -451,7 +553,7 @@ async def message_param(message: ChatMessage) -> MessageParam:
     elif message.role == "assistant" and message.tool_calls:
         # first include content (claude <thinking>)
         tools_content: list[TextBlockParam | ImageBlockParam | ToolUseBlockParam] = (
-            [TextBlockParam(type="text", text=message.content)]
+            [TextBlockParam(type="text", text=message.content or NO_CONTENT)]
             if isinstance(message.content, str)
             else (
                 [(await message_param_content(content)) for content in message.content]
@@ -520,11 +622,6 @@ def model_output_from_message(message: Message, tools: list[ToolInfo]) -> ModelO
             )
         )
 
-    # if content is empty that is going to result in an error when we replay
-    # this message to claude, so in that case insert a NO_CONTENT message
-    if len(content) == 0:
-        content = [ContentText(text=NO_CONTENT)]
-
     # resolve choice
     choice = ChatCompletionChoice(
         message=ChatMessageAssistant(
inspect_ai/model/_providers/azureai.py

@@ -37,6 +37,7 @@ from inspect_ai.tool import ToolChoice, ToolInfo
 from inspect_ai.tool._tool_call import ToolCall
 from inspect_ai.tool._tool_choice import ToolFunction
 
+from .._call_tools import parse_tool_call
 from .._chat_message import (
     ChatMessage,
     ChatMessageAssistant,
@@ -60,7 +61,6 @@ from .util import (
 )
 from .util.chatapi import ChatAPIHandler
 from .util.llama31 import Llama31Handler
-from .util.util import parse_tool_call
 
 AZUREAI_API_KEY = "AZUREAI_API_KEY"
 AZUREAI_ENDPOINT_KEY = "AZUREAI_ENDPOINT_KEY"
@@ -130,7 +130,7 @@ class AzureAIAPI(ModelAPI):
         tools: list[ToolInfo],
         tool_choice: ToolChoice,
         config: GenerateConfig,
-    ) -> ModelOutput | tuple[ModelOutput, ModelCall]:
+    ) -> ModelOutput | tuple[ModelOutput | Exception, ModelCall]:
         # emulate tools (auto for llama, opt-in for others)
         if self.emulate_tools is None and self.is_llama():
             handler: ChatAPIHandler | None = Llama31Handler()
@@ -162,6 +162,19 @@ class AzureAIAPI(ModelAPI):
             model_extras=self.model_args,
         )
 
+        def model_call(response: ChatCompletions | None = None) -> ModelCall:
+            return ModelCall.create(
+                request=request
+                | dict(
+                    messages=[message.as_dict() for message in request["messages"]],
+                    tools=[tool.as_dict() for tool in request["tools"]]
+                    if request.get("tools", None) is not None
+                    else None,
+                ),
+                response=response.as_dict() if response else {},
+                filter=image_url_filter,
+            )
+
         # make call
         try:
             response: ChatCompletions = await client.complete(**request)
@@ -173,19 +186,10 @@ class AzureAIAPI(ModelAPI):
                     output_tokens=response.usage.completion_tokens,
                     total_tokens=response.usage.total_tokens,
                 ),
-            ), ModelCall.create(
-                request=request
-                | dict(
-                    messages=[message.as_dict() for message in request["messages"]],
-                    tools=[tool.as_dict() for tool in request["tools"]]
-                    if request.get("tools", None) is not None
-                    else None,
-                ),
-                response=response.as_dict(),
-                filter=image_url_filter,
-            )
+            ), model_call(response)
+
         except AzureError as ex:
-            return self.handle_azure_error(ex)
+            return self.handle_azure_error(ex), model_call()
         finally:
             await client.close()
 
@@ -251,7 +255,7 @@ class AzureAIAPI(ModelAPI):
     def is_mistral(self) -> bool:
         return "mistral" in self.model_name.lower()
 
-    def handle_azure_error(self, ex: AzureError) -> ModelOutput:
+    def handle_azure_error(self, ex: AzureError) -> ModelOutput | Exception:
         if isinstance(ex, HttpResponseError):
             response = str(ex.message)
             if "maximum context length" in response.lower():
@@ -260,12 +264,8 @@ class AzureAIAPI(ModelAPI):
                     content=response,
                     stop_reason="model_length",
                 )
-            elif ex.status_code == 400 and ex.error:
-                return ModelOutput.from_content(
-                    model=self.model_name,
-                    content=f"Your request triggered an error: {ex.error}",
-                    stop_reason="content_filter",
-                )
+            elif ex.status_code == 400:
+                return ex
 
         raise ex
 
inspect_ai/model/_providers/bedrock.py

@@ -27,11 +27,7 @@ from .._chat_message import (
 from .._generate_config import GenerateConfig
 from .._model import ModelAPI
 from .._model_call import ModelCall
-from .._model_output import (
-    ChatCompletionChoice,
-    ModelOutput,
-    ModelUsage,
-)
+from .._model_output import ChatCompletionChoice, ModelOutput, ModelUsage
 from .util import (
     model_base_url,
 )
@@ -307,7 +303,7 @@ class BedrockAPI(ModelAPI):
         tools: list[ToolInfo],
         tool_choice: ToolChoice,
         config: GenerateConfig,
-    ) -> ModelOutput | tuple[ModelOutput, ModelCall]:
+    ) -> ModelOutput | tuple[ModelOutput | Exception, ModelCall]:
         from botocore.config import Config
         from botocore.exceptions import ClientError
 
@@ -339,25 +335,33 @@ class BedrockAPI(ModelAPI):
         # Resolve the input messages into converse messages
         system, messages = await converse_messages(input)
 
-        try:
-            # Make the request
-            request = ConverseClientConverseRequest(
-                modelId=self.model_name,
-                messages=messages,
-                system=system,
-                inferenceConfig=ConverseInferenceConfig(
-                    maxTokens=config.max_tokens,
-                    temperature=config.temperature,
-                    topP=config.top_p,
-                    stopSequences=config.stop_seqs,
+        # Make the request
+        request = ConverseClientConverseRequest(
+            modelId=self.model_name,
+            messages=messages,
+            system=system,
+            inferenceConfig=ConverseInferenceConfig(
+                maxTokens=config.max_tokens,
+                temperature=config.temperature,
+                topP=config.top_p,
+                stopSequences=config.stop_seqs,
+            ),
+            additionalModelRequestFields={
+                "top_k": config.top_k,
+                **config.model_config,
+            },
+            toolConfig=tool_config,
+        )
+
+        def model_call(response: dict[str, Any] | None = None) -> ModelCall:
+            return ModelCall.create(
+                request=replace_bytes_with_placeholder(
+                    request.model_dump(exclude_none=True)
                 ),
-                additionalModelRequestFields={
-                    "top_k": config.top_k,
-                    **config.model_config,
-                },
-                toolConfig=tool_config,
+                response=response,
             )
 
+        try:
             # Process the response
             response = await client.converse(
                 **request.model_dump(exclude_none=True)
@@ -366,32 +370,24 @@ class BedrockAPI(ModelAPI):
             )
 
         except ClientError as ex:
             # Look for an explicit validation exception
-            if (
-                ex.response["Error"]["Code"] == "ValidationException"
-                and "Too many input tokens" in ex.response["Error"]["Message"]
-            ):
+            if ex.response["Error"]["Code"] == "ValidationException":
                 response = ex.response["Error"]["Message"]
-                return ModelOutput.from_content(
-                    model=self.model_name,
-                    content=response,
-                    stop_reason="model_length",
-                )
+                if "Too many input tokens" in response:
+                    return ModelOutput.from_content(
+                        model=self.model_name,
+                        content=response,
+                        stop_reason="model_length",
+                    )
+                else:
+                    return ex, model_call(None)
             else:
                 raise ex
 
         # create a model output from the response
         output = model_output_from_response(self.model_name, converse_response, tools)
 
-        # record call
-        call = ModelCall.create(
-            request=replace_bytes_with_placeholder(
-                request.model_dump(exclude_none=True)
-            ),
-            response=response,
-        )
-
         # return
-        return output, call
+        return output, model_call(response)
 
 
 async def converse_messages(
@@ -550,6 +546,7 @@ async def converse_chat_message(
                 "Tool call is missing a tool call id, which is required for Converse API"
             )
         if message.function is None:
+            print(message)
             raise ValueError(
                 "Tool call is missing a function, which is required for Converse API"
             )