inspect-ai 0.3.58__py3-none-any.whl → 0.3.60__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registries.
- inspect_ai/_cli/common.py +3 -1
- inspect_ai/_cli/eval.py +15 -9
- inspect_ai/_display/core/active.py +4 -1
- inspect_ai/_display/core/config.py +3 -3
- inspect_ai/_display/core/panel.py +7 -3
- inspect_ai/_display/plain/__init__.py +0 -0
- inspect_ai/_display/plain/display.py +203 -0
- inspect_ai/_display/rich/display.py +0 -5
- inspect_ai/_display/textual/widgets/port_mappings.py +110 -0
- inspect_ai/_display/textual/widgets/samples.py +79 -12
- inspect_ai/_display/textual/widgets/sandbox.py +37 -0
- inspect_ai/_eval/eval.py +10 -1
- inspect_ai/_eval/loader.py +79 -19
- inspect_ai/_eval/registry.py +6 -0
- inspect_ai/_eval/score.py +3 -1
- inspect_ai/_eval/task/results.py +51 -22
- inspect_ai/_eval/task/run.py +47 -13
- inspect_ai/_eval/task/sandbox.py +10 -5
- inspect_ai/_util/constants.py +1 -0
- inspect_ai/_util/port_names.py +61 -0
- inspect_ai/_util/text.py +23 -0
- inspect_ai/_view/www/App.css +31 -1
- inspect_ai/_view/www/dist/assets/index.css +31 -1
- inspect_ai/_view/www/dist/assets/index.js +25498 -2044
- inspect_ai/_view/www/log-schema.json +32 -2
- inspect_ai/_view/www/package.json +2 -0
- inspect_ai/_view/www/src/App.mjs +14 -16
- inspect_ai/_view/www/src/Types.mjs +1 -2
- inspect_ai/_view/www/src/api/Types.ts +133 -0
- inspect_ai/_view/www/src/api/{api-browser.mjs → api-browser.ts} +25 -13
- inspect_ai/_view/www/src/api/api-http.ts +219 -0
- inspect_ai/_view/www/src/api/api-shared.ts +47 -0
- inspect_ai/_view/www/src/api/{api-vscode.mjs → api-vscode.ts} +22 -19
- inspect_ai/_view/www/src/api/{client-api.mjs → client-api.ts} +93 -53
- inspect_ai/_view/www/src/api/index.ts +51 -0
- inspect_ai/_view/www/src/api/jsonrpc.ts +225 -0
- inspect_ai/_view/www/src/components/ChatView.mjs +133 -43
- inspect_ai/_view/www/src/components/DownloadButton.mjs +1 -1
- inspect_ai/_view/www/src/components/ExpandablePanel.mjs +0 -4
- inspect_ai/_view/www/src/components/LargeModal.mjs +19 -20
- inspect_ai/_view/www/src/components/TabSet.mjs +3 -1
- inspect_ai/_view/www/src/components/VirtualList.mjs +266 -84
- inspect_ai/_view/www/src/index.js +77 -4
- inspect_ai/_view/www/src/log/{remoteLogFile.mjs → remoteLogFile.ts} +62 -46
- inspect_ai/_view/www/src/navbar/Navbar.mjs +4 -1
- inspect_ai/_view/www/src/navbar/SecondaryBar.mjs +19 -10
- inspect_ai/_view/www/src/samples/SampleDialog.mjs +5 -1
- inspect_ai/_view/www/src/samples/SampleDisplay.mjs +23 -15
- inspect_ai/_view/www/src/samples/SampleList.mjs +19 -49
- inspect_ai/_view/www/src/samples/SampleScores.mjs +1 -1
- inspect_ai/_view/www/src/samples/SampleTranscript.mjs +8 -3
- inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +38 -26
- inspect_ai/_view/www/src/samples/SamplesTab.mjs +14 -11
- inspect_ai/_view/www/src/samples/SamplesTools.mjs +8 -8
- inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +712 -89
- inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +2 -2
- inspect_ai/_view/www/src/samples/tools/filters.mjs +260 -87
- inspect_ai/_view/www/src/samples/transcript/ErrorEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/EventPanel.mjs +29 -24
- inspect_ai/_view/www/src/samples/transcript/EventRow.mjs +1 -1
- inspect_ai/_view/www/src/samples/transcript/InfoEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/InputEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +31 -10
- inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.mjs +23 -2
- inspect_ai/_view/www/src/samples/transcript/ScoreEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/StepEventView.mjs +33 -3
- inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.mjs +25 -2
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +25 -2
- inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +193 -11
- inspect_ai/_view/www/src/samples/transcript/Types.mjs +10 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +26 -2
- inspect_ai/_view/www/src/types/log.d.ts +13 -2
- inspect_ai/_view/www/src/utils/Format.mjs +10 -3
- inspect_ai/_view/www/src/utils/{Json.mjs → json-worker.ts} +13 -9
- inspect_ai/_view/www/src/utils/vscode.ts +36 -0
- inspect_ai/_view/www/src/workspace/WorkSpace.mjs +11 -5
- inspect_ai/_view/www/vite.config.js +7 -0
- inspect_ai/_view/www/yarn.lock +116 -0
- inspect_ai/approval/_human/__init__.py +0 -0
- inspect_ai/approval/_human/manager.py +1 -1
- inspect_ai/approval/_policy.py +12 -6
- inspect_ai/log/_log.py +1 -1
- inspect_ai/log/_samples.py +16 -0
- inspect_ai/log/_transcript.py +4 -1
- inspect_ai/model/_call_tools.py +59 -0
- inspect_ai/model/_conversation.py +16 -7
- inspect_ai/model/_generate_config.py +12 -12
- inspect_ai/model/_model.py +117 -18
- inspect_ai/model/_model_output.py +22 -2
- inspect_ai/model/_openai.py +383 -0
- inspect_ai/model/_providers/anthropic.py +152 -55
- inspect_ai/model/_providers/azureai.py +21 -21
- inspect_ai/model/_providers/bedrock.py +37 -40
- inspect_ai/model/_providers/goodfire.py +248 -0
- inspect_ai/model/_providers/google.py +46 -54
- inspect_ai/model/_providers/groq.py +7 -3
- inspect_ai/model/_providers/hf.py +6 -0
- inspect_ai/model/_providers/mistral.py +13 -12
- inspect_ai/model/_providers/openai.py +51 -218
- inspect_ai/model/_providers/openai_o1.py +11 -12
- inspect_ai/model/_providers/providers.py +23 -1
- inspect_ai/model/_providers/together.py +12 -12
- inspect_ai/model/_providers/util/__init__.py +2 -3
- inspect_ai/model/_providers/util/hf_handler.py +1 -1
- inspect_ai/model/_providers/util/llama31.py +1 -1
- inspect_ai/model/_providers/util/util.py +0 -76
- inspect_ai/model/_providers/vertex.py +1 -4
- inspect_ai/scorer/_metric.py +3 -0
- inspect_ai/scorer/_reducer/reducer.py +1 -1
- inspect_ai/scorer/_scorer.py +4 -3
- inspect_ai/solver/__init__.py +4 -5
- inspect_ai/solver/_basic_agent.py +1 -1
- inspect_ai/solver/_bridge/__init__.py +3 -0
- inspect_ai/solver/_bridge/bridge.py +100 -0
- inspect_ai/solver/_bridge/patch.py +170 -0
- inspect_ai/solver/_prompt.py +35 -5
- inspect_ai/solver/_solver.py +6 -0
- inspect_ai/solver/_task_state.py +80 -38
- inspect_ai/tool/__init__.py +2 -0
- inspect_ai/tool/_tool.py +12 -1
- inspect_ai/tool/_tool_call.py +10 -0
- inspect_ai/tool/_tool_def.py +16 -5
- inspect_ai/tool/_tool_with.py +21 -4
- inspect_ai/tool/beta/__init__.py +5 -0
- inspect_ai/tool/beta/_computer/__init__.py +3 -0
- inspect_ai/tool/beta/_computer/_common.py +133 -0
- inspect_ai/tool/beta/_computer/_computer.py +155 -0
- inspect_ai/tool/beta/_computer/_computer_split.py +198 -0
- inspect_ai/tool/beta/_computer/_resources/Dockerfile +100 -0
- inspect_ai/tool/beta/_computer/_resources/README.md +30 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/entrypoint.sh +18 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/novnc_startup.sh +20 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/x11vnc_startup.sh +48 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/xfce_startup.sh +13 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/xvfb_startup.sh +48 -0
- inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +10 -0
- inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +10 -0
- inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/XPaint.desktop +10 -0
- inspect_ai/tool/beta/_computer/_resources/tool/__init__.py +0 -0
- inspect_ai/tool/beta/_computer/_resources/tool/_logger.py +22 -0
- inspect_ai/tool/beta/_computer/_resources/tool/_run.py +42 -0
- inspect_ai/tool/beta/_computer/_resources/tool/_tool_result.py +33 -0
- inspect_ai/tool/beta/_computer/_resources/tool/_x11_client.py +262 -0
- inspect_ai/tool/beta/_computer/_resources/tool/computer_tool.py +85 -0
- inspect_ai/tool/beta/_computer/_resources/tool/requirements.txt +0 -0
- inspect_ai/util/__init__.py +2 -0
- inspect_ai/util/_display.py +5 -0
- inspect_ai/util/_limit.py +26 -0
- inspect_ai/util/_sandbox/docker/docker.py +64 -1
- inspect_ai/util/_sandbox/docker/internal.py +3 -1
- inspect_ai/util/_sandbox/docker/prereqs.py +1 -1
- inspect_ai/util/_sandbox/environment.py +14 -0
- {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/METADATA +3 -2
- {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/RECORD +159 -126
- inspect_ai/_view/www/src/api/Types.mjs +0 -117
- inspect_ai/_view/www/src/api/api-http.mjs +0 -300
- inspect_ai/_view/www/src/api/api-shared.mjs +0 -10
- inspect_ai/_view/www/src/api/index.mjs +0 -49
- inspect_ai/_view/www/src/api/jsonrpc.mjs +0 -208
- inspect_ai/_view/www/src/samples/transcript/TranscriptState.mjs +0 -70
- inspect_ai/_view/www/src/utils/vscode.mjs +0 -16
- {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/LICENSE +0 -0
- {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/top_level.txt +0 -0
inspect_ai/model/_providers/openai.py CHANGED
@@ -1,4 +1,3 @@
-import json
 import os
 from logging import getLogger
 from typing import Any
@@ -15,51 +14,39 @@ from openai import (
 from openai._types import NOT_GIVEN
 from openai.types.chat import (
     ChatCompletion,
-    ChatCompletionAssistantMessageParam,
-    ChatCompletionContentPartImageParam,
-    ChatCompletionContentPartInputAudioParam,
-    ChatCompletionContentPartParam,
-    ChatCompletionContentPartTextParam,
-    ChatCompletionDeveloperMessageParam,
-    ChatCompletionMessage,
-    ChatCompletionMessageParam,
-    ChatCompletionMessageToolCallParam,
-    ChatCompletionNamedToolChoiceParam,
-    ChatCompletionSystemMessageParam,
-    ChatCompletionToolChoiceOptionParam,
-    ChatCompletionToolMessageParam,
-    ChatCompletionToolParam,
-    ChatCompletionUserMessageParam,
 )
-from openai.types.shared_params.function_definition import FunctionDefinition
 from typing_extensions import override
 
 from inspect_ai._util.constants import DEFAULT_MAX_RETRIES
-from inspect_ai._util.content import Content
 from inspect_ai._util.error import PrerequisiteError
-from inspect_ai._util.images import file_as_data_uri
 from inspect_ai._util.logger import warn_once
-from inspect_ai.
-from inspect_ai.tool import
+from inspect_ai.model._openai import chat_choices_from_openai
+from inspect_ai.tool import ToolChoice, ToolInfo
 
-from .._chat_message import ChatMessage
+from .._chat_message import ChatMessage
 from .._generate_config import GenerateConfig
 from .._image import image_url_filter
 from .._model import ModelAPI
 from .._model_call import ModelCall
 from .._model_output import (
     ChatCompletionChoice,
-    Logprobs,
     ModelOutput,
     ModelUsage,
     StopReason,
 )
+from .._openai import (
+    is_o1,
+    is_o1_full,
+    is_o1_mini,
+    is_o1_preview,
+    openai_chat_messages,
+    openai_chat_tool_choice,
+    openai_chat_tools,
+)
 from .openai_o1 import generate_o1
 from .util import (
-    as_stop_reason,
     environment_prerequisite_error,
     model_base_url,
-    parse_tool_call,
 )
 
 logger = getLogger(__name__)
@@ -87,20 +74,22 @@ class OpenAIAPI(ModelAPI):
             config=config,
         )
 
-        #
-
-
-
-
-
+        # extract any service prefix from model name
+        parts = model_name.split("/")
+        if len(parts) > 1:
+            self.service: str | None = parts[0]
+            model_name = "/".join(parts[1:])
+        else:
+            self.service = None
 
         # resolve api_key
         if not self.api_key:
             self.api_key = os.environ.get(
                 AZUREAI_OPENAI_API_KEY, os.environ.get(AZURE_OPENAI_API_KEY, None)
             )
-
-
+            # backward compatibility for when env vars determined service
+            if self.api_key and (os.environ.get(OPENAI_API_KEY, None) is None):
+                self.service = "azure"
         else:
             self.api_key = os.environ.get(OPENAI_API_KEY, None)
             if not self.api_key:
@@ -113,7 +102,7 @@ class OpenAIAPI(ModelAPI):
             )
 
         # azure client
-        if is_azure:
+        if self.is_azure():
             # resolve base_url
             base_url = model_base_url(
                 base_url,
@@ -148,17 +137,20 @@ class OpenAIAPI(ModelAPI):
                 **model_args,
             )
 
+    def is_azure(self) -> bool:
+        return self.service == "azure"
+
     def is_o1(self) -> bool:
-        return self.model_name
+        return is_o1(self.model_name)
 
     def is_o1_full(self) -> bool:
-        return
+        return is_o1_full(self.model_name)
 
     def is_o1_mini(self) -> bool:
-        return self.model_name
+        return is_o1_mini(self.model_name)
 
     def is_o1_preview(self) -> bool:
-        return self.model_name
+        return is_o1_preview(self.model_name)
 
     async def generate(
         self,
@@ -166,7 +158,7 @@ class OpenAIAPI(ModelAPI):
         tools: list[ToolInfo],
         tool_choice: ToolChoice,
         config: GenerateConfig,
-    ) -> ModelOutput | tuple[ModelOutput, ModelCall]:
+    ) -> ModelOutput | tuple[ModelOutput | Exception, ModelCall]:
         # short-circuit to call o1- models that are text only
         if self.is_o1_preview() or self.is_o1_mini():
             return await generate_o1(
@@ -198,9 +190,11 @@ class OpenAIAPI(ModelAPI):
 
         # prepare request (we do this so we can log the ModelCall)
         request = dict(
-            messages=await
-            tools=
-            tool_choice=
+            messages=await openai_chat_messages(input, self.model_name),
+            tools=openai_chat_tools(tools) if len(tools) > 0 else NOT_GIVEN,
+            tool_choice=openai_chat_tool_choice(tool_choice)
+            if len(tools) > 0
+            else NOT_GIVEN,
             **self.completion_params(config, len(tools) > 0),
         )
 
@@ -237,7 +231,7 @@ class OpenAIAPI(ModelAPI):
         self, response: ChatCompletion, tools: list[ToolInfo]
     ) -> list[ChatCompletionChoice]:
         # adding this as a method so we can override from other classes (e.g together)
-        return
+        return chat_choices_from_openai(response, tools)
 
     @override
     def is_rate_limit(self, ex: BaseException) -> bool:
@@ -307,184 +301,23 @@ class OpenAIAPI(ModelAPI):
         return params
 
     # convert some well known bad request errors into ModelOutput
-    def handle_bad_request(self, e: BadRequestError) -> ModelOutput:
-
-
-
-
-
-        content = e.message
+    def handle_bad_request(self, e: BadRequestError) -> ModelOutput | Exception:
+        # extract message
+        if isinstance(e.body, dict) and "message" in e.body.keys():
+            content = str(e.body.get("message"))
+        else:
+            content = e.message
 
-
-
-
-
-
-
-        stop_reason = "unknown"
+        # narrow stop_reason
+        stop_reason: StopReason | None = None
+        if e.code == "context_length_exceeded":
+            stop_reason = "model_length"
+        elif e.code == "invalid_prompt":
+            stop_reason = "content_filter"
 
+        if stop_reason:
             return ModelOutput.from_content(
                 model=self.model_name, content=content, stop_reason=stop_reason
             )
         else:
-
-
-
-async def as_openai_chat_messages(
-    messages: list[ChatMessage], o1_full: bool
-) -> list[ChatCompletionMessageParam]:
-    return [await openai_chat_message(message, o1_full) for message in messages]
-
-
-async def openai_chat_message(
-    message: ChatMessage, o1_full: bool
-) -> ChatCompletionMessageParam:
-    if message.role == "system":
-        if o1_full:
-            return ChatCompletionDeveloperMessageParam(
-                role="developer", content=message.text
-            )
-        else:
-            return ChatCompletionSystemMessageParam(
-                role=message.role, content=message.text
-            )
-    elif message.role == "user":
-        return ChatCompletionUserMessageParam(
-            role=message.role,
-            content=(
-                message.content
-                if isinstance(message.content, str)
-                else [
-                    await as_chat_completion_part(content)
-                    for content in message.content
-                ]
-            ),
-        )
-    elif message.role == "assistant":
-        if message.tool_calls:
-            return ChatCompletionAssistantMessageParam(
-                role=message.role,
-                content=message.text,
-                tool_calls=[chat_tool_call(call) for call in message.tool_calls],
-            )
-        else:
-            return ChatCompletionAssistantMessageParam(
-                role=message.role, content=message.text
-            )
-    elif message.role == "tool":
-        return ChatCompletionToolMessageParam(
-            role=message.role,
-            content=(
-                f"Error: {message.error.message}" if message.error else message.text
-            ),
-            tool_call_id=str(message.tool_call_id),
-        )
-    else:
-        raise ValueError(f"Unexpected message role {message.role}")
-
-
-def chat_tool_call(tool_call: ToolCall) -> ChatCompletionMessageToolCallParam:
-    return ChatCompletionMessageToolCallParam(
-        id=tool_call.id,
-        function=dict(
-            name=tool_call.function, arguments=json.dumps(tool_call.arguments)
-        ),
-        type=tool_call.type,
-    )
-
-
-def chat_tools(tools: list[ToolInfo]) -> list[ChatCompletionToolParam]:
-    return [chat_tool_param(tool) for tool in tools]
-
-
-def chat_tool_param(tool: ToolInfo) -> ChatCompletionToolParam:
-    function = FunctionDefinition(
-        name=tool.name,
-        description=tool.description,
-        parameters=tool.parameters.model_dump(exclude_none=True),
-    )
-    return ChatCompletionToolParam(type="function", function=function)
-
-
-def chat_tool_choice(tool_choice: ToolChoice) -> ChatCompletionToolChoiceOptionParam:
-    if isinstance(tool_choice, ToolFunction):
-        return ChatCompletionNamedToolChoiceParam(
-            type="function", function=dict(name=tool_choice.name)
-        )
-    # openai supports 'any' via the 'required' keyword
-    elif tool_choice == "any":
-        return "required"
-    else:
-        return tool_choice
-
-
-def chat_tool_calls(
-    message: ChatCompletionMessage, tools: list[ToolInfo]
-) -> list[ToolCall] | None:
-    if message.tool_calls:
-        return [
-            parse_tool_call(call.id, call.function.name, call.function.arguments, tools)
-            for call in message.tool_calls
-        ]
-    else:
-        return None
-
-
-def chat_choices_from_response(
-    response: ChatCompletion, tools: list[ToolInfo]
-) -> list[ChatCompletionChoice]:
-    choices = list(response.choices)
-    choices.sort(key=lambda c: c.index)
-    return [
-        ChatCompletionChoice(
-            message=chat_message_assistant(choice.message, tools),
-            stop_reason=as_stop_reason(choice.finish_reason),
-            logprobs=(
-                Logprobs(**choice.logprobs.model_dump())
-                if choice.logprobs is not None
-                else None
-            ),
-        )
-        for choice in choices
-    ]
-
-
-def chat_message_assistant(
-    message: ChatCompletionMessage, tools: list[ToolInfo]
-) -> ChatMessageAssistant:
-    return ChatMessageAssistant(
-        content=message.content or "",
-        source="generate",
-        tool_calls=chat_tool_calls(message, tools),
-    )
-
-
-async def as_chat_completion_part(
-    content: Content,
-) -> ChatCompletionContentPartParam:
-    if content.type == "text":
-        return ChatCompletionContentPartTextParam(type="text", text=content.text)
-    elif content.type == "image":
-        # API takes URL or base64 encoded file. If it's a remote file or
-        # data URL leave it alone, otherwise encode it
-        image_url = content.image
-        detail = content.detail
-
-        if not is_http_url(image_url):
-            image_url = await file_as_data_uri(image_url)
-
-        return ChatCompletionContentPartImageParam(
-            type="image_url",
-            image_url=dict(url=image_url, detail=detail),
-        )
-    elif content.type == "audio":
-        audio_data = await file_as_data_uri(content.audio)
-
-        return ChatCompletionContentPartInputAudioParam(
-            type="input_audio", input_audio=dict(data=audio_data, format=content.format)
-        )
-
-    else:
-        raise RuntimeError(
-            "Video content is not currently supported by Open AI chat models."
-        )
+            return e
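Note on the hunks above: the provider's message/tool conversion helpers moved to the shared inspect_ai/model/_openai.py module (new in this release, +383 lines), and the Azure service is now derived from a prefix on the model name rather than inferred solely from environment variables. A minimal self-contained sketch of that prefix parsing, assuming only what the added lines show (the helper name and example model strings are illustrative):

```python
# Standalone sketch of the service-prefix parsing shown in the hunk above.
# The helper name and the example model strings are illustrative assumptions.
def split_service(model_name: str) -> tuple[str | None, str]:
    parts = model_name.split("/")
    if len(parts) > 1:
        # first segment is the service, the remainder is the actual model name
        return parts[0], "/".join(parts[1:])
    return None, model_name

assert split_service("azure/gpt-4o-mini") == ("azure", "gpt-4o-mini")
assert split_service("gpt-4o-mini") == (None, "gpt-4o-mini")
```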
inspect_ai/model/_providers/openai_o1.py CHANGED
@@ -24,15 +24,13 @@ from inspect_ai.model import (
 )
 from inspect_ai.tool import ToolCall, ToolInfo
 
+from .._call_tools import parse_tool_call, tool_parse_error_message
 from .._model_call import ModelCall
-from .._model_output import ModelUsage, StopReason
+from .._model_output import ModelUsage, StopReason, as_stop_reason
 from .._providers.util import (
     ChatAPIHandler,
     ChatAPIMessage,
-    as_stop_reason,
     chat_api_input,
-    parse_tool_call,
-    tool_parse_error_message,
 )
 
 logger = getLogger(__name__)
@@ -44,7 +42,7 @@ async def generate_o1(
     input: list[ChatMessage],
     tools: list[ToolInfo],
     **params: Any,
-) -> ModelOutput | tuple[ModelOutput, ModelCall]:
+) -> ModelOutput | tuple[ModelOutput | Exception, ModelCall]:
     # create chatapi handler
     handler = O1PreviewChatAPIHandler()
 
@@ -82,17 +80,18 @@ async def generate_o1(
     ), model_call()
 
 
-def handle_bad_request(model: str, ex: BadRequestError) -> ModelOutput:
+def handle_bad_request(model: str, ex: BadRequestError) -> ModelOutput | Exception:
     if ex.code == "context_length_exceeded":
-        stop_reason: StopReason = "model_length"
+        stop_reason: StopReason | None = "model_length"
     elif ex.code == "invalid_prompt":
         stop_reason = "content_filter"
-    else:
-        stop_reason = "unknown"
 
-
-
-
+    if stop_reason:
+        return ModelOutput.from_content(
+            model=model, content=str(ex), stop_reason=stop_reason
+        )
+    else:
+        return ex
 
 
 def chat_messages(
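This handle_bad_request and the one in openai.py now share the same contract: return a ModelOutput only when the error code maps to a known StopReason, and otherwise return the exception to the caller instead of fabricating an "unknown" stop reason. A self-contained sketch of that narrowing (the error type and return shape below are illustrative, not the library's):

```python
# Illustrative sketch of the narrowed error-handling contract shown above.
# ApiError and the code strings are assumptions standing in for BadRequestError.
from typing import Literal

StopReason = Literal["model_length", "content_filter"]

class ApiError(Exception):
    def __init__(self, code: str) -> None:
        super().__init__(code)
        self.code = code

def narrow_stop_reason(ex: ApiError) -> StopReason | Exception:
    if ex.code == "context_length_exceeded":
        return "model_length"
    elif ex.code == "invalid_prompt":
        return "content_filter"
    else:
        # unrecognized errors are handed back to the caller
        # rather than being mapped to an "unknown" stop reason
        return ex

assert narrow_stop_reason(ApiError("context_length_exceeded")) == "model_length"
assert isinstance(narrow_stop_reason(ApiError("server_error")), Exception)
```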
inspect_ai/model/_providers/providers.py CHANGED
@@ -94,7 +94,7 @@ def vertex() -> type[ModelAPI]:
 def google() -> type[ModelAPI]:
     FEATURE = "Google API"
     PACKAGE = "google-generativeai"
-    MIN_VERSION = "0.8.
+    MIN_VERSION = "0.8.4"
 
     # workaround log spam
     # https://github.com/ray-project/ray/issues/24917
@@ -239,6 +239,28 @@ def mockllm() -> type[ModelAPI]:
     return MockLLM
 
 
+@modelapi("goodfire")
+def goodfire() -> type[ModelAPI]:
+    """Get the Goodfire API provider."""
+    FEATURE = "Goodfire API"
+    PACKAGE = "goodfire"
+    MIN_VERSION = "0.3.4"  # Support for newer Llama models and OpenAI compatibility
+
+    # verify we have the package
+    try:
+        import goodfire  # noqa: F401
+    except ImportError:
+        raise pip_dependency_error(FEATURE, [PACKAGE])
+
+    # verify version
+    verify_required_version(FEATURE, PACKAGE, MIN_VERSION)
+
+    # in the clear
+    from .goodfire import GoodfireAPI
+
+    return GoodfireAPI
+
+
 def validate_openai_client(feature: str) -> None:
     FEATURE = feature
     PACKAGE = "openai"
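The goodfire() registration follows the same lazy-import pattern as the other providers in this file: verify the package is installed, verify its version, then import the concrete ModelAPI implementation. Assuming it behaves like the other @modelapi registrations, the provider name becomes a model-name prefix; a hedged usage sketch (the model identifier is a placeholder, not taken from this diff):

```python
# Hedged usage sketch: providers registered via @modelapi("goodfire") should be
# addressable by prefix, as with other inspect_ai providers. The model id below
# is a placeholder assumption; substitute a real Goodfire model name.
from inspect_ai.model import get_model

model = get_model("goodfire/<model-name>")  # requires `pip install goodfire`
```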
inspect_ai/model/_providers/together.py CHANGED
@@ -24,13 +24,13 @@ from .._model_output import (
     ModelOutput,
     ModelUsage,
     StopReason,
+    as_stop_reason,
 )
+from .._openai import chat_message_assistant_from_openai
 from .openai import (
     OpenAIAPI,
-    chat_message_assistant,
 )
 from .util import (
-    as_stop_reason,
     chat_api_input,
     chat_api_request,
     environment_prerequisite_error,
@@ -68,7 +68,7 @@ def chat_choices_from_response_together(
         logprobs_models.append(Logprobs(content=logprobs_sequence))
     return [
         ChatCompletionChoice(
-            message=
+            message=chat_message_assistant_from_openai(choice.message, tools),
             stop_reason=as_stop_reason(choice.finish_reason),
             logprobs=logprobs,
         )
@@ -99,22 +99,22 @@ class TogetherAIAPI(OpenAIAPI):
 
     # Together uses a default of 512 so we bump it up
     @override
-    def max_tokens(self) -> int:
+    def max_tokens(self) -> int | None:
         return DEFAULT_MAX_TOKENS
 
     @override
-    def handle_bad_request(self, ex: BadRequestError) -> ModelOutput:
-
-
-
-
-
-
+    def handle_bad_request(self, ex: BadRequestError) -> ModelOutput | Exception:
+        response = ex.response.json()
+        if "error" in response and "message" in response.get("error"):
+            content = response.get("error").get("message")
+        else:
+            content = str(response)
+        if "max_new_tokens" in ex.message:
             return ModelOutput.from_content(
                 model=self.model_name, content=content, stop_reason="model_length"
            )
         else:
-
+            return ex
 
     # Together has a slightly different logprobs structure to OpenAI, so we need to remap it.
     def _chat_choices_from_response(
inspect_ai/model/_providers/util/__init__.py CHANGED
@@ -1,3 +1,5 @@
+from ..._call_tools import parse_tool_call, tool_parse_error_message
+from ..._model_output import as_stop_reason
 from .chatapi import (
     ChatAPIHandler,
     ChatAPIMessage,
@@ -8,11 +10,8 @@ from .chatapi import (
 from .hf_handler import HFHandler
 from .llama31 import Llama31Handler
 from .util import (
-    as_stop_reason,
     environment_prerequisite_error,
     model_base_url,
-    parse_tool_call,
-    tool_parse_error_message,
 )
 
 __all__ = [
inspect_ai/model/_providers/util/hf_handler.py CHANGED
@@ -8,9 +8,9 @@ from typing_extensions import override
 from inspect_ai.tool._tool_call import ToolCall
 from inspect_ai.tool._tool_info import ToolInfo
 
+from ..._call_tools import parse_tool_call, tool_parse_error_message
 from ..._chat_message import ChatMessageAssistant
 from .chatapi import ChatAPIHandler
-from .util import parse_tool_call, tool_parse_error_message
 
 logger = getLogger(__name__)
 
inspect_ai/model/_providers/util/llama31.py CHANGED
@@ -9,6 +9,7 @@ from typing_extensions import override
 from inspect_ai.tool._tool_call import ToolCall
 from inspect_ai.tool._tool_info import ToolInfo
 
+from ..._call_tools import parse_tool_call, tool_parse_error_message
 from ..._chat_message import (
     ChatMessage,
     ChatMessageAssistant,
@@ -16,7 +17,6 @@ from ..._chat_message import (
     ChatMessageTool,
 )
 from .chatapi import ChatAPIHandler, ChatAPIMessage
-from .util import parse_tool_call, tool_parse_error_message
 
 logger = getLogger(__name__)
 
inspect_ai/model/_providers/util/util.py CHANGED
@@ -1,34 +1,11 @@
-import json
 import os
 from logging import getLogger
-from typing import Any
-
-import yaml
 
 from inspect_ai._util.error import PrerequisiteError
-from inspect_ai.tool._tool_call import ToolCall
-from inspect_ai.tool._tool_info import ToolInfo
-
-from ..._model_output import StopReason
 
 logger = getLogger(__name__)
 
 
-def as_stop_reason(reason: str | None) -> StopReason:
-    """Encode common reason strings into standard StopReason."""
-    match reason:
-        case "stop" | "eos":
-            return "stop"
-        case "length":
-            return "max_tokens"
-        case "tool_calls" | "function_call":
-            return "tool_calls"
-        case "content_filter" | "model_length" | "max_tokens":
-            return reason
-        case _:
-            return "unknown"
-
-
 def model_base_url(base_url: str | None, env_vars: str | list[str]) -> str | None:
     if base_url:
         return base_url
@@ -44,59 +21,6 @@ def model_base_url(base_url: str | None, env_vars: str | list[str]) -> str | None:
     return os.getenv("INSPECT_EVAL_MODEL_BASE_URL", None)
 
 
-def tool_parse_error_message(arguments: str, ex: Exception) -> str:
-    return f"Error parsing the following tool call arguments:\n\n{arguments}\n\nError details: {ex}"
-
-
-def parse_tool_call(
-    id: str, function: str, arguments: str, tools: list[ToolInfo]
-) -> ToolCall:
-    error: str | None = None
-    arguments_dict: dict[str, Any] = {}
-
-    def report_parse_error(ex: Exception) -> None:
-        nonlocal error
-        error = tool_parse_error_message(arguments, ex)
-        logger.info(error)
-
-    # if the arguments is a dict, then handle it with a plain json.loads
-    arguments = arguments.strip()
-    if arguments.startswith("{"):
-        try:
-            arguments_dict = json.loads(arguments)
-        except json.JSONDecodeError as ex:
-            report_parse_error(ex)
-
-    # otherwise parse it as yaml (which will pickup unquoted strings, numbers, and true/false)
-    # and then create a dict that maps it to the first function argument
-    else:
-        tool_info = next(
-            (
-                tool
-                for tool in tools
-                if tool.name == function and len(tool.parameters.properties) > 0
-            ),
-            None,
-        )
-        if tool_info:
-            param_names = list(tool_info.parameters.properties.keys())
-            try:
-                value = yaml.safe_load(arguments)
-                arguments_dict[param_names[0]] = value
-            except yaml.error.YAMLError:
-                # If the yaml parser fails, we treat it as a string argument.
-                arguments_dict[param_names[0]] = arguments
-
-    # return ToolCall with error payload
-    return ToolCall(
-        id=id,
-        function=function,
-        arguments=arguments_dict,
-        type="function",
-        parse_error=error,
-    )
-
-
 def environment_prerequisite_error(
     client: str, env_vars: str | list[str]
 ) -> PrerequisiteError:
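as_stop_reason, parse_tool_call, and tool_parse_error_message were not deleted outright: per the import changes in the hunks above they now live in inspect_ai/model/_model_output.py and inspect_ai/model/_call_tools.py (+59 lines in this release). The fallback behavior of the removed parse_tool_call is worth noting: JSON object arguments are decoded directly, while bare scalars are YAML-parsed and mapped onto the tool's first declared parameter. A trimmed, self-contained sketch of that fallback (the function name and simplified signature are illustrative):

```python
# Trimmed sketch of the argument-parsing fallback in the removed code above.
# The function name and simplified signature are illustrative assumptions.
import json

import yaml

def parse_arguments(arguments: str, param_names: list[str]) -> dict:
    arguments = arguments.strip()
    if arguments.startswith("{"):
        # JSON object arguments decode directly
        return json.loads(arguments)
    # bare scalars ("42", "true", "some text") are YAML-parsed and
    # mapped onto the tool's first declared parameter
    return {param_names[0]: yaml.safe_load(arguments)}

assert parse_arguments('{"query": "weather"}', ["query"]) == {"query": "weather"}
assert parse_arguments("42", ["count"]) == {"count": 42}
```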
inspect_ai/model/_providers/vertex.py CHANGED
@@ -23,7 +23,7 @@ from vertexai.generative_models import (  # type: ignore
 )
 from vertexai.generative_models import Content as VertexContent
 
-from inspect_ai._util.constants import BASE_64_DATA_REMOVED
+from inspect_ai._util.constants import BASE_64_DATA_REMOVED, NO_CONTENT
 from inspect_ai._util.content import (
     Content,
     ContentAudio,
@@ -250,9 +250,6 @@ def consective_tool_message_reducer(
     return messages
 
 
-NO_CONTENT = "(no content)"
-
-
 async def content_dict(
     message: ChatMessageUser | ChatMessageAssistant | ChatMessageTool,
 ) -> VertexContent:
inspect_ai/scorer/_metric.py CHANGED
@@ -125,6 +125,9 @@ class SampleScore(BaseModel):
     sample_id: str | int | None = Field(default=None)
     """A sample id"""
 
+    scorer: str | None = Field(default=None)
+    """Registry name of scorer that created this score."""
+
 
 ValueToFloat = Callable[[Value], float]
 """Function used by metrics to translate from a Score value to a float value."""