inspect-ai 0.3.82__py3-none-any.whl → 0.3.83__py3-none-any.whl

This diff compares publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (180)
  1. inspect_ai/__init__.py +2 -1
  2. inspect_ai/_display/textual/app.py +14 -3
  3. inspect_ai/_display/textual/display.py +4 -0
  4. inspect_ai/_display/textual/widgets/samples.py +9 -3
  5. inspect_ai/_display/textual/widgets/task_detail.py +3 -4
  6. inspect_ai/_display/textual/widgets/tasks.py +17 -1
  7. inspect_ai/_display/textual/widgets/vscode.py +44 -0
  8. inspect_ai/_eval/eval.py +36 -24
  9. inspect_ai/_eval/evalset.py +17 -18
  10. inspect_ai/_eval/loader.py +34 -11
  11. inspect_ai/_eval/run.py +8 -13
  12. inspect_ai/_eval/score.py +13 -3
  13. inspect_ai/_eval/task/generate.py +8 -9
  14. inspect_ai/_eval/task/log.py +2 -0
  15. inspect_ai/_eval/task/task.py +23 -9
  16. inspect_ai/_util/file.py +13 -0
  17. inspect_ai/_util/json.py +2 -1
  18. inspect_ai/_util/registry.py +1 -0
  19. inspect_ai/_util/vscode.py +37 -0
  20. inspect_ai/_view/www/App.css +6 -0
  21. inspect_ai/_view/www/dist/assets/index.css +304 -128
  22. inspect_ai/_view/www/dist/assets/index.js +47495 -27519
  23. inspect_ai/_view/www/log-schema.json +124 -31
  24. inspect_ai/_view/www/package.json +3 -0
  25. inspect_ai/_view/www/src/App.tsx +12 -0
  26. inspect_ai/_view/www/src/appearance/icons.ts +1 -0
  27. inspect_ai/_view/www/src/components/Card.tsx +6 -4
  28. inspect_ai/_view/www/src/components/LinkButton.module.css +16 -0
  29. inspect_ai/_view/www/src/components/LinkButton.tsx +33 -0
  30. inspect_ai/_view/www/src/components/LiveVirtualList.tsx +1 -1
  31. inspect_ai/_view/www/src/components/MarkdownDiv.tsx +113 -23
  32. inspect_ai/_view/www/src/components/Modal.module.css +38 -0
  33. inspect_ai/_view/www/src/components/Modal.tsx +77 -0
  34. inspect_ai/_view/www/src/plan/DetailStep.module.css +4 -0
  35. inspect_ai/_view/www/src/plan/DetailStep.tsx +6 -3
  36. inspect_ai/_view/www/src/plan/SolverDetailView.module.css +2 -1
  37. inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +7 -0
  38. inspect_ai/_view/www/src/samples/SampleDialog.tsx +7 -0
  39. inspect_ai/_view/www/src/samples/SampleDisplay.tsx +11 -34
  40. inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +6 -0
  41. inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +2 -2
  42. inspect_ai/_view/www/src/samples/SamplesTools.tsx +12 -0
  43. inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +2 -0
  44. inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +2 -0
  45. inspect_ai/_view/www/src/samples/chat/messages.ts +3 -1
  46. inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +1 -0
  47. inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +9 -3
  48. inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.module.css +3 -3
  49. inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.tsx +1 -1
  50. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.module.css +4 -4
  51. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +10 -11
  52. inspect_ai/_view/www/src/samples/list/SampleFooter.module.css +2 -1
  53. inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +7 -1
  54. inspect_ai/_view/www/src/samples/list/SampleList.tsx +25 -8
  55. inspect_ai/_view/www/src/samples/list/SampleRow.tsx +1 -1
  56. inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +11 -22
  57. inspect_ai/_view/www/src/samples/scores/SampleScoresGrid.module.css +38 -0
  58. inspect_ai/_view/www/src/samples/scores/SampleScoresGrid.tsx +118 -0
  59. inspect_ai/_view/www/src/samples/scores/{SampleScoreView.module.css → SampleScoresView.module.css} +10 -1
  60. inspect_ai/_view/www/src/samples/scores/SampleScoresView.tsx +78 -0
  61. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +3 -3
  62. inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +25 -4
  63. inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +29 -2
  64. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +0 -1
  65. inspect_ai/_view/www/src/state/hooks.ts +5 -3
  66. inspect_ai/_view/www/src/state/logPolling.ts +5 -1
  67. inspect_ai/_view/www/src/state/logSlice.ts +10 -0
  68. inspect_ai/_view/www/src/state/samplePolling.ts +4 -1
  69. inspect_ai/_view/www/src/state/sampleSlice.ts +13 -0
  70. inspect_ai/_view/www/src/types/log.d.ts +34 -26
  71. inspect_ai/_view/www/src/types/markdown-it-katex.d.ts +21 -0
  72. inspect_ai/_view/www/src/utils/json-worker.ts +79 -12
  73. inspect_ai/_view/www/src/workspace/WorkSpace.tsx +18 -16
  74. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +16 -0
  75. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +68 -71
  76. inspect_ai/_view/www/src/workspace/navbar/ScoreGrid.module.css +35 -0
  77. inspect_ai/_view/www/src/workspace/navbar/ScoreGrid.tsx +117 -0
  78. inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +1 -1
  79. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.module.css +3 -2
  80. inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +18 -0
  81. inspect_ai/_view/www/yarn.lock +94 -1
  82. inspect_ai/agent/__init__.py +36 -0
  83. inspect_ai/agent/_agent.py +268 -0
  84. inspect_ai/agent/_as_solver.py +72 -0
  85. inspect_ai/agent/_as_tool.py +122 -0
  86. inspect_ai/{solver → agent}/_bridge/bridge.py +23 -37
  87. inspect_ai/{solver → agent}/_bridge/patch.py +9 -8
  88. inspect_ai/agent/_filter.py +46 -0
  89. inspect_ai/agent/_handoff.py +93 -0
  90. inspect_ai/{solver/_human_agent → agent/_human}/agent.py +11 -12
  91. inspect_ai/{solver/_human_agent → agent/_human}/commands/__init__.py +2 -3
  92. inspect_ai/{solver/_human_agent → agent/_human}/commands/clock.py +3 -1
  93. inspect_ai/{solver/_human_agent → agent/_human}/commands/score.py +5 -5
  94. inspect_ai/{solver/_human_agent → agent/_human}/install.py +6 -3
  95. inspect_ai/{solver/_human_agent → agent/_human}/service.py +7 -3
  96. inspect_ai/{solver/_human_agent → agent/_human}/state.py +5 -5
  97. inspect_ai/agent/_react.py +241 -0
  98. inspect_ai/agent/_run.py +36 -0
  99. inspect_ai/agent/_types.py +81 -0
  100. inspect_ai/log/_log.py +11 -2
  101. inspect_ai/log/_transcript.py +13 -9
  102. inspect_ai/model/__init__.py +7 -1
  103. inspect_ai/model/_call_tools.py +256 -52
  104. inspect_ai/model/_chat_message.py +7 -4
  105. inspect_ai/model/_conversation.py +13 -62
  106. inspect_ai/model/_display.py +85 -0
  107. inspect_ai/model/_model.py +113 -14
  108. inspect_ai/model/_model_output.py +14 -9
  109. inspect_ai/model/_openai.py +16 -4
  110. inspect_ai/model/_openai_computer_use.py +162 -0
  111. inspect_ai/model/_openai_responses.py +319 -165
  112. inspect_ai/model/_providers/anthropic.py +20 -21
  113. inspect_ai/model/_providers/azureai.py +24 -13
  114. inspect_ai/model/_providers/bedrock.py +1 -7
  115. inspect_ai/model/_providers/cloudflare.py +3 -3
  116. inspect_ai/model/_providers/goodfire.py +2 -6
  117. inspect_ai/model/_providers/google.py +11 -10
  118. inspect_ai/model/_providers/groq.py +6 -3
  119. inspect_ai/model/_providers/hf.py +7 -3
  120. inspect_ai/model/_providers/mistral.py +7 -10
  121. inspect_ai/model/_providers/openai.py +47 -17
  122. inspect_ai/model/_providers/openai_o1.py +11 -4
  123. inspect_ai/model/_providers/openai_responses.py +12 -14
  124. inspect_ai/model/_providers/providers.py +2 -2
  125. inspect_ai/model/_providers/together.py +12 -2
  126. inspect_ai/model/_providers/util/chatapi.py +7 -2
  127. inspect_ai/model/_providers/util/hf_handler.py +4 -2
  128. inspect_ai/model/_providers/util/llama31.py +4 -2
  129. inspect_ai/model/_providers/vertex.py +11 -9
  130. inspect_ai/model/_providers/vllm.py +4 -4
  131. inspect_ai/scorer/__init__.py +2 -0
  132. inspect_ai/scorer/_metrics/__init__.py +2 -0
  133. inspect_ai/scorer/_metrics/grouped.py +84 -0
  134. inspect_ai/scorer/_score.py +26 -6
  135. inspect_ai/solver/__init__.py +2 -2
  136. inspect_ai/solver/_basic_agent.py +22 -9
  137. inspect_ai/solver/_bridge.py +31 -0
  138. inspect_ai/solver/_chain.py +20 -12
  139. inspect_ai/solver/_fork.py +5 -1
  140. inspect_ai/solver/_human_agent.py +52 -0
  141. inspect_ai/solver/_prompt.py +3 -1
  142. inspect_ai/solver/_run.py +59 -0
  143. inspect_ai/solver/_solver.py +14 -4
  144. inspect_ai/solver/_task_state.py +5 -3
  145. inspect_ai/tool/_tool_call.py +15 -8
  146. inspect_ai/tool/_tool_def.py +17 -12
  147. inspect_ai/tool/_tool_support_helpers.py +2 -2
  148. inspect_ai/tool/_tool_with.py +14 -11
  149. inspect_ai/tool/_tools/_bash_session.py +11 -2
  150. inspect_ai/tool/_tools/_computer/_common.py +18 -2
  151. inspect_ai/tool/_tools/_computer/_computer.py +18 -2
  152. inspect_ai/tool/_tools/_computer/_resources/tool/_constants.py +2 -0
  153. inspect_ai/tool/_tools/_computer/_resources/tool/_x11_client.py +17 -0
  154. inspect_ai/tool/_tools/_think.py +1 -1
  155. inspect_ai/tool/_tools/_web_browser/_web_browser.py +100 -61
  156. inspect_ai/util/__init__.py +2 -0
  157. inspect_ai/util/_anyio.py +27 -0
  158. inspect_ai/util/_sandbox/__init__.py +2 -1
  159. inspect_ai/util/_sandbox/context.py +32 -7
  160. inspect_ai/util/_sandbox/docker/cleanup.py +4 -0
  161. inspect_ai/util/_sandbox/docker/compose.py +2 -2
  162. inspect_ai/util/_sandbox/docker/docker.py +12 -1
  163. inspect_ai/util/_store_model.py +30 -7
  164. inspect_ai/util/_subprocess.py +13 -3
  165. {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.83.dist-info}/METADATA +1 -1
  166. {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.83.dist-info}/RECORD +179 -153
  167. inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +0 -167
  168. /inspect_ai/{solver → agent}/_bridge/__init__.py +0 -0
  169. /inspect_ai/{solver/_human_agent → agent/_human}/__init__.py +0 -0
  170. /inspect_ai/{solver/_human_agent → agent/_human}/commands/command.py +0 -0
  171. /inspect_ai/{solver/_human_agent → agent/_human}/commands/instructions.py +0 -0
  172. /inspect_ai/{solver/_human_agent → agent/_human}/commands/note.py +0 -0
  173. /inspect_ai/{solver/_human_agent → agent/_human}/commands/status.py +0 -0
  174. /inspect_ai/{solver/_human_agent → agent/_human}/commands/submit.py +0 -0
  175. /inspect_ai/{solver/_human_agent → agent/_human}/panel.py +0 -0
  176. /inspect_ai/{solver/_human_agent → agent/_human}/view.py +0 -0
  177. {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.83.dist-info}/WHEEL +0 -0
  178. {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.83.dist-info}/entry_points.txt +0 -0
  179. {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.83.dist-info}/licenses/LICENSE +0 -0
  180. {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.83.dist-info}/top_level.txt +0 -0
inspect_ai/model/_providers/anthropic.py
@@ -3,7 +3,7 @@ import os
 import re
 from copy import copy
 from logging import getLogger
-from typing import Any, Literal, NamedTuple, Optional, Tuple, cast
+from typing import Any, Literal, Optional, Tuple, cast
 
 import httpcore
 import httpx
@@ -153,7 +153,7 @@ class AnthropicAPI(ModelAPI):
         self._http_hooks = HttpxHooks(self.client._client)
 
     @override
-    async def close(self) -> None:
+    async def aclose(self) -> None:
         await self.client.close()
 
     def is_bedrock(self) -> bool:
@@ -639,11 +639,7 @@ def message_tool_choice(
     elif tool_choice == "any":
         return {"type": "any"}
     elif tool_choice == "none":
-        warn_once(
-            logger,
-            'The Anthropic API does not support tool_choice="none" (using "auto" instead)',
-        )
-        return {"type": "auto"}
+        return {"type": "none"}
     else:
         return {"type": "auto"}
 
@@ -723,11 +719,12 @@ async def message_param(message: ChatMessage) -> MessageParam:
 
         # now add tools
         for tool_call in message.tool_calls:
+            internal_name = _internal_name_from_tool_call(tool_call)
             tools_content.append(
                 ToolUseBlockParam(
                     type="tool_use",
                     id=tool_call.id,
-                    name=tool_call.internal_name or tool_call.function,
+                    name=internal_name or tool_call.function,
                     input=tool_call.arguments,
                 )
             )
@@ -774,14 +771,13 @@ async def model_output_from_message(
             content.append(ContentText(type="text", text=content_text))
         elif isinstance(content_block, ToolUseBlock):
             tool_calls = tool_calls or []
-            info = maybe_mapped_call_info(content_block.name, tools)
+            (tool_name, internal_name) = _names_for_tool_call(content_block.name, tools)
             tool_calls.append(
                 ToolCall(
-                    type=info.internal_type,
                     id=content_block.id,
-                    function=info.inspect_name,
-                    internal_name=info.internal_name,
+                    function=tool_name,
                     arguments=content_block.model_dump().get("input", {}),
+                    internal=internal_name,
                 )
             )
         elif isinstance(content_block, RedactedThinkingBlock):
@@ -801,7 +797,7 @@ async def model_output_from_message(
     # resolve choice
     choice = ChatCompletionChoice(
         message=ChatMessageAssistant(
-            content=content, tool_calls=tool_calls, source="generate"
+            content=content, tool_calls=tool_calls, model=model, source="generate"
        ),
         stop_reason=message_stop_reason(message),
     )
@@ -831,15 +827,18 @@ async def model_output_from_message(
     )
 
 
-class CallInfo(NamedTuple):
-    internal_name: str | None
-    internal_type: str
-    inspect_name: str
+def _internal_name_from_tool_call(tool_call: ToolCall) -> str | None:
+    assert isinstance(tool_call.internal, str | None), (
+        f"ToolCall internal must be `str | None`: {tool_call.internal}"
+    )
+    return tool_call.internal
 
 
-def maybe_mapped_call_info(tool_called: str, tools: list[ToolInfo]) -> CallInfo:
+def _names_for_tool_call(
+    tool_called: str, tools: list[ToolInfo]
+) -> tuple[str, str | None]:
     """
-    Return call info - potentially transformed by native tool mappings.
+    Return the name of the tool to call and potentially an internal name.
 
     Anthropic prescribes names for their native tools - `computer`, `bash`, and
     `str_replace_editor`. For a variety of reasons, Inspect's tool names to not
@@ -854,11 +853,11 @@ def maybe_mapped_call_info(tool_called: str, tools: list[ToolInfo]) -> CallInfo:
 
     return next(
         (
-            CallInfo(entry[0], entry[1], entry[2])
+            (entry[2], entry[0])
             for entry in mappings
             if entry[0] == tool_called and any(tool.name == entry[2] for tool in tools)
         ),
-        CallInfo(None, "function", tool_called),
+        (tool_called, None),
     )
 
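Net effect of the anthropic.py hunks: the CallInfo NamedTuple is gone, replaced by a plain (tool_name, internal_name) pair, and the provider-native tool name now rides along on the renamed ToolCall.internal field (formerly internal_name, with internal_type dropped entirely). A minimal standalone sketch of that round trip; the ToolCall stand-in and the mapping entries below are illustrative, not the package's actual definitions:

from dataclasses import dataclass, field
from typing import Any

# Stand-in for inspect_ai's ToolCall with only the fields these hunks touch;
# the real class lives in inspect_ai/tool/_tool_call.py.
@dataclass
class ToolCall:
    id: str
    function: str                 # Inspect's name for the tool
    arguments: dict[str, Any] = field(default_factory=dict)
    internal: str | None = None   # provider-native name, when it differs

# Hypothetical mapping table shaped like the hunk's `mappings` entries:
# (native_name, internal_type, inspect_name).
MAPPINGS = [
    ("bash", "bash_20250124", "bash_session"),
    ("str_replace_editor", "text_editor_20250124", "text_editor"),
]

def names_for_tool_call(tool_called: str, available: set[str]) -> tuple[str, str | None]:
    # mirrors _names_for_tool_call: map a native name back to the Inspect tool
    # name, remembering the native name as the tool call's `internal` name
    for native, _type, inspect_name in MAPPINGS:
        if native == tool_called and inspect_name in available:
            return (inspect_name, native)
    return (tool_called, None)

# parsing output: Claude called "bash", the task registered "bash_session"
name, internal = names_for_tool_call("bash", {"bash_session"})
call = ToolCall(id="toolu_1", function=name, arguments={"command": "ls"}, internal=internal)
assert (call.function, call.internal) == ("bash_session", "bash")

# replaying history: prefer the stored native name, as message_param now does
assert (call.internal or call.function) == "bash"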
inspect_ai/model/_providers/azureai.py
@@ -129,11 +129,6 @@ class AzureAIAPI(ModelAPI):
         self.endpoint_url = endpoint_url
         self.model_args = model_args
 
-    @override
-    async def close(self) -> None:
-        # client is created/destroyed each time in generate()
-        pass
-
     async def generate(
         self,
         input: list[ChatMessage],
@@ -143,9 +138,9 @@ class AzureAIAPI(ModelAPI):
     ) -> ModelOutput | tuple[ModelOutput | Exception, ModelCall]:
         # emulate tools (auto for llama, opt-in for others)
         if self.emulate_tools is None and self.is_llama():
-            handler: ChatAPIHandler | None = Llama31Handler()
+            handler: ChatAPIHandler | None = Llama31Handler(self.model_name)
         elif self.emulate_tools:
-            handler = Llama31Handler()
+            handler = Llama31Handler(self.model_name)
         else:
             handler = None
 
@@ -190,7 +185,9 @@ class AzureAIAPI(ModelAPI):
         response: ChatCompletions = await client.complete(**request)
         return ModelOutput(
             model=response.model,
-            choices=chat_completion_choices(response.choices, tools, handler),
+            choices=chat_completion_choices(
+                response.model, response.choices, tools, handler
+            ),
             usage=ModelUsage(
                 input_tokens=response.usage.prompt_tokens,
                 output_tokens=response.usage.completion_tokens,
@@ -368,24 +365,37 @@ def chat_tool_choice(
 
 
 def chat_completion_choices(
-    choices: list[ChatChoice], tools: list[ToolInfo], handler: ChatAPIHandler | None
+    model: str,
+    choices: list[ChatChoice],
+    tools: list[ToolInfo],
+    handler: ChatAPIHandler | None,
 ) -> list[ChatCompletionChoice]:
     choices = copy(choices)
     choices.sort(key=lambda c: c.index)
-    return [chat_complection_choice(choice, tools, handler) for choice in choices]
+    return [
+        chat_complection_choice(model, choice, tools, handler) for choice in choices
+    ]
 
 
 def chat_complection_choice(
-    choice: ChatChoice, tools: list[ToolInfo], handler: ChatAPIHandler | None
+    model: str,
+    choice: ChatChoice,
+    tools: list[ToolInfo],
+    handler: ChatAPIHandler | None,
 ) -> ChatCompletionChoice:
     return ChatCompletionChoice(
-        message=chat_completion_assistant_message(choice.message, tools, handler),
+        message=chat_completion_assistant_message(
+            model, choice.message, tools, handler
+        ),
         stop_reason=chat_completion_stop_reason(choice.finish_reason),
     )
 
 
 def chat_completion_assistant_message(
-    response: ChatResponseMessage, tools: list[ToolInfo], handler: ChatAPIHandler | None
+    model: str,
+    response: ChatResponseMessage,
+    tools: list[ToolInfo],
+    handler: ChatAPIHandler | None,
 ) -> ChatMessageAssistant:
     if handler:
         return handler.parse_assistant_response(response.content, tools)
@@ -397,6 +407,7 @@ def chat_completion_assistant_message(
         ]
         if response.tool_calls is not None
         else None,
+        model=model,
     )
 
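The azureai.py hunks show the pattern that repeats through the rest of this diff: the model id is threaded down the choice-construction helpers so that every ChatMessageAssistant records which model produced it (a new model field on the message class, per the _chat_message.py entry in the file list). A compressed sketch of the shape, using stand-in types rather than the real inspect_ai classes:

from dataclasses import dataclass

# stand-ins for inspect_ai's message/choice types
@dataclass
class ChatMessageAssistant:
    content: str
    model: str | None = None   # new in 0.3.83: the producing model
    source: str | None = None

@dataclass
class ChatCompletionChoice:
    message: ChatMessageAssistant
    stop_reason: str

def chat_completion_choice(model: str, content: str, finish_reason: str) -> ChatCompletionChoice:
    # `model` is now the first parameter at every level of the helper chain,
    # so the assistant message can carry it (azureai, bedrock, goodfire,
    # google, groq, hf, and mistral all get the same treatment below)
    return ChatCompletionChoice(
        message=ChatMessageAssistant(content=content, model=model, source="generate"),
        stop_reason=finish_reason,
    )

choice = chat_completion_choice("my-azure-deployment", "hello", "stop")
assert choice.message.model == "my-azure-deployment"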
inspect_ai/model/_providers/bedrock.py
@@ -269,11 +269,6 @@ class BedrockAPI(ModelAPI):
         except ImportError:
             raise pip_dependency_error("Bedrock API", ["aioboto3"])
 
-    @override
-    async def close(self) -> None:
-        # client is created/destroyed each time in generate()
-        pass
-
     @override
     def connection_key(self) -> str:
         return self.model_name
@@ -454,7 +449,6 @@ def model_output_from_response(
             tool_calls.append(
                 ToolCall(
                     id=c.toolUse.toolUseId,
-                    type="function",
                     function=c.toolUse.name,
                     arguments=cast(dict[str, Any], c.toolUse.input or {}),
                 )
@@ -465,7 +459,7 @@ def model_output_from_response(
     # resolve choice
     choice = ChatCompletionChoice(
         message=ChatMessageAssistant(
-            content=content, tool_calls=tool_calls, source="generate"
+            content=content, tool_calls=tool_calls, model=model, source="generate"
         ),
         stop_reason=message_stop_reason(response.stopReason),
     )
inspect_ai/model/_providers/cloudflare.py
@@ -59,7 +59,7 @@ class CloudFlareAPI(ModelAPI):
         self.model_args = model_args
 
     @override
-    async def close(self) -> None:
+    async def aclose(self) -> None:
         await self.client.aclose()
 
     async def generate(
@@ -141,6 +141,6 @@ class CloudFlareAPI(ModelAPI):
 
     def chat_api_handler(self) -> ChatAPIHandler:
         if "llama" in self.model_name.lower():
-            return Llama31Handler()
+            return Llama31Handler(self.model_name)
         else:
-            return ChatAPIHandler()
+            return ChatAPIHandler(self.model_name)
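Related to the same change, ChatAPIHandler and Llama31Handler are now constructed with the model name, so handler-parsed (tool-emulation) responses can also stamp the message. A rough sketch of the revised constructor contract; the base-class body here is assumed, not copied from util/chatapi.py:

class ChatAPIHandler:
    def __init__(self, model: str) -> None:
        # 0.3.83: handlers remember which model they parse responses for
        self.model = model

    def parse_assistant_response(self, response: str) -> dict[str, str]:
        # the real handler returns a ChatMessageAssistant; a dict stands in here
        return {"content": response, "model": self.model, "source": "generate"}

class Llama31Handler(ChatAPIHandler):
    pass  # inherits the model-aware constructor

handler = Llama31Handler("@cf/meta/llama-3.1-8b-instruct")
assert handler.parse_assistant_response("hi")["model"] == "@cf/meta/llama-3.1-8b-instruct"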
inspect_ai/model/_providers/goodfire.py
@@ -115,11 +115,6 @@ class GoodfireAPI(ModelAPI):
         # Initialize variant directly with model name
         self.variant = Variant(self.model_name)  # type: ignore
 
-    @override
-    async def close(self) -> None:
-        # httpx.AsyncClient is created on each generate()
-        pass
-
     def _to_goodfire_message(self, message: ChatMessage) -> GoodfireChatMessage:
         """Convert an Inspect message to a Goodfire message format.
 
@@ -232,7 +227,8 @@ class GoodfireAPI(ModelAPI):
             choices=[
                 ChatCompletionChoice(
                     message=ChatMessageAssistant(
-                        content=response_dict["choices"][0]["message"]["content"]
+                        content=response_dict["choices"][0]["message"]["content"],
+                        model=self.model_name,
                     ),
                     stop_reason="stop",
                 )
inspect_ai/model/_providers/google.py
@@ -183,11 +183,6 @@ class GoogleGenAIAPI(ModelAPI):
         # save model args
         self.model_args = model_args
 
-    @override
-    async def close(self) -> None:
-        # GenerativeModel uses a cached/shared client so there is no 'close'
-        pass
-
     def is_vertex(self) -> bool:
         return self.service == "vertex"
 
@@ -257,9 +252,10 @@ class GoogleGenAIAPI(ModelAPI):
         except ClientError as ex:
             return self.handle_client_error(ex), model_call()
 
+        model_name = response.model_version or self.model_name
         output = ModelOutput(
-            model=self.model_name,
-            choices=completion_choices_from_candidates(response),
+            model=model_name,
+            choices=completion_choices_from_candidates(model_name, response),
             usage=usage_metadata_to_model_usage(response.usage_metadata),
         )
 
@@ -546,7 +542,9 @@ def chat_tool_config(tool_choice: ToolChoice) -> ToolConfig:
     )
 
 
-def completion_choice_from_candidate(candidate: Candidate) -> ChatCompletionChoice:
+def completion_choice_from_candidate(
+    model: str, candidate: Candidate
+) -> ChatCompletionChoice:
     # content can be None when the finish_reason is SAFETY
     if candidate.content is None:
         content = ""
@@ -572,7 +570,6 @@ def completion_choice_from_candidate(candidate: Candidate) -> ChatCompletionChoice:
             if part.function_call:
                 tool_calls.append(
                     ToolCall(
-                        type="function",
                         id=part.function_call.name,
                         function=part.function_call.name,
                         arguments=part.function_call.args,
@@ -596,6 +593,7 @@ def completion_choice_from_candidate(candidate: Candidate) -> ChatCompletionChoice:
         message=ChatMessageAssistant(
             content=choice_content,
             tool_calls=tool_calls if len(tool_calls) > 0 else None,
+            model=model,
             source="generate",
         ),
         stop_reason=stop_reason,
@@ -624,19 +622,22 @@ def completion_choice_from_candidate(candidate: Candidate) -> ChatCompletionChoice:
 
 
 def completion_choices_from_candidates(
+    model: str,
     response: GenerateContentResponse,
 ) -> list[ChatCompletionChoice]:
     candidates = response.candidates
     if candidates:
         candidates_list = sorted(candidates, key=lambda c: c.index)
         return [
-            completion_choice_from_candidate(candidate) for candidate in candidates_list
+            completion_choice_from_candidate(model, candidate)
+            for candidate in candidates_list
         ]
     elif response.prompt_feedback:
         return [
             ChatCompletionChoice(
                 message=ChatMessageAssistant(
                     content=prompt_feedback_to_content(response.prompt_feedback),
+                    model=model,
                     source="generate",
                 ),
                 stop_reason="content_filter",
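One Google-specific detail above: the service can answer with a more specific model version than was requested, so 0.3.83 prefers response.model_version and falls back to the configured name. A tiny sketch of that resolution with a stand-in response object:

from types import SimpleNamespace

def resolve_model_name(response: object, configured: str) -> str:
    # prefer the concrete version the API reports (e.g. "gemini-1.5-pro-002"),
    # falling back to the name the eval was configured with
    return getattr(response, "model_version", None) or configured

assert resolve_model_name(SimpleNamespace(model_version="gemini-1.5-pro-002"), "gemini-1.5-pro") == "gemini-1.5-pro-002"
assert resolve_model_name(SimpleNamespace(model_version=None), "gemini-1.5-pro") == "gemini-1.5-pro"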
inspect_ai/model/_providers/groq.py
@@ -93,7 +93,7 @@ class GroqAPI(ModelAPI):
         self._http_hooks = HttpxHooks(self.client._client)
 
     @override
-    async def close(self) -> None:
+    async def aclose(self) -> None:
         await self.client.close()
 
     async def generate(
@@ -203,7 +203,7 @@ class GroqAPI(ModelAPI):
         choices.sort(key=lambda c: c.index)
         return [
             ChatCompletionChoice(
-                message=chat_message_assistant(choice.message, tools),
+                message=chat_message_assistant(self.model_name, choice.message, tools),
                 stop_reason=as_stop_reason(choice.finish_reason),
             )
             for choice in choices
@@ -323,7 +323,9 @@ def chat_tool_calls(message: Any, tools: list[ToolInfo]) -> Optional[List[ToolCall]]:
     return None
 
 
-def chat_message_assistant(message: Any, tools: list[ToolInfo]) -> ChatMessageAssistant:
+def chat_message_assistant(
+    model: str, message: Any, tools: list[ToolInfo]
+) -> ChatMessageAssistant:
     reasoning = getattr(message, "reasoning", None)
     if reasoning is not None:
         content: str | list[Content] = [
@@ -335,6 +337,7 @@ def chat_message_assistant(message: Any, tools: list[ToolInfo]) -> ChatMessageAssistant:
 
     return ChatMessageAssistant(
         content=content,
+        model=model,
         source="generate",
         tool_calls=chat_tool_calls(message, tools),
     )
inspect_ai/model/_providers/hf.py
@@ -123,7 +123,7 @@ class HuggingFaceAPI(ModelAPI):
         self.tokenizer.padding_side = "left"
 
     @override
-    async def close(self) -> None:
+    def close(self) -> None:
         self.model = None
         self.tokenizer = None
         gc.collect()
@@ -205,7 +205,9 @@ class HuggingFaceAPI(ModelAPI):
 
         # construct choice
         choice = ChatCompletionChoice(
-            message=ChatMessageAssistant(content=response.output, source="generate"),
+            message=ChatMessageAssistant(
+                content=response.output, model=self.model_name, source="generate"
+            ),
             logprobs=(
                 Logprobs(content=final_logprobs) if final_logprobs is not None else None
             ),
@@ -338,7 +340,9 @@ def chat_completion_assistant_message(
     if handler:
         return handler.parse_assistant_response(response.output, tools)
     else:
-        return ChatMessageAssistant(content=response.output, source="generate")
+        return ChatMessageAssistant(
+            content=response.output, model=model_name, source="generate"
+        )
 
 
 def set_random_seeds(seed: int | None = None) -> None:
inspect_ai/model/_providers/mistral.py
@@ -135,11 +135,6 @@ class MistralAPI(ModelAPI):
     def is_azure(self) -> bool:
         return self.service == "azure"
 
-    @override
-    async def close(self) -> None:
-        # client is created and destroyed in generate
-        pass
-
     async def generate(
         self,
         input: list[ChatMessage],
@@ -448,13 +443,11 @@ def chat_tool_call(tool_call: MistralToolCall, tools: list[ToolInfo]) -> ToolCall:
             id, tool_call.function.name, tool_call.function.arguments, tools
         )
     else:
-        return ToolCall(
-            id, tool_call.function.name, tool_call.function.arguments, type="function"
-        )
+        return ToolCall(id, tool_call.function.name, tool_call.function.arguments)
 
 
 def completion_choice(
-    choice: MistralChatCompletionChoice, tools: list[ToolInfo]
+    model: str, choice: MistralChatCompletionChoice, tools: list[ToolInfo]
 ) -> ChatCompletionChoice:
     message = choice.message
     if message:
@@ -465,6 +458,7 @@ def completion_choice(
             tool_calls=chat_tool_calls(message.tool_calls, tools)
             if message.tool_calls
            else None,
+            model=model,
             source="generate",
         ),
         stop_reason=(
@@ -511,7 +505,10 @@ def completion_choices_from_response(
     if response.choices is None:
         return []
     else:
-        return [completion_choice(choice, tools) for choice in response.choices]
+        return [
+            completion_choice(response.model, choice, tools)
+            for choice in response.choices
+        ]
 
 
 def choice_stop_reason(choice: MistralChatCompletionChoice) -> StopReason:
inspect_ai/model/_providers/openai.py
@@ -33,6 +33,7 @@ from .._model_call import ModelCall
 from .._model_output import ChatCompletionChoice, ModelOutput, ModelUsage
 from .._openai import (
     OpenAIResponseError,
+    is_computer_use_preview,
     is_gpt,
     is_o1_mini,
     is_o1_preview,
@@ -45,10 +46,7 @@ from .._openai import (
     openai_media_filter,
 )
 from .openai_o1 import generate_o1
-from .util import (
-    environment_prerequisite_error,
-    model_base_url,
-)
+from .util import environment_prerequisite_error, model_base_url
 
 logger = getLogger(__name__)
 
@@ -77,9 +75,6 @@ class OpenAIAPI(ModelAPI):
         else:
             self.service = None
 
-        # note whether we are forcing the responses_api
-        self.responses_api = True if responses_api else False
-
         # call super
         super().__init__(
             model_name=model_name,
@@ -89,6 +84,11 @@ class OpenAIAPI(ModelAPI):
             config=config,
         )
 
+        # note whether we are forcing the responses_api
+        self.responses_api = (
+            responses_api or self.is_o1_pro() or self.is_computer_use_preview()
+        )
+
         # resolve api_key
         if not self.api_key:
             if self.service == "azure":
@@ -128,10 +128,14 @@ class OpenAIAPI(ModelAPI):
             )
 
             # resolve version
-            api_version = os.environ.get(
-                "AZUREAI_OPENAI_API_VERSION",
-                os.environ.get("OPENAI_API_VERSION", "2025-02-01-preview"),
-            )
+            if model_args.get("api_version") is not None:
+                # use slightly complicated logic to allow for "api_version" to be removed
+                api_version = model_args.pop("api_version")
+            else:
+                api_version = os.environ.get(
+                    "AZUREAI_OPENAI_API_VERSION",
+                    os.environ.get("OPENAI_API_VERSION", "2025-02-01-preview"),
+                )
 
             self.client: AsyncAzureOpenAI | AsyncOpenAI = AsyncAzureOpenAI(
                 api_key=self.api_key,
@@ -166,13 +170,33 @@ class OpenAIAPI(ModelAPI):
     def is_o1_preview(self) -> bool:
         return is_o1_preview(self.model_name)
 
+    def is_computer_use_preview(self) -> bool:
+        return is_computer_use_preview(self.model_name)
+
     def is_gpt(self) -> bool:
         return is_gpt(self.model_name)
 
     @override
-    async def close(self) -> None:
+    async def aclose(self) -> None:
         await self.client.close()
 
+    @override
+    def emulate_reasoning_history(self) -> bool:
+        return not self.responses_api
+
+    @override
+    def tool_result_images(self) -> bool:
+        # o1-pro, o1, and computer_use_preview support image inputs (but we're not strictly supporting o1)
+        return self.is_o1_pro() or self.is_computer_use_preview()
+
+    @override
+    def disable_computer_screenshot_truncation(self) -> bool:
+        # Because ComputerCallOutput has a required output field of type
+        # ResponseComputerToolCallOutputScreenshot, we must have an image in
+        # order to provide a valid tool call response. Therefore, we cannot
+        # support image truncation.
+        return True
+
     async def generate(
         self,
         input: list[ChatMessage],
@@ -188,7 +212,7 @@ class OpenAIAPI(ModelAPI):
                 tools=tools,
                 **self.completion_params(config, False),
             )
-        elif self.is_o1_pro() or self.responses_api:
+        elif self.responses_api:
             return await generate_responses(
                 client=self.client,
                 http_hooks=self._http_hooks,
@@ -344,10 +368,7 @@ class OpenAIAPI(ModelAPI):
             params["top_p"] = config.top_p
         if config.num_choices is not None:
             params["n"] = config.num_choices
-        if config.logprobs is not None:
-            params["logprobs"] = config.logprobs
-        if config.top_logprobs is not None:
-            params["top_logprobs"] = config.top_logprobs
+        params = self.set_logprobs_params(params, config)
         if tools and config.parallel_tool_calls is not None and not self.is_o_series():
             params["parallel_tool_calls"] = config.parallel_tool_calls
         if (
@@ -372,6 +393,15 @@ class OpenAIAPI(ModelAPI):
 
         return params
 
+    def set_logprobs_params(
+        self, params: dict[str, Any], config: GenerateConfig
+    ) -> dict[str, Any]:
+        if config.logprobs is not None:
+            params["logprobs"] = config.logprobs
+        if config.top_logprobs is not None:
+            params["top_logprobs"] = config.top_logprobs
+        return params
+
 
 class OpenAIAsyncHttpxClient(httpx.AsyncClient):
     """Custom async client that deals better with long running Async requests.
inspect_ai/model/_providers/openai_o1.py
@@ -40,7 +40,7 @@ async def generate_o1(
     **params: Any,
 ) -> ModelOutput | tuple[ModelOutput | Exception, ModelCall]:
     # create chatapi handler
-    handler = O1PreviewChatAPIHandler()
+    handler = O1PreviewChatAPIHandler(model)
 
     # call model
     request = dict(
@@ -155,6 +155,9 @@ TOOL_CALL = "tool_call"
 
 
 class O1PreviewChatAPIHandler(ChatAPIHandler):
+    def __init__(self, model: str) -> None:
+        self.model = model
+
     @override
     def input_with_tools(
         self, input: list[ChatMessage], tools: list[ToolInfo]
@@ -234,12 +237,17 @@ class O1PreviewChatAPIHandler(ChatAPIHandler):
 
             # return the message
             return ChatMessageAssistant(
-                content=content, tool_calls=tool_calls, source="generate"
+                content=content,
+                tool_calls=tool_calls,
+                model=self.model,
+                source="generate",
             )
 
         # otherwise this is just an ordinary assistant message
         else:
-            return ChatMessageAssistant(content=response, source="generate")
+            return ChatMessageAssistant(
+                content=response, model=self.model, source="generate"
+            )
 
     @override
     def assistant_message(self, message: ChatMessageAssistant) -> ChatAPIMessage:
@@ -328,6 +336,5 @@ def parse_tool_call_content(content: str, tools: list[ToolInfo]) -> ToolCall:
         id="unknown",
         function="unknown",
         arguments={},
-        type="function",
         parse_error=parse_error,
     )