inspect-ai 0.3.60__py3-none-any.whl → 0.3.62__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/eval.py +13 -1
- inspect_ai/_cli/view.py +4 -0
- inspect_ai/_display/textual/widgets/transcript.py +15 -9
- inspect_ai/_eval/task/error.py +10 -14
- inspect_ai/_eval/task/generate.py +41 -35
- inspect_ai/_eval/task/run.py +20 -12
- inspect_ai/_util/hooks.py +17 -7
- inspect_ai/_util/transcript.py +11 -0
- inspect_ai/_view/www/dist/assets/index.css +1 -0
- inspect_ai/_view/www/dist/assets/index.js +100 -94
- inspect_ai/_view/www/log-schema.json +35 -19
- inspect_ai/_view/www/package.json +1 -1
- inspect_ai/_view/www/src/components/ChatView.mjs +23 -0
- inspect_ai/_view/www/src/types/log.d.ts +6 -4
- inspect_ai/log/_recorders/eval.py +1 -1
- inspect_ai/model/_chat_message.py +29 -2
- inspect_ai/model/_conversation.py +10 -3
- inspect_ai/model/_generate_config.py +6 -0
- inspect_ai/model/_model.py +164 -25
- inspect_ai/model/_openai.py +33 -1
- inspect_ai/model/_providers/anthropic.py +12 -3
- inspect_ai/model/_providers/groq.py +4 -0
- inspect_ai/model/_providers/openai.py +21 -9
- inspect_ai/model/_providers/providers.py +1 -1
- inspect_ai/model/_reasoning.py +17 -0
- inspect_ai/solver/__init__.py +2 -0
- inspect_ai/solver/_basic_agent.py +78 -58
- inspect_ai/{util → solver}/_limit.py +13 -0
- inspect_ai/solver/_task_state.py +37 -7
- inspect_ai/tool/_tools/_web_browser/_web_browser.py +3 -1
- inspect_ai/tool/beta/_computer/_resources/Dockerfile +5 -3
- inspect_ai/tool/beta/_computer/_resources/entrypoint/x11vnc_startup.sh +1 -1
- inspect_ai/tool/beta/_computer/_resources/image_home_dir/.config/Code/User/globalStorage/state.vscdb +0 -0
- inspect_ai/tool/beta/_computer/_resources/image_home_dir/.config/Code/User/settings.json +3 -0
- inspect_ai/tool/beta/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-panel.xml +61 -0
- inspect_ai/tool/beta/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-screensaver.xml +10 -0
- inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Terminal.desktop +10 -0
- inspect_ai/util/__init__.py +0 -2
- inspect_ai/util/_sandbox/self_check.py +51 -28
- {inspect_ai-0.3.60.dist-info → inspect_ai-0.3.62.dist-info}/METADATA +2 -2
- {inspect_ai-0.3.60.dist-info → inspect_ai-0.3.62.dist-info}/RECORD +45 -40
- inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/XPaint.desktop +0 -10
- {inspect_ai-0.3.60.dist-info → inspect_ai-0.3.62.dist-info}/LICENSE +0 -0
- {inspect_ai-0.3.60.dist-info → inspect_ai-0.3.62.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.60.dist-info → inspect_ai-0.3.62.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.60.dist-info → inspect_ai-0.3.62.dist-info}/top_level.txt +0 -0
inspect_ai/_view/www/log-schema.json
CHANGED

```diff
@@ -260,13 +260,26 @@
         ],
         "default": null,
         "title": "Tool Calls"
+      },
+      "reasoning": {
+        "anyOf": [
+          {
+            "type": "string"
+          },
+          {
+            "type": "null"
+          }
+        ],
+        "default": null,
+        "title": "Reasoning"
       }
     },
     "required": [
       "content",
       "source",
       "role",
-      "tool_calls"
+      "tool_calls",
+      "reasoning"
     ],
     "title": "ChatMessageAssistant",
     "type": "object",
@@ -486,7 +499,10 @@
       "tool_call_id": {
         "anyOf": [
           {
-            "type": "string"
+            "items": {
+              "type": "string"
+            },
+            "type": "array"
           },
           {
             "type": "null"
@@ -1131,7 +1147,6 @@
         "presence_penalty": null,
         "logit_bias": null,
         "seed": null,
-        "suffix": null,
         "top_k": null,
         "num_choices": null,
         "logprobs": null,
@@ -1140,7 +1155,8 @@
         "internal_tools": null,
         "max_tool_output": null,
         "cache_prompt": null,
-        "reasoning_effort": null
+        "reasoning_effort": null,
+        "reasoning_history": null
       }
     }
   },
@@ -2120,18 +2136,6 @@
         "default": null,
         "title": "Seed"
       },
-      "suffix": {
-        "anyOf": [
-          {
-            "type": "string"
-          },
-          {
-            "type": "null"
-          }
-        ],
-        "default": null,
-        "title": "Suffix"
-      },
       "top_k": {
         "anyOf": [
           {
@@ -2248,6 +2252,18 @@
         ],
         "default": null,
         "title": "Reasoning Effort"
+      },
+      "reasoning_history": {
+        "anyOf": [
+          {
+            "type": "boolean"
+          },
+          {
+            "type": "null"
+          }
+        ],
+        "default": null,
+        "title": "Reasoning History"
       }
     },
     "title": "GenerateConfig",
@@ -2266,7 +2282,6 @@
       "presence_penalty",
       "logit_bias",
       "seed",
-      "suffix",
       "top_k",
       "num_choices",
       "logprobs",
@@ -2275,7 +2290,8 @@
       "internal_tools",
       "max_tool_output",
       "cache_prompt",
-      "reasoning_effort"
+      "reasoning_effort",
+      "reasoning_history"
     ],
     "additionalProperties": false
   },
@@ -4247,9 +4263,9 @@
       "parallel_tool_calls": null,
       "presence_penalty": null,
       "reasoning_effort": null,
+      "reasoning_history": null,
       "seed": null,
       "stop_seqs": null,
-      "suffix": null,
       "system_message": null,
       "temperature": null,
       "timeout": null,
```
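Taken together, these schema changes add a nullable `reasoning` field to `ChatMessageAssistant`, widen `ChatMessageUser.tool_call_id` to an array of strings, and replace the removed `suffix` generation option with `reasoning_history`. A quick sketch of the new assistant message shape (field values are illustrative):

```python
# Sketch of the updated ChatMessageAssistant shape (values illustrative).
from inspect_ai.model import ChatMessageAssistant

message = ChatMessageAssistant(
    content="Paris.",
    reasoning="The user asked for the capital of France.",
)
# "reasoning" now serializes alongside content/tool_calls (null by default)
print(message.model_dump_json(indent=2))
```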
inspect_ai/_view/www/src/components/ChatView.mjs
CHANGED

```diff
@@ -8,6 +8,7 @@ import { ExpandablePanel } from "./ExpandablePanel.mjs";
 import { FontSize, TextStyle } from "../appearance/Fonts.mjs";
 import { resolveToolInput, ToolCallView } from "./Tools.mjs";
 import { VirtualList } from "./VirtualList.mjs";
+import { MarkdownDiv } from "./MarkdownDiv.mjs";
 
 /**
  * Renders the ChatViewVirtualList component.
@@ -282,7 +283,29 @@ const ChatMessage = ({
     <i class="${iconForMsg(message)}"></i>
     ${message.role}
   </div>
+
+  ${
+    message.role === "assistant" && message.reasoning
+      ? html` <div
+          style=${{
+            marginLeft: indented ? "1.1rem" : "0",
+            paddingBottom: "0.8rem",
+          }}
+        >
+          <div style=${{ ...TextStyle.label, ...TextStyle.secondary }}>Reasoning</div>
+          <${ExpandablePanel} collapse=${true}><${MarkdownDiv} markdown=${message.reasoning}/></${ExpandablePanel}>
+        </div>`
+      : undefined
+  }
+
   <div style=${{ marginLeft: indented ? "1.1rem" : "0", paddingBottom: indented ? "0.8rem" : "0" }}>
+    ${
+      message.role === "assistant" && message.reasoning
+        ? html`<div style=${{ ...TextStyle.label, ...TextStyle.secondary }}>
+            Response
+          </div>`
+        : ""
+    }
     <${ExpandablePanel} collapse=${collapse}>
       <${MessageContents}
         key=${`${id}-contents`}
```
inspect_ai/_view/www/src/types/log.d.ts
CHANGED

```diff
@@ -70,7 +70,6 @@ export type LogitBias = {
   [k: string]: number;
 } | null;
 export type Seed = number | null;
-export type Suffix = string | null;
 export type TopK = number | null;
 export type NumChoices = number | null;
 export type Logprobs = boolean | null;
@@ -80,6 +79,7 @@ export type InternalTools = boolean | null;
 export type MaxToolOutput = number | null;
 export type CachePrompt = "auto" | boolean | null;
 export type ReasoningEffort = ("low" | "medium" | "high") | null;
+export type ReasoningHistory = boolean | null;
 export type TotalSamples = number;
 export type CompletedSamples = number;
 export type Name3 = string;
@@ -133,7 +133,7 @@ export type Content1 =
   | (ContentText | ContentImage | ContentAudio | ContentVideo)[];
 export type Source1 = ("input" | "generate") | null;
 export type Role1 = "user";
-export type ToolCallId = string | null;
+export type ToolCallId = string[] | null;
 export type Content2 =
   | string
   | (ContentText | ContentImage | ContentAudio | ContentVideo)[];
@@ -147,6 +147,7 @@ export type ParseError = string | null;
 export type Title = string | null;
 export type Format2 = "text" | "markdown";
 export type Content3 = string;
+export type Reasoning = string | null;
 export type Content4 =
   | string
   | (ContentText | ContentImage | ContentAudio | ContentVideo)[];
@@ -547,7 +548,6 @@ export interface GenerateConfig {
   presence_penalty: PresencePenalty;
   logit_bias: LogitBias;
   seed: Seed;
-  suffix: Suffix;
   top_k: TopK;
   num_choices: NumChoices;
   logprobs: Logprobs;
@@ -557,6 +557,7 @@ export interface GenerateConfig {
   max_tool_output: MaxToolOutput;
   cache_prompt: CachePrompt;
   reasoning_effort: ReasoningEffort;
+  reasoning_history: ReasoningHistory;
 }
 export interface EvalResults {
   total_samples: TotalSamples;
@@ -658,6 +659,7 @@ export interface ChatMessageAssistant {
   source: Source2;
   role: Role2;
   tool_calls: ToolCalls;
+  reasoning: Reasoning;
 }
 export interface ToolCall {
   id: Id1;
@@ -901,7 +903,6 @@ export interface GenerateConfig1 {
   presence_penalty: PresencePenalty;
   logit_bias: LogitBias;
   seed: Seed;
-  suffix: Suffix;
   top_k: TopK;
   num_choices: NumChoices;
   logprobs: Logprobs;
@@ -911,6 +912,7 @@ export interface GenerateConfig1 {
   max_tool_output: MaxToolOutput;
   cache_prompt: CachePrompt;
   reasoning_effort: ReasoningEffort;
+  reasoning_history: ReasoningHistory;
 }
 /**
  * Model call (raw request/response data).
```
inspect_ai/log/_recorders/eval.py
CHANGED

```diff
@@ -203,7 +203,7 @@ class EvalRecorder(FileRecorder):
         # of small fetches from the zip file streams)
         temp_log: str | None = None
         fs = filesystem(location)
-        if not fs.is_local():
+        if not fs.is_local() and header_only is False:
            with tempfile.NamedTemporaryFile(delete=False) as temp:
                temp_log = temp.name
                fs.get_file(location, temp_log)
```
inspect_ai/model/_chat_message.py
CHANGED

```diff
@@ -7,6 +7,8 @@ from inspect_ai._util.content import Content, ContentText
 from inspect_ai.tool import ToolCall
 from inspect_ai.tool._tool_call import ToolCallError
 
+from ._reasoning import parse_content_with_reasoning
+
 logger = getLogger(__name__)
 
 
@@ -72,8 +74,8 @@ class ChatMessageUser(ChatMessageBase):
     role: Literal["user"] = Field(default="user")
     """Conversation role."""
 
-    tool_call_id: str | None = Field(default=None)
-    """ID of tool call this message has the content payload for."""
+    tool_call_id: list[str] | None = Field(default=None)
+    """ID(s) of tool call(s) this message has the content payload for."""
 
 
 class ChatMessageAssistant(ChatMessageBase):
@@ -83,6 +85,31 @@ class ChatMessageAssistant(ChatMessageBase):
     tool_calls: list[ToolCall] | None = Field(default=None)
     """Tool calls made by the model."""
 
+    reasoning: str | None = Field(default=None)
+    """Reasoning content."""
+
+    # Some OpenAI compatible REST endpoints include reasoning as a field alongside
+    # content, however since this field doesn't exist in the OpenAI interface,
+    # hosting providers (so far we've seen this with Together and Groq) may
+    # include the reasoning in a <think></think> tag before the main response.
+    # We expect this pattern to be repeated elsewhere, so include this hook to
+    # automatically extract the reasoning content when the response is prefaced
+    # with a <think> block. If this ends up being an overreach we can fall back
+    # to each provider manually parsing out <think> using a helper function.
+    # The implementation isn't important here, the critical thing to establish
+    # is that Inspect makes reasoning content available separately.
+    @model_validator(mode="before")
+    @classmethod
+    def extract_reasoning(cls, data: Any) -> Any:
+        if isinstance(data, dict):
+            content = data.get("content", None)
+            if isinstance(content, str):
+                parsed = parse_content_with_reasoning(content)
+                if parsed:
+                    data["reasoning"] = parsed.reasoning
+                    data["content"] = parsed.content
+        return data
+
 
 class ChatMessageTool(ChatMessageBase):
     role: Literal["tool"] = Field(default="tool")
```
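`parse_content_with_reasoning` is the new `inspect_ai/model/_reasoning.py` (+17 lines), which this diff doesn't expand. The validator above pins down its contract: split a leading `<think>...</think>` block from the remaining content, or return `None`. A hypothetical sketch of that contract (the result type and field names are assumptions, not the released code):

```python
# Hypothetical sketch of parse_content_with_reasoning(); the real
# implementation lives in inspect_ai/model/_reasoning.py (not shown in this
# diff), so the result type and field names here are assumptions.
import re
from typing import NamedTuple


class ContentWithReasoning(NamedTuple):
    reasoning: str
    content: str


def parse_content_with_reasoning(content: str) -> ContentWithReasoning | None:
    # match a leading <think>...</think> block; None means "no reasoning prefix"
    match = re.match(r"\s*<think>(.*?)</think>\s*(.*)", content, re.DOTALL)
    if match:
        return ContentWithReasoning(
            reasoning=match.group(1).strip(), content=match.group(2).strip()
        )
    return None


# e.g. a Groq- or Together-hosted model that inlines reasoning in <think> tags
parsed = parse_content_with_reasoning("<think>2 + 2 = 4</think>The answer is 4.")
assert parsed is not None and parsed.reasoning == "2 + 2 = 4"
```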
inspect_ai/model/_conversation.py
CHANGED

```diff
@@ -2,7 +2,7 @@ from rich.console import RenderableType
 from rich.text import Text
 
 from inspect_ai._util.rich import lines_display
-from inspect_ai._util.transcript import transcript_markdown
+from inspect_ai._util.transcript import transcript_markdown, transcript_reasoning
 from inspect_ai.util._conversation import conversation_panel
 from inspect_ai.util._display import display_type
 
@@ -38,8 +38,15 @@ def conversation_assistant_message(
         content=transcript_markdown(m.text, escape=True),
     )
 
-    #
-    content: list[RenderableType] = (
+    # build content
+    content: list[RenderableType] = []
+
+    # reasoning
+    if message.reasoning:
+        content.extend(transcript_reasoning(message.reasoning))
+
+    # message text
+    content.extend(
         [transcript_markdown(message.text, escape=True)] if message.text else []
     )
 
```
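`transcript_reasoning` comes from the `inspect_ai/_util/transcript.py` change (+11 lines) that this diff doesn't expand. From the call site above it must map a reasoning string to a list of rich renderables; a hypothetical sketch of such a helper (the real implementation may differ):

```python
# Hypothetical sketch only: the actual transcript_reasoning() in
# inspect_ai/_util/transcript.py is not shown in this diff. Its call site
# implies the signature "str -> list[RenderableType]".
from rich.console import RenderableType
from rich.text import Text

from inspect_ai._util.transcript import transcript_markdown


def transcript_reasoning(reasoning: str) -> list[RenderableType]:
    # label the block, render the reasoning as markdown, leave a trailing gap
    return [
        Text("Reasoning", style="bold"),
        transcript_markdown(reasoning, escape=True),
        Text(),
    ]
```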
inspect_ai/model/_generate_config.py
CHANGED

```diff
@@ -75,6 +75,9 @@ class GenerateConfigArgs(TypedDict, total=False):
     reasoning_effort: Literal["low", "medium", "high"] | None
     """Constrains effort on reasoning for reasoning models. Open AI o1 models only."""
 
+    reasoning_history: bool | None
+    """Include reasoning in chat message history sent to generate."""
+
 
 class GenerateConfig(BaseModel):
     """Base class for model generation configs."""
@@ -145,6 +148,9 @@ class GenerateConfig(BaseModel):
     reasoning_effort: Literal["low", "medium", "high"] | None = Field(default=None)
     """Constrains effort on reasoning for reasoning models. Open AI o1 models only."""
 
+    reasoning_history: bool | None = Field(default=None)
+    """Include reasoning in chat message history sent to generate."""
+
     def merge(
         self, other: Union["GenerateConfig", GenerateConfigArgs]
     ) -> "GenerateConfig":
```
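Note the tri-state semantics: `reasoning_history` defaults to `None`, and the resolution logic in `_model.py` below treats anything other than an explicit `False` as "include reasoning" (`config.reasoning_history is not False`). A brief usage sketch:

```python
# reasoning_history is tri-state: None (default), True, or False; only an
# explicit False disables replaying reasoning into the chat history.
from inspect_ai.model import GenerateConfig

config = GenerateConfig()
assert config.reasoning_history is None  # default: treated as enabled

# opt out explicitly; merge() layers the specified values of `other` on top
no_history = config.merge(GenerateConfig(reasoning_history=False))
assert no_history.reasoning_history is False
```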
inspect_ai/model/_model.py
CHANGED

```diff
@@ -33,7 +33,6 @@ from inspect_ai._util.trace import trace_action
 from inspect_ai.tool import Tool, ToolChoice, ToolFunction, ToolInfo
 from inspect_ai.tool._tool_def import ToolDef, tool_defs
 from inspect_ai.util import concurrency
-from inspect_ai.util._limit import SampleLimitExceededError
 
 from ._cache import CacheEntry, CachePolicy, cache_fetch, cache_store
 from ._call_tools import disable_parallel_tools, tool_call_view, tools_info
@@ -169,6 +168,10 @@ class ModelAPI(abc.ABC):
         """Tool results can contain images"""
         return False
 
+    def has_reasoning_history(self) -> bool:
+        """Chat message assistant messages can include reasoning."""
+        return False
+
 
 class Model:
     """Model interface."""
@@ -303,6 +306,11 @@
             tools = []
             tool_choice = "none"
 
+        # handle reasoning history
+        input = resolve_reasoning_history(
+            input, config, self.api.has_reasoning_history()
+        )
+
         # apply any tool model_input handlers
         input = resolve_tool_model_input(tdefs, input)
 
```
```diff
@@ -727,6 +735,71 @@ def simple_input_messages(
     return messages
 
 
+def resolve_reasoning_history(
+    messages: list[ChatMessage], config: GenerateConfig, api_has_reasoning_history: bool
+) -> list[ChatMessage]:
+    # determine if we are including reasoning history
+    reasoning_history = config.reasoning_history is not False
+
+    # determine up front if we have any reasoning content
+    have_reasoning = any(
+        [
+            isinstance(m, ChatMessageAssistant) and m.reasoning is not None
+            for m in messages
+        ]
+    )
+    if not have_reasoning:
+        return messages
+
+    # API assistant message format directly supports reasoning history so we will:
+    # (a) Remove reasoning content entirely if config says not to include it; or
+    # (b) Leave the messages alone if config says to include it
+    if api_has_reasoning_history:
+        # remove reasoning history as per config
+        if not reasoning_history:
+            resolved_messages: list[ChatMessage] = []
+            for message in messages:
+                if isinstance(message, ChatMessageAssistant):
+                    resolved_messages.append(
+                        message.model_copy(update={"reasoning": None})
+                    )
+                else:
+                    resolved_messages.append(message)
+
+            return resolved_messages
+
+        # include reasoning history as per config
+        else:
+            return messages
+
+    # API can't represent reasoning natively so include <think> tags
+    elif reasoning_history:
+        resolved_messages = []
+        for message in messages:
+            if (
+                isinstance(message, ChatMessageAssistant)
+                and message.reasoning is not None
+            ):
+                message = deepcopy(message)
+                if isinstance(message.content, str):
+                    message.content = (
+                        f"<think>\n{message.reasoning}\n</think>\n\n{message.content}"
+                    )
+                else:
+                    message.content.insert(
+                        0, ContentText(text=f"<think>\n{message.reasoning}\n</think>\n")
+                    )
+                message.reasoning = None
+
+            resolved_messages.append(message)
+
+        return resolved_messages
+
+    # api doesn't handle reasoning and config says no reasoning_history, nothing to do
+    else:
+        return messages
+
+
 def resolve_tool_model_input(
     tdefs: list[ToolDef], messages: list[ChatMessage]
 ) -> list[ChatMessage]:
```
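To see the fallback branch concretely (a provider without native reasoning support, `reasoning_history` left at its default), reasoning gets folded back into the message content inside `<think>` tags. This calls the private helper directly, purely for illustration:

```python
# Illustrative only: resolve_reasoning_history is a private helper in
# inspect_ai/model/_model.py, not a public API.
from inspect_ai.model import ChatMessageAssistant, ChatMessageUser, GenerateConfig
from inspect_ai.model._model import resolve_reasoning_history

messages = [
    ChatMessageUser(content="What is 2+2?"),
    ChatMessageAssistant(content="4", reasoning="2 + 2 = 4"),
]

# api_has_reasoning_history=False: provider can't represent reasoning natively
resolved = resolve_reasoning_history(messages, GenerateConfig(), False)
print(resolved[1].content)
# <think>
# 2 + 2 = 4
# </think>
#
# 4
```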
```diff
@@ -764,40 +837,104 @@ def resolve_tool_model_input(
 def tool_result_images_as_user_message(
     messages: list[ChatMessage],
 ) -> list[ChatMessage]:
-
+    """
+    To conform to models lacking support for images in tool responses, create an alternate message history that moves images into a fabricated user message.
+
+    Tool responses will have images replaced with "Image content is included below.", and the new user message will contain the images.
+    """
+    init_accum: ImagesAccumulator = ([], [], [])
+    chat_messages, user_message_content, tool_call_ids = functools.reduce(
+        tool_result_images_reducer, messages, init_accum
+    )
+    # if the last message was a tool result, we may need to flush the pending stuff here
+    return maybe_adding_user_message(chat_messages, user_message_content, tool_call_ids)
+
+
+ImagesAccumulator = tuple[list[ChatMessage], list[Content], list[str]]
+"""
+ImagesAccumulator is a tuple containing three lists:
+  - The first list contains ChatMessages that are the result of processing.
+  - The second list contains ContentImages that need to be inserted into a fabricated user message.
+  - The third list contains the tool_call_id's associated with the tool responses.
+"""
 
 
 def tool_result_images_reducer(
-
+    accum: ImagesAccumulator,
     message: ChatMessage,
-) ->
+) -> ImagesAccumulator:
+    messages, pending_content, tool_call_ids = accum
     # if there are tool result images, pull them out into a ChatUserMessage
-    if
-
-
-
-
+    if (
+        isinstance(message, ChatMessageTool)
+        and isinstance(message.content, list)
+        and any([isinstance(c, ContentImage) for c in message.content])
+    ):
+        init_accum: ImageContentAccumulator = ([], [])
+        new_user_message_content, edited_tool_message_content = functools.reduce(
+            tool_result_image_content_reducer, message.content, init_accum
         )
-
-
-
-
-
-
-
-
-                    text="Image content is in the message below."
+
+        return (
+            messages
+            + [
+                ChatMessageTool(
+                    content=edited_tool_message_content,
+                    tool_call_id=message.tool_call_id,
+                    function=message.function,
                 )
-
-
-
-
+            ],
+            pending_content + new_user_message_content,
+            tool_call_ids + ([message.tool_call_id] if message.tool_call_id else []),
+        )
 
     else:
-
+        return (
+            maybe_adding_user_message(messages, pending_content, tool_call_ids)
+            + [message],
+            [],
+            [],
+        )
 
-
-
+
+ImageContentAccumulator = tuple[list[Content], list[Content]]
+"""
+ImageContentAccumulator is a tuple containing two lists of Content objects:
+  - The first list contains ContentImages that will be included in a fabricated user message.
+  - The second list contains modified content for the tool message with images replaced with text.
+"""
+
+
+def tool_result_image_content_reducer(
+    acc: ImageContentAccumulator, content: Content
+) -> ImageContentAccumulator:
+    """
+    Reduces the messages Content into two separate lists: one for a fabricated user message that will contain the images and one for modified tool message with the images replaced with text.
+
+    Returns:
+        ImageContentReducer: A tuple containing two lists of Content objects.
+        - The first list contains the images that will be included in a fabricated user message.
+        - The second list contains modified content for the tool message with images replaced with text.
+    """
+    new_user_message_content, edited_tool_message_content = acc
+    if isinstance(content, ContentImage):
+        return new_user_message_content + [content], edited_tool_message_content + [
+            ContentText(text="Image content is included below.")
+        ]
+
+    else:
+        return new_user_message_content, edited_tool_message_content + [content]
+
+
+def maybe_adding_user_message(
+    messages: list[ChatMessage], content: list[Content], tool_call_ids: list[str]
+) -> list[ChatMessage]:
+    """If content is empty, return messages, otherwise, create a new ChatMessageUser with it and return a new messages list with that message added."""
+    return (
+        messages + [ChatMessageUser(content=content, tool_call_id=tool_call_ids)]
+        if content
+        else messages
+    )
 
 
 # Functions to reduce consecutive user messages to a single user message -> required for some models
```
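The reduce pipeline is easiest to see end to end: a tool message whose content contains an image gets the image swapped for placeholder text, and the image is re-delivered in a synthesized user message carrying the originating `tool_call_id` (which is why `ChatMessageUser.tool_call_id` became a list above). An illustrative call against the private helper:

```python
# Illustrative only: tool_result_images_as_user_message is a private helper in
# inspect_ai/model/_model.py, and the image payload below is a stand-in.
from inspect_ai._util.content import ContentImage, ContentText
from inspect_ai.model import ChatMessageTool
from inspect_ai.model._model import tool_result_images_as_user_message

tool_message = ChatMessageTool(
    content=[
        ContentText(text="chart rendered"),
        ContentImage(image="data:image/png;base64,..."),
    ],
    tool_call_id="call_1",
    function="render_chart",
)

resolved = tool_result_images_as_user_message([tool_message])
# resolved[0]: the tool message with the image replaced by
#   "Image content is included below."
# resolved[1]: a fabricated user message carrying the image, tagged with
#   tool_call_id=["call_1"]
assert len(resolved) == 2
```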
```diff
@@ -884,6 +1021,7 @@ def handle_sample_message_limit(input: str | list[ChatMessage]) -> None:
         active_sample_message_limit,
         set_active_sample_total_messages,
     )
+    from inspect_ai.solver._limit import SampleLimitExceededError
 
     total_messages = 1 if isinstance(input, str) else len(input)
     message_limit = active_sample_message_limit()
@@ -910,6 +1048,7 @@ def record_model_usage(model: str, usage: ModelUsage) -> None:
         active_sample_token_limit,
         set_active_sample_total_tokens,
     )
+    from inspect_ai.solver._limit import SampleLimitExceededError
 
     # record usage
     set_model_usage(model, usage, sample_model_usage_context_var.get(None))
```
inspect_ai/model/_openai.py
CHANGED

```diff
@@ -43,10 +43,18 @@ from ._chat_message import (
 from ._model_output import ModelUsage, StopReason, as_stop_reason
 
 
+def is_o_series(name: str) -> bool:
+    return is_o1(name) or is_o3(name)
+
+
 def is_o1(name: str) -> bool:
     return name.startswith("o1")
 
 
+def is_o3(name: str) -> bool:
+    return name.startswith("o3")
+
+
 def is_o1_full(name: str) -> bool:
     return is_o1(name) and not is_o1_mini(name) and not is_o1_preview(name)
 
@@ -55,10 +63,18 @@ def is_o1_mini(name: str) -> bool:
     return name.startswith("o1-mini")
 
 
+def is_o3_mini(name: str) -> bool:
+    return name.startswith("o3-mini")
+
+
 def is_o1_preview(name: str) -> bool:
     return name.startswith("o1-preview")
 
 
+def is_gpt(name: str) -> bool:
+    return name.startswith("gpt")
+
+
 def openai_chat_tool_call(tool_call: ToolCall) -> ChatCompletionMessageToolCall:
     return ChatCompletionMessageToolCall(
         type="function",
```
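The new prefix predicates are trivial, but note that `startswith` deliberately matches dated snapshot names as well:

```python
# Quick sanity check of the new model-name predicates (private helpers
# in inspect_ai/model/_openai.py).
from inspect_ai.model._openai import is_gpt, is_o3_mini, is_o_series

assert is_o_series("o3-mini-2025-01-31")  # dated snapshots share the prefix
assert is_o3_mini("o3-mini")
assert is_gpt("gpt-4o")
assert not is_o_series("gpt-4o")
```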
```diff
@@ -296,6 +312,14 @@ def chat_messages_from_openai(
         else:
             content = [content_from_openai(c) for c in asst_content]
 
+        # resolve reasoning (OpenAI doesn't support this however OpenAI-compatible
+        # interfaces e.g. DeepSeek do include this field so we pluck it out)
+        reasoning = message.get("reasoning_content", None) or message.get(
+            "reasoning", None
+        )
+        if reasoning is not None:
+            reasoning = str(reasoning)
+
         # return message
         if "tool_calls" in message:
             tool_calls: list[ToolCall] = []
@@ -306,7 +330,11 @@
         else:
             tool_calls = []
         chat_messages.append(
-            ChatMessageAssistant(content=content, tool_calls=tool_calls or None)
+            ChatMessageAssistant(
+                content=content,
+                tool_calls=tool_calls or None,
+                reasoning=reasoning,
+            )
         )
     elif message["role"] == "tool":
         tool_content = message.get("content", None) or ""
@@ -357,10 +385,14 @@
     message: ChatCompletionMessage, tools: list[ToolInfo]
 ) -> ChatMessageAssistant:
     refusal = getattr(message, "refusal", None)
+    reasoning = getattr(message, "reasoning_content", None) or getattr(
+        message, "reasoning", None
+    )
     return ChatMessageAssistant(
         content=refusal or message.content or "",
         source="generate",
         tool_calls=chat_tool_calls_from_openai(message, tools),
+        reasoning=reasoning,
     )
 
 
```
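As the comment in the first hunk notes, OpenAI's own API has no reasoning field, but OpenAI-compatible endpoints (e.g. DeepSeek) return one. A sketch of how such a raw message maps onto the new field, mirroring the plucking logic above:

```python
# Sketch of the reasoning plucking shown above, applied to a DeepSeek-style
# assistant message (an OpenAI-compatible dict with "reasoning_content").
from inspect_ai.model import ChatMessageAssistant

raw = {
    "role": "assistant",
    "content": "Paris.",
    "reasoning_content": "The user asked for the capital of France.",
}

reasoning = raw.get("reasoning_content", None) or raw.get("reasoning", None)
message = ChatMessageAssistant(
    content=str(raw["content"]),
    reasoning=str(reasoning) if reasoning is not None else None,
)
assert message.reasoning == "The user asked for the capital of France."
```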