inspect-ai 0.3.103__py3-none-any.whl → 0.3.105__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/common.py +2 -1
- inspect_ai/_cli/eval.py +2 -2
- inspect_ai/_display/core/active.py +3 -0
- inspect_ai/_display/core/config.py +1 -0
- inspect_ai/_display/core/panel.py +21 -13
- inspect_ai/_display/core/results.py +3 -7
- inspect_ai/_display/core/rich.py +3 -5
- inspect_ai/_display/log/__init__.py +0 -0
- inspect_ai/_display/log/display.py +173 -0
- inspect_ai/_display/plain/display.py +2 -2
- inspect_ai/_display/rich/display.py +2 -4
- inspect_ai/_display/textual/app.py +1 -6
- inspect_ai/_display/textual/widgets/task_detail.py +3 -14
- inspect_ai/_display/textual/widgets/tasks.py +1 -1
- inspect_ai/_eval/eval.py +1 -1
- inspect_ai/_eval/evalset.py +3 -3
- inspect_ai/_eval/registry.py +6 -1
- inspect_ai/_eval/run.py +5 -1
- inspect_ai/_eval/task/constants.py +1 -0
- inspect_ai/_eval/task/log.py +2 -0
- inspect_ai/_eval/task/run.py +65 -39
- inspect_ai/_util/citation.py +88 -0
- inspect_ai/_util/content.py +24 -2
- inspect_ai/_util/json.py +17 -2
- inspect_ai/_util/registry.py +19 -4
- inspect_ai/_view/schema.py +0 -6
- inspect_ai/_view/server.py +17 -0
- inspect_ai/_view/www/dist/assets/index.css +93 -31
- inspect_ai/_view/www/dist/assets/index.js +10639 -10011
- inspect_ai/_view/www/log-schema.json +418 -1
- inspect_ai/_view/www/node_modules/flatted/python/flatted.py +149 -0
- inspect_ai/_view/www/node_modules/katex/src/fonts/generate_fonts.py +58 -0
- inspect_ai/_view/www/node_modules/katex/src/metrics/extract_tfms.py +114 -0
- inspect_ai/_view/www/node_modules/katex/src/metrics/extract_ttfs.py +122 -0
- inspect_ai/_view/www/node_modules/katex/src/metrics/format_json.py +28 -0
- inspect_ai/_view/www/node_modules/katex/src/metrics/parse_tfm.py +211 -0
- inspect_ai/_view/www/package.json +2 -2
- inspect_ai/_view/www/src/@types/log.d.ts +140 -39
- inspect_ai/_view/www/src/app/content/RecordTree.tsx +13 -0
- inspect_ai/_view/www/src/app/log-view/LogView.tsx +1 -1
- inspect_ai/_view/www/src/app/routing/logNavigation.ts +31 -0
- inspect_ai/_view/www/src/app/routing/{navigationHooks.ts → sampleNavigation.ts} +39 -86
- inspect_ai/_view/www/src/app/samples/SampleDialog.tsx +1 -1
- inspect_ai/_view/www/src/app/samples/SampleDisplay.tsx +1 -1
- inspect_ai/_view/www/src/app/samples/chat/ChatMessage.module.css +4 -0
- inspect_ai/_view/www/src/app/samples/chat/ChatMessage.tsx +17 -0
- inspect_ai/_view/www/src/app/samples/chat/MessageCitations.module.css +16 -0
- inspect_ai/_view/www/src/app/samples/chat/MessageCitations.tsx +63 -0
- inspect_ai/_view/www/src/app/samples/chat/MessageContent.module.css +6 -0
- inspect_ai/_view/www/src/app/samples/chat/MessageContent.tsx +174 -25
- inspect_ai/_view/www/src/app/samples/chat/MessageContents.tsx +21 -3
- inspect_ai/_view/www/src/app/samples/chat/content-data/ContentDataView.module.css +7 -0
- inspect_ai/_view/www/src/app/samples/chat/content-data/ContentDataView.tsx +111 -0
- inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearch.module.css +10 -0
- inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearch.tsx +14 -0
- inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearchResults.module.css +19 -0
- inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearchResults.tsx +49 -0
- inspect_ai/_view/www/src/app/samples/chat/messages.ts +7 -1
- inspect_ai/_view/www/src/app/samples/chat/tools/ToolCallView.tsx +12 -2
- inspect_ai/_view/www/src/app/samples/chat/types.ts +4 -0
- inspect_ai/_view/www/src/app/samples/list/SampleList.tsx +1 -1
- inspect_ai/_view/www/src/app/samples/sample-tools/filters.ts +26 -0
- inspect_ai/_view/www/src/app/samples/sample-tools/sample-filter/SampleFilter.tsx +14 -3
- inspect_ai/_view/www/src/app/samples/sample-tools/sample-filter/completions.ts +359 -7
- inspect_ai/_view/www/src/app/samples/sample-tools/sample-filter/language.ts +6 -0
- inspect_ai/_view/www/src/app/samples/sampleLimit.ts +2 -2
- inspect_ai/_view/www/src/app/samples/transcript/ModelEventView.tsx +1 -1
- inspect_ai/_view/www/src/app/samples/transcript/SampleLimitEventView.tsx +4 -4
- inspect_ai/_view/www/src/app/samples/transcript/outline/OutlineRow.tsx +1 -1
- inspect_ai/_view/www/src/app/samples/transcript/outline/TranscriptOutline.tsx +1 -1
- inspect_ai/_view/www/src/client/api/api-browser.ts +25 -0
- inspect_ai/_view/www/src/client/api/api-http.ts +3 -0
- inspect_ai/_view/www/src/client/api/api-vscode.ts +6 -0
- inspect_ai/_view/www/src/client/api/client-api.ts +3 -0
- inspect_ai/_view/www/src/client/api/jsonrpc.ts +1 -0
- inspect_ai/_view/www/src/client/api/types.ts +3 -0
- inspect_ai/_view/www/src/components/MarkdownDiv.tsx +15 -2
- inspect_ai/_view/www/src/state/samplePolling.ts +17 -1
- inspect_ai/_view/www/src/tests/README.md +2 -2
- inspect_ai/_view/www/src/utils/git.ts +3 -1
- inspect_ai/_view/www/src/utils/html.ts +6 -0
- inspect_ai/agent/_handoff.py +8 -5
- inspect_ai/agent/_react.py +5 -5
- inspect_ai/dataset/_dataset.py +1 -1
- inspect_ai/log/_condense.py +5 -0
- inspect_ai/log/_file.py +4 -1
- inspect_ai/log/_log.py +9 -4
- inspect_ai/log/_recorders/json.py +4 -2
- inspect_ai/log/_samples.py +5 -0
- inspect_ai/log/_util.py +2 -0
- inspect_ai/model/__init__.py +14 -0
- inspect_ai/model/_call_tools.py +17 -8
- inspect_ai/model/_chat_message.py +3 -0
- inspect_ai/model/_openai_responses.py +80 -34
- inspect_ai/model/_providers/_anthropic_citations.py +158 -0
- inspect_ai/model/_providers/_google_citations.py +100 -0
- inspect_ai/model/_providers/anthropic.py +219 -36
- inspect_ai/model/_providers/google.py +98 -22
- inspect_ai/model/_providers/mistral.py +20 -7
- inspect_ai/model/_providers/openai.py +11 -10
- inspect_ai/model/_providers/openai_compatible.py +3 -2
- inspect_ai/model/_providers/openai_responses.py +2 -5
- inspect_ai/model/_providers/perplexity.py +123 -0
- inspect_ai/model/_providers/providers.py +13 -2
- inspect_ai/model/_providers/vertex.py +3 -0
- inspect_ai/model/_trim.py +5 -0
- inspect_ai/tool/__init__.py +14 -0
- inspect_ai/tool/_mcp/_mcp.py +5 -2
- inspect_ai/tool/_mcp/sampling.py +19 -3
- inspect_ai/tool/_mcp/server.py +1 -1
- inspect_ai/tool/_tool.py +10 -1
- inspect_ai/tool/_tools/_web_search/_base_http_provider.py +104 -0
- inspect_ai/tool/_tools/_web_search/_exa.py +78 -0
- inspect_ai/tool/_tools/_web_search/_google.py +22 -25
- inspect_ai/tool/_tools/_web_search/_tavily.py +47 -65
- inspect_ai/tool/_tools/_web_search/_web_search.py +83 -36
- inspect_ai/tool/_tools/_web_search/_web_search_provider.py +7 -0
- inspect_ai/util/__init__.py +8 -0
- inspect_ai/util/_background.py +64 -0
- inspect_ai/util/_display.py +11 -2
- inspect_ai/util/_limit.py +72 -5
- inspect_ai/util/_sandbox/__init__.py +2 -0
- inspect_ai/util/_sandbox/docker/compose.py +2 -2
- inspect_ai/util/_sandbox/service.py +28 -7
- inspect_ai/util/_span.py +12 -1
- inspect_ai/util/_subprocess.py +51 -38
- {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.105.dist-info}/METADATA +2 -2
- {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.105.dist-info}/RECORD +134 -109
- /inspect_ai/model/{_openai_computer_use.py → _providers/_openai_computer_use.py} +0 -0
- /inspect_ai/model/{_openai_web_search.py → _providers/_openai_web_search.py} +0 -0
- {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.105.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.105.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.105.dist-info}/licenses/LICENSE +0 -0
- {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.105.dist-info}/top_level.txt +0 -0
inspect_ai/_view/www/src/components/MarkdownDiv.tsx
CHANGED
@@ -51,8 +51,11 @@ export const MarkdownDiv = forwardRef<HTMLDivElement, MarkdownDivProps>(
     // For `code` tags, reverse the escaping if we can
     const withCode = unescapeCodeHtmlEntities(unescaped);
 
+    // For `sup` tags, reverse the escaping if we can
+    const withSup = unescapeSupHtmlEntities(withCode);
+
     // Return the rendered markdown
-    const markup = { __html: withCode };
+    const markup = { __html: withSup };
 
     return (
       <div
@@ -65,7 +68,7 @@ export const MarkdownDiv = forwardRef<HTMLDivElement, MarkdownDivProps>(
   },
 );
 
-const kLetterListPattern = /^([a-zA-
+const kLetterListPattern = /^([a-zA-Z][).]\s.*?)$/gm;
 const kCommonmarkReferenceLinkPattern = /\[([^\]]*)\]: (?!http)(.*)/g;
 
 const protectBackslashesInLatex = (content: string): string => {
@@ -193,6 +196,16 @@ const unprotectMarkdown = (txt: string): string => {
   return txt;
 };
 
+function unescapeSupHtmlEntities(str: string): string {
+  // replace &lt;sup&gt; with <sup>
+  if (!str) {
+    return str;
+  }
+  return str
+    .replace(/&lt;sup&gt;/g, "<sup>")
+    .replace(/&lt;\/sup&gt;/g, "</sup>");
+}
+
 function unescapeCodeHtmlEntities(str: string): string {
   if (!str) return str;
 
inspect_ai/_view/www/src/state/samplePolling.ts
CHANGED
@@ -1,6 +1,7 @@
 import { Event } from "../app/types";
 import {
   AttachmentData,
+  ClientAPI,
   EventData,
   SampleData,
   SampleSummary,
@@ -183,6 +184,8 @@ export function createSamplePolling(
         const processedEvents = processEvents(
           sampleDataResponse.sampleData,
           pollingState,
+          api,
+          logFile,
         );
 
         // update max attachment id
@@ -268,7 +271,12 @@ function processAttachments(
   });
 }
 
-function processEvents(sampleData: SampleData, pollingState: PollingState) {
+function processEvents(
+  sampleData: SampleData,
+  pollingState: PollingState,
+  api: ClientAPI,
+  log_file: string,
+) {
   // Go through each event and resolve it, either appending or replacing
   log.debug(`Processing ${sampleData.events.length} events`);
   if (sampleData.events.length === 0) {
@@ -289,6 +297,14 @@ function processEvents(sampleData: SampleData, pollingState: PollingState)
         attachmentId,
         available_attachments: Object.keys(pollingState.attachments),
       };
+
+      if (api.log_message) {
+        api.log_message(
+          log_file,
+          `Unable to resolve attachment ${attachmentId}\n` +
+            JSON.stringify(snapshot),
+        );
+      }
       console.warn(`Unable to resolve attachment ${attachmentId}`, snapshot);
     },
   );
inspect_ai/_view/www/src/tests/README.md
CHANGED
@@ -5,8 +5,8 @@ This directory contains the test files for the application. The test framework i
 ## Directory Structure
 
 - `tests/`: Root directory for all tests
-
-
+  - `__mocks__/`: Mock files for CSS modules and other assets
+  - `setupTests.mjs`: Setup file for Jest tests
 
 ## Running Tests
 
inspect_ai/_view/www/src/utils/git.ts
CHANGED
@@ -2,6 +2,8 @@
  * Generates a GitHub commit URL based on the repository origin URL and the commit hash.
  */
 export const ghCommitUrl = (origin: string, commit: string): string => {
-  const baseUrl = origin
+  const baseUrl = origin
+    .replace(/\.git$/, "")
+    .replace(/^git@github.com:/, "https://github.com/");
   return `${baseUrl}/commit/${commit}`;
 };
inspect_ai/_view/www/src/utils/html.ts
CHANGED
@@ -4,3 +4,9 @@
 export function escapeSelector(id: string): string {
   return id.replace(/([ #.;,?!+*~'":^$[\]()=>|/\\])/g, "\\$1");
 }
+
+export const decodeHtmlEntities = (text: string): string => {
+  const parser = new DOMParser();
+  const doc = parser.parseFromString(text, "text/html");
+  return doc.documentElement.textContent || text;
+};
inspect_ai/agent/_handoff.py
CHANGED
@@ -6,7 +6,7 @@ from inspect_ai._util.registry import (
     registry_unqualified_name,
     set_registry_info,
 )
-from inspect_ai.tool._tool import Tool, ToolResult, ToolSource
+from inspect_ai.tool._tool import TOOL_PARALLEL, Tool, ToolResult, ToolSource
 from inspect_ai.tool._tool_def import ToolDef
 from inspect_ai.tool._tool_description import ToolDescription, set_tool_description
 from inspect_ai.util._limit import Limit
@@ -37,9 +37,9 @@ def handoff(
            Use the built-in `last_message` filter to return only the last message
            or alternatively specify a custom `MessageFilter` function.
         tool_name: Alternate tool name (defaults to `transfer_to_{agent_name}`)
-        limits: List of limits to apply to the agent.
-            the agent
-            exceeded.
+        limits: List of limits to apply to the agent. Limits are scoped to each
+            handoff to the agent. Should a limit be exceeded, the agent stops and a user
+            message is appended explaining that a limit was exceeded.
         **agent_kwargs: Arguments to curry to `Agent` function (arguments provided here
            will not be presented to the model as part of the tool interface).
 
@@ -61,7 +61,10 @@ def handoff(
         agent, tool_info.name, input_filter, output_filter, limits, **agent_kwargs
     )
     tool_name = tool_name or f"transfer_to_{tool_info.name}"
-    set_registry_info(
+    set_registry_info(
+        agent_tool,
+        RegistryInfo(type="tool", name=tool_name, metadata={TOOL_PARALLEL: False}),
+    )
     set_tool_description(
        agent_tool,
        ToolDescription(
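The hunks above mark handoff tools as non-parallel and clarify that limits are scoped to each individual handoff. A minimal usage sketch (hypothetical agent and limit value; assumes the public `inspect_ai.agent` / `inspect_ai.util` APIs):

# Sketch: per-handoff limits (hypothetical agent; assumes public APIs).
from inspect_ai.agent import AgentState, agent, handoff
from inspect_ai.util import token_limit

@agent
def researcher():
    async def execute(state: AgentState) -> AgentState:
        # ... call models/tools and return the updated state
        return state
    return execute

# Each handoff applies a fresh copy of the limit (see the deepcopy added in
# inspect_ai/model/_call_tools.py below), so one exhausted handoff does not
# poison later ones.
research = handoff(researcher(), limits=[token_limit(50_000)])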
inspect_ai/agent/_react.py
CHANGED
@@ -361,13 +361,13 @@ def _prompt_to_system_message(
             and ("{submit}" not in prompt.assistant_prompt)
             and prompt.submit_prompt
         ):
-            assistant_prompt = f"{prompt.assistant_prompt}\n{prompt.submit_prompt}"
+            assistant_prompt = f"{prompt.assistant_prompt}\n{prompt.submit_prompt.format(submit=submit_tool)}"
         else:
-            assistant_prompt = prompt.assistant_prompt
+            assistant_prompt = prompt.assistant_prompt.format(
+                submit=submit_tool or "submit"
+            )
         prompt_lines.append(assistant_prompt)
-        prompt_content = "\n\n".join(prompt_lines).format(
-            submit=submit_tool or "submit"
-        )
+        prompt_content = "\n\n".join(prompt_lines)
         system_message: ChatMessage | None = ChatMessageSystem(content=prompt_content)
     else:
         system_message = None
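The rewrite above moves `{submit}` substitution onto the individual prompt pieces rather than the joined prompt. A tiny sketch of the resulting behavior (illustrative strings, not the library's actual defaults):

# Sketch of the {submit} placeholder handling fixed above.
assistant_prompt = "You are a helpful assistant."
submit_prompt = "When you are done, call the {submit}() tool."
submit_tool = "submit"

# assistant_prompt has no "{submit}", so submit_prompt is appended with its
# placeholder formatted to the actual tool name:
print(f"{assistant_prompt}\n{submit_prompt.format(submit=submit_tool)}")
# You are a helpful assistant.
# When you are done, call the submit() tool.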
inspect_ai/dataset/_dataset.py
CHANGED
inspect_ai/log/_condense.py
CHANGED
@@ -9,6 +9,7 @@ from inspect_ai._util.constants import BASE_64_DATA_REMOVED
 from inspect_ai._util.content import (
     Content,
     ContentAudio,
+    ContentData,
     ContentImage,
     ContentReasoning,
     ContentText,
@@ -344,3 +345,7 @@ def walk_content(content: Content, content_fn: Callable[[str], str]) -> Content:
         return content.model_copy(update=dict(video=content_fn(content.video)))
     elif isinstance(content, ContentReasoning):
         return content.model_copy(update=dict(reasoning=content_fn(content.reasoning)))
+    elif isinstance(content, ContentData):
+        return content.model_copy(
+            update=dict(data=walk_json_value(content.data, content_fn))
+        )
inspect_ai/log/_file.py
CHANGED
@@ -198,7 +198,10 @@ def write_log_dir_manifest(
     fs = filesystem(output_dir)
     manifest = f"{output_dir}{fs.sep}{filename}"
     manifest_json = to_json(
-        value=manifest_logs,
+        value=jsonable_python(manifest_logs),
+        indent=2,
+        exclude_none=True,
+        fallback=lambda _x: None,
     )
     with file(manifest, mode="wb", fs_options=fs_options) as f:
         f.write(manifest_json)
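The manifest is now converted to a JSON-able Python structure first, with a `fallback` that nulls out anything unserializable and `exclude_none` dropping those keys. A standalone sketch of the same pydantic-core pattern (illustrative data, not the real manifest):

# Sketch of the serialization pattern above using pydantic-core directly.
from pydantic_core import to_json, to_jsonable_python

value = {"name": "eval-1", "opaque": object(), "empty": None}

# fallback turns the unserializable object() into None instead of raising;
# exclude_none then drops None-valued keys from the output.
print(to_json(
    to_jsonable_python(value, fallback=lambda _x: None),
    indent=2,
    exclude_none=True,
).decode())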
inspect_ai/log/_log.py
CHANGED
@@ -422,7 +422,7 @@ class EvalSample(BaseModel):
             # warning will handle this)
             del values["transcript"]
 
-        return
+        return migrate_values(values)
 
     # allow field model_usage
     model_config = ConfigDict(protected_namespaces=())
@@ -707,7 +707,10 @@ class EvalSpec(BaseModel):
     """Attributes of the @task decorator."""
 
     task_args: dict[str, Any] = Field(default_factory=dict)
-    """Arguments used for invoking the task."""
+    """Arguments used for invoking the task (including defaults)."""
+
+    task_args_passed: dict[str, Any] = Field(default_factory=dict)
+    """Arguments explicitly passed by caller for invoking the task."""
 
     solver: str | None = Field(default=None)
     """Solver name."""
@@ -782,16 +785,18 @@ class EvalSpec(BaseModel):
     def read_sandbox_spec(
         cls: Type["EvalSpec"], values: dict[str, Any]
     ) -> dict[str, Any]:
-        return
+        return migrate_values(values)
 
 
-def
+def migrate_values(values: dict[str, Any]) -> dict[str, Any]:
     if "sandbox" in values:
         sandbox = values.get("sandbox")
         if isinstance(sandbox, list):
             values["sandbox"] = SandboxEnvironmentSpec(
                 type=sandbox[0], config=sandbox[1]
             )
+    if "task_args_passed" not in values:
+        values["task_args_passed"] = values.get("task_args", {})
     return values
 
 
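`migrate_values` above backfills `task_args_passed` when reading logs written before this field existed. A minimal sketch of what that means for an old log record:

# Sketch: older logs gain task_args_passed, backfilled from task_args.
def migrate_values(values: dict) -> dict:
    if "task_args_passed" not in values:
        values["task_args_passed"] = values.get("task_args", {})
    return values

old_log = {"task": "math_eval", "task_args": {"difficulty": "hard"}}
print(migrate_values(old_log)["task_args_passed"])  # {'difficulty': 'hard'}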
inspect_ai/log/_recorders/json.py
CHANGED
@@ -3,6 +3,7 @@ from typing import Any, Literal, get_args
 
 import ijson  # type: ignore
 from ijson import IncompleteJSONError
+from ijson.backends.python import UnexpectedSymbol  # type: ignore
 from pydantic import BaseModel
 from pydantic_core import from_json
 from typing_extensions import override
@@ -129,12 +130,13 @@ class JSONRecorder(FileRecorder):
         # The Python JSON serializer supports NaN and Inf, however
         # this isn't technically part of the JSON spec. The json-stream
         # library shares this limitation, so if we fail with an
-        # invalid character then we move on and and parse w/ pydantic
+        # invalid character (or Unexpected symbol) then we move on and and parse w/ pydantic
         # (which does support NaN and Inf by default)
-        except (ValueError, IncompleteJSONError) as ex:
+        except (ValueError, IncompleteJSONError, UnexpectedSymbol) as ex:
             if (
                 str(ex).find("Invalid JSON character") != -1
                 or str(ex).find("invalid char in json text") != -1
+                or str(ex).find("Unexpected symbol") != -1
             ):
                 pass
             else:
inspect_ai/log/_samples.py
CHANGED
@@ -3,6 +3,7 @@ from contextvars import ContextVar
 from datetime import datetime
 from typing import AsyncGenerator, Iterator, Literal
 
+from anyio.abc import TaskGroup
 from shortuuid import uuid
 
 from inspect_ai.dataset._dataset import Sample
@@ -28,6 +29,7 @@ class ActiveSample:
         fails_on_error: bool,
         transcript: Transcript,
         sandboxes: dict[str, SandboxConnection],
+        tg: TaskGroup,
     ) -> None:
         self.id = uuid()
         self.started: float | None = None
@@ -47,6 +49,7 @@ class ActiveSample:
         self.transcript = transcript
         self.sandboxes = sandboxes
         self._interrupt_action: Literal["score", "error"] | None = None
+        self.tg = tg
 
     @property
     def running_time(self) -> float:
@@ -86,6 +89,7 @@ async def active_sample(
     working_limit: int | None,
     fails_on_error: bool,
     transcript: Transcript,
+    tg: TaskGroup,
 ) -> AsyncGenerator[ActiveSample, None]:
     # create the sample
     active = ActiveSample(
@@ -101,6 +105,7 @@ async def active_sample(
         sandboxes=await sandbox_connections(),
         fails_on_error=fails_on_error,
         transcript=transcript,
+        tg=tg,
     )
 
     _active_samples.append(active)
inspect_ai/log/_util.py
CHANGED
@@ -4,6 +4,7 @@ from typing import Any
 
 from inspect_ai._util.content import (
     ContentAudio,
+    ContentData,
     ContentImage,
     ContentReasoning,
     ContentText,
@@ -24,6 +25,7 @@ def text_input_only(inputs: str | list[ChatMessage]) -> str | list[ChatMessage]:
         | ContentImage
         | ContentAudio
         | ContentVideo
+        | ContentData
     ] = []
     for content in message.content:
         if content.type == "text":
inspect_ai/model/__init__.py
CHANGED
@@ -1,8 +1,16 @@
 # ruff: noqa: F401 F403 F405
 
+from inspect_ai._util.citation import (
+    Citation,
+    CitationBase,
+    ContentCitation,
+    DocumentCitation,
+    UrlCitation,
+)
 from inspect_ai._util.content import (
     Content,
     ContentAudio,
+    ContentData,
     ContentImage,
     ContentReasoning,
     ContentText,
@@ -59,6 +67,7 @@ __all__ = [
     "ResponseSchema",
     "CachePolicy",
     "ContentAudio",
+    "ContentData",
     "ContentImage",
     "ContentReasoning",
     "ContentText",
@@ -93,6 +102,11 @@ __all__ = [
     "cache_size",
     "get_model",
     "modelapi",
+    "Citation",
+    "CitationBase",
+    "DocumentCitation",
+    "ContentCitation",
+    "UrlCitation",
 ]
 
 _TOOL_MODULE_VERSION = "0.3.18"
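With the citation types now exported from `inspect_ai.model`, cited content can be constructed directly. A sketch (assumes `ContentText` gained a `citations` field in this release, as the provider hunks below suggest):

# Sketch: attaching a URL citation to text content (assumed citations field).
from inspect_ai.model import ContentText, UrlCitation

content = ContentText(
    text="Attention is all you need.",
    citations=[
        UrlCitation(
            url="https://arxiv.org/abs/1706.03762",
            title="Attention Is All You Need",
        )
    ],
)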
inspect_ai/model/_call_tools.py
CHANGED
@@ -1,7 +1,7 @@
 import inspect
 import json
 import types
-from copy import copy
+from copy import copy, deepcopy
 from dataclasses import is_dataclass
 from datetime import date, datetime, time
 from enum import EnumMeta
@@ -36,6 +36,7 @@ from pydantic import BaseModel
 from inspect_ai._util.content import (
     Content,
     ContentAudio,
+    ContentData,
     ContentImage,
     ContentText,
     ContentVideo,
@@ -188,13 +189,19 @@ async def execute_tools(
         # types to string as that is what the model APIs accept
         truncated: tuple[int, int] | None = None
         if isinstance(
-            result,
+            result,
+            ContentText | ContentImage | ContentAudio | ContentVideo | ContentData,
         ):
             content: str | list[Content] = [result]
         elif isinstance(result, list) and (
             len(result) == 0
             or isinstance(
-                result[0],
+                result[0],
+                ContentText
+                | ContentImage
+                | ContentAudio
+                | ContentVideo
+                | ContentData,
             )
         ):
             content = result
@@ -471,7 +478,9 @@ async def agent_handoff(
     limit_error: LimitExceededError | None = None
     agent_state = AgentState(messages=copy(agent_conversation))
     try:
-        with apply_limits(agent_tool.limits):
+        # The agent_tool's limits will be applied multiple times if the agent is handed
+        # off to multiple times which is not supported, so create a copy of each limit.
+        with apply_limits(deepcopy(agent_tool.limits)):
             async with span(name=agent_name, type="agent"):
                 agent_state = await agent_tool.agent(agent_state, **arguments)
     except LimitExceededError as ex:
@@ -525,11 +534,11 @@ def prepend_agent_name(
         content = copy(message.content)
         for i in range(0, len(content)):
             if isinstance(content[i], ContentText):
-
-
-
+                text = cast(ContentText, content[i]).text
+                if text:
+                    content[i] = content[i].model_copy(
+                        update=dict(text=f"[{agent_name}] {text}")
                     )
-                )
                 break
     return message.model_copy(update=dict(content=content))
 
inspect_ai/model/_chat_message.py
CHANGED
@@ -26,6 +26,9 @@ class ChatMessageBase(BaseModel):
     source: Literal["input", "generate"] | None = Field(default=None)
     """Source of message."""
 
+    metadata: dict[str, Any] | None = Field(default=None)
+    """Additional message metadata."""
+
     internal: JsonValue | None = Field(default=None)
     """Model provider specific payload - typically used to aid transformation back to model types."""
 
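The new `metadata` field lands on `ChatMessageBase`, so it applies to all message types. A sketch with hypothetical metadata keys:

# Sketch: arbitrary metadata on a chat message (hypothetical keys).
from inspect_ai.model import ChatMessageUser

message = ChatMessageUser(
    content="What is the capital of France?",
    metadata={"source_doc": "geography.md", "priority": "low"},
)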
inspect_ai/model/_openai_responses.py
CHANGED
@@ -31,9 +31,16 @@ from openai.types.responses.response_create_params import (
     ToolChoice as ResponsesToolChoice,
 )
 from openai.types.responses.response_input_item_param import FunctionCallOutput, Message
+from openai.types.responses.response_output_text import (
+    Annotation,
+    AnnotationFileCitation,
+    AnnotationFilePath,
+    AnnotationURLCitation,
+)
 from openai.types.responses.response_reasoning_item_param import Summary
 from pydantic import JsonValue
 
+from inspect_ai._util.citation import Citation, DocumentCitation, UrlCitation
 from inspect_ai._util.content import (
     Content,
     ContentImage,
@@ -47,29 +54,30 @@ from inspect_ai.model._chat_message import ChatMessage, ChatMessageAssistant
 from inspect_ai.model._generate_config import GenerateConfig
 from inspect_ai.model._model_output import ChatCompletionChoice, StopReason
 from inspect_ai.model._openai import is_o_series
-from inspect_ai.
+from inspect_ai.tool._tool_call import ToolCall
+from inspect_ai.tool._tool_choice import ToolChoice
+from inspect_ai.tool._tool_info import ToolInfo
+
+from ._providers._openai_computer_use import (
     computer_call_output,
     maybe_computer_use_preview_tool,
     tool_call_from_openai_computer_tool_call,
 )
-from
-from inspect_ai.tool._tool_call import ToolCall
-from inspect_ai.tool._tool_choice import ToolChoice
-from inspect_ai.tool._tool_info import ToolInfo
+from ._providers._openai_web_search import maybe_web_search_tool
 
 
 async def openai_responses_inputs(
-    messages: list[ChatMessage], model: str
+    messages: list[ChatMessage], model: str
 ) -> list[ResponseInputItemParam]:
     return [
         item
         for message in messages
-        for item in await _openai_input_item_from_chat_message(message, model
+        for item in await _openai_input_item_from_chat_message(message, model)
     ]
 
 
 async def _openai_input_item_from_chat_message(
-    message: ChatMessage, model: str
+    message: ChatMessage, model: str
 ) -> list[ResponseInputItemParam]:
     if message.role == "system":
         content = await _openai_responses_content_list_param(message.content)
@@ -87,7 +95,7 @@ async def _openai_input_item_from_chat_message(
             )
         ]
     elif message.role == "assistant":
-        return _openai_input_items_from_chat_message_assistant(message
+        return _openai_input_items_from_chat_message_assistant(message)
     elif message.role == "tool":
         if message.internal:
             internal = _model_tool_call_for_internal(message.internal)
@@ -252,7 +260,18 @@ def _chat_message_assistant_from_openai_response(
         case ResponseOutputMessage(content=content, id=id):
             message_content.extend(
                 [
-                    ContentText(
+                    ContentText(
+                        text=c.text,
+                        internal={"id": id},
+                        citations=(
+                            [
+                                _to_inspect_citation(annotation)
+                                for annotation in c.annotations
+                            ]
+                            if c.annotations
+                            else None
+                        ),
+                    )
                     if isinstance(c, ResponseOutputText)
                     else ContentText(
                         text=c.refusal, refusal=True, internal={"id": id}
@@ -310,7 +329,7 @@ def _chat_message_assistant_from_openai_response(
 
 
 def _openai_input_items_from_chat_message_assistant(
-    message: ChatMessageAssistant,
+    message: ChatMessageAssistant,
 ) -> list[ResponseInputItemParam]:
     """
     Transform a `ChatMessageAssistant` into OpenAI `ResponseInputItem`'s for playback to the model.
@@ -343,10 +362,6 @@ def _openai_input_items_from_chat_message_assistant(
     )
     suppress_output_message = message.internal is not None and not has_content_with_ids
 
-    # if we are not storing messages on the server then blank these out
-    if not store:
-        tool_message_ids = {}
-
     # items to return
     items: list[ResponseInputItemParam] = []
     # group content by message ID
@@ -354,30 +369,21 @@ def _openai_input_items_from_chat_message_assistant(
         str | None, list[ResponseOutputTextParam | ResponseOutputRefusalParam]
     ] = {}
 
-    for content in (
-        list[ContentText | ContentReasoning]([ContentText(text=message.content)])
-        if isinstance(message.content, str)
-        else [
-            c for c in message.content if isinstance(c, ContentText | ContentReasoning)
-        ]
-    ):
+    for content in _filter_consecutive_reasoning_blocks(content_items):
         match content:
             case ContentReasoning(reasoning=reasoning):
                 assert content.signature is not None, (
                     "reasoning_id must be saved in signature"
                 )
-
-
-
-
-
-
-
-                    summary=[Summary(type="summary_text", text=reasoning)]
-                    if reasoning
-                    else [],
-                )
+                items.append(
+                    ResponseReasoningItemParam(
+                        type="reasoning",
+                        id=content.signature,
+                        summary=[Summary(type="summary_text", text=reasoning)]
+                        if reasoning
+                        else [],
                     )
+                )
             case ContentText(text=text, refusal=refusal):
                 if suppress_output_message:
                     continue
@@ -409,7 +415,7 @@ def _openai_input_items_from_chat_message_assistant(
             role="assistant",
             # this actually can be `None`, and it will in fact be `None` when the
             # assistant message is synthesized by the scaffold as opposed to being
-            # replayed from the model
+            # replayed from the model
             id=msg_id,  # type: ignore[typeddict-item]
             content=content_list,
             status="completed",
@@ -531,3 +537,43 @@ def _responses_tool_alias(name: str) -> str:
 
 def _from_responses_tool_alias(name: str) -> str:
     return next((k for k, v in _responses_tool_aliases.items() if v == name), name)
+
+
+def _to_inspect_citation(input: Annotation) -> Citation:
+    match input:
+        case AnnotationURLCitation(
+            end_index=end_index, start_index=start_index, title=title, url=url
+        ):
+            return UrlCitation(
+                cited_text=(start_index, end_index), title=title, url=url
+            )
+
+        case (
+            AnnotationFileCitation(file_id=file_id, index=index)
+            | AnnotationFilePath(file_id=file_id, index=index)
+        ):
+            return DocumentCitation(internal={"file_id": file_id, "index": index})
+    assert False, f"Unexpected citation type: {input.type}"
+
+
+def _filter_consecutive_reasoning_blocks(
+    content_list: list[ContentText | ContentReasoning],
+) -> list[ContentText | ContentReasoning]:
+    return [
+        content
+        for i, content in enumerate(content_list)
+        if _should_keep_content(i, content, content_list)
+    ]
+
+
+def _should_keep_content(
+    i: int,
+    content: ContentText | ContentReasoning,
+    content_list: list[ContentText | ContentReasoning],
+) -> bool:
+    return (
+        True
+        if not isinstance(content, ContentReasoning)
+        else i == len(content_list) - 1
+        or not isinstance(content_list[i + 1], ContentReasoning)
+    )