inspect-ai 0.3.103__py3-none-any.whl → 0.3.105__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134)
  1. inspect_ai/_cli/common.py +2 -1
  2. inspect_ai/_cli/eval.py +2 -2
  3. inspect_ai/_display/core/active.py +3 -0
  4. inspect_ai/_display/core/config.py +1 -0
  5. inspect_ai/_display/core/panel.py +21 -13
  6. inspect_ai/_display/core/results.py +3 -7
  7. inspect_ai/_display/core/rich.py +3 -5
  8. inspect_ai/_display/log/__init__.py +0 -0
  9. inspect_ai/_display/log/display.py +173 -0
  10. inspect_ai/_display/plain/display.py +2 -2
  11. inspect_ai/_display/rich/display.py +2 -4
  12. inspect_ai/_display/textual/app.py +1 -6
  13. inspect_ai/_display/textual/widgets/task_detail.py +3 -14
  14. inspect_ai/_display/textual/widgets/tasks.py +1 -1
  15. inspect_ai/_eval/eval.py +1 -1
  16. inspect_ai/_eval/evalset.py +3 -3
  17. inspect_ai/_eval/registry.py +6 -1
  18. inspect_ai/_eval/run.py +5 -1
  19. inspect_ai/_eval/task/constants.py +1 -0
  20. inspect_ai/_eval/task/log.py +2 -0
  21. inspect_ai/_eval/task/run.py +65 -39
  22. inspect_ai/_util/citation.py +88 -0
  23. inspect_ai/_util/content.py +24 -2
  24. inspect_ai/_util/json.py +17 -2
  25. inspect_ai/_util/registry.py +19 -4
  26. inspect_ai/_view/schema.py +0 -6
  27. inspect_ai/_view/server.py +17 -0
  28. inspect_ai/_view/www/dist/assets/index.css +93 -31
  29. inspect_ai/_view/www/dist/assets/index.js +10639 -10011
  30. inspect_ai/_view/www/log-schema.json +418 -1
  31. inspect_ai/_view/www/node_modules/flatted/python/flatted.py +149 -0
  32. inspect_ai/_view/www/node_modules/katex/src/fonts/generate_fonts.py +58 -0
  33. inspect_ai/_view/www/node_modules/katex/src/metrics/extract_tfms.py +114 -0
  34. inspect_ai/_view/www/node_modules/katex/src/metrics/extract_ttfs.py +122 -0
  35. inspect_ai/_view/www/node_modules/katex/src/metrics/format_json.py +28 -0
  36. inspect_ai/_view/www/node_modules/katex/src/metrics/parse_tfm.py +211 -0
  37. inspect_ai/_view/www/package.json +2 -2
  38. inspect_ai/_view/www/src/@types/log.d.ts +140 -39
  39. inspect_ai/_view/www/src/app/content/RecordTree.tsx +13 -0
  40. inspect_ai/_view/www/src/app/log-view/LogView.tsx +1 -1
  41. inspect_ai/_view/www/src/app/routing/logNavigation.ts +31 -0
  42. inspect_ai/_view/www/src/app/routing/{navigationHooks.ts → sampleNavigation.ts} +39 -86
  43. inspect_ai/_view/www/src/app/samples/SampleDialog.tsx +1 -1
  44. inspect_ai/_view/www/src/app/samples/SampleDisplay.tsx +1 -1
  45. inspect_ai/_view/www/src/app/samples/chat/ChatMessage.module.css +4 -0
  46. inspect_ai/_view/www/src/app/samples/chat/ChatMessage.tsx +17 -0
  47. inspect_ai/_view/www/src/app/samples/chat/MessageCitations.module.css +16 -0
  48. inspect_ai/_view/www/src/app/samples/chat/MessageCitations.tsx +63 -0
  49. inspect_ai/_view/www/src/app/samples/chat/MessageContent.module.css +6 -0
  50. inspect_ai/_view/www/src/app/samples/chat/MessageContent.tsx +174 -25
  51. inspect_ai/_view/www/src/app/samples/chat/MessageContents.tsx +21 -3
  52. inspect_ai/_view/www/src/app/samples/chat/content-data/ContentDataView.module.css +7 -0
  53. inspect_ai/_view/www/src/app/samples/chat/content-data/ContentDataView.tsx +111 -0
  54. inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearch.module.css +10 -0
  55. inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearch.tsx +14 -0
  56. inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearchResults.module.css +19 -0
  57. inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearchResults.tsx +49 -0
  58. inspect_ai/_view/www/src/app/samples/chat/messages.ts +7 -1
  59. inspect_ai/_view/www/src/app/samples/chat/tools/ToolCallView.tsx +12 -2
  60. inspect_ai/_view/www/src/app/samples/chat/types.ts +4 -0
  61. inspect_ai/_view/www/src/app/samples/list/SampleList.tsx +1 -1
  62. inspect_ai/_view/www/src/app/samples/sample-tools/filters.ts +26 -0
  63. inspect_ai/_view/www/src/app/samples/sample-tools/sample-filter/SampleFilter.tsx +14 -3
  64. inspect_ai/_view/www/src/app/samples/sample-tools/sample-filter/completions.ts +359 -7
  65. inspect_ai/_view/www/src/app/samples/sample-tools/sample-filter/language.ts +6 -0
  66. inspect_ai/_view/www/src/app/samples/sampleLimit.ts +2 -2
  67. inspect_ai/_view/www/src/app/samples/transcript/ModelEventView.tsx +1 -1
  68. inspect_ai/_view/www/src/app/samples/transcript/SampleLimitEventView.tsx +4 -4
  69. inspect_ai/_view/www/src/app/samples/transcript/outline/OutlineRow.tsx +1 -1
  70. inspect_ai/_view/www/src/app/samples/transcript/outline/TranscriptOutline.tsx +1 -1
  71. inspect_ai/_view/www/src/client/api/api-browser.ts +25 -0
  72. inspect_ai/_view/www/src/client/api/api-http.ts +3 -0
  73. inspect_ai/_view/www/src/client/api/api-vscode.ts +6 -0
  74. inspect_ai/_view/www/src/client/api/client-api.ts +3 -0
  75. inspect_ai/_view/www/src/client/api/jsonrpc.ts +1 -0
  76. inspect_ai/_view/www/src/client/api/types.ts +3 -0
  77. inspect_ai/_view/www/src/components/MarkdownDiv.tsx +15 -2
  78. inspect_ai/_view/www/src/state/samplePolling.ts +17 -1
  79. inspect_ai/_view/www/src/tests/README.md +2 -2
  80. inspect_ai/_view/www/src/utils/git.ts +3 -1
  81. inspect_ai/_view/www/src/utils/html.ts +6 -0
  82. inspect_ai/agent/_handoff.py +8 -5
  83. inspect_ai/agent/_react.py +5 -5
  84. inspect_ai/dataset/_dataset.py +1 -1
  85. inspect_ai/log/_condense.py +5 -0
  86. inspect_ai/log/_file.py +4 -1
  87. inspect_ai/log/_log.py +9 -4
  88. inspect_ai/log/_recorders/json.py +4 -2
  89. inspect_ai/log/_samples.py +5 -0
  90. inspect_ai/log/_util.py +2 -0
  91. inspect_ai/model/__init__.py +14 -0
  92. inspect_ai/model/_call_tools.py +17 -8
  93. inspect_ai/model/_chat_message.py +3 -0
  94. inspect_ai/model/_openai_responses.py +80 -34
  95. inspect_ai/model/_providers/_anthropic_citations.py +158 -0
  96. inspect_ai/model/_providers/_google_citations.py +100 -0
  97. inspect_ai/model/_providers/anthropic.py +219 -36
  98. inspect_ai/model/_providers/google.py +98 -22
  99. inspect_ai/model/_providers/mistral.py +20 -7
  100. inspect_ai/model/_providers/openai.py +11 -10
  101. inspect_ai/model/_providers/openai_compatible.py +3 -2
  102. inspect_ai/model/_providers/openai_responses.py +2 -5
  103. inspect_ai/model/_providers/perplexity.py +123 -0
  104. inspect_ai/model/_providers/providers.py +13 -2
  105. inspect_ai/model/_providers/vertex.py +3 -0
  106. inspect_ai/model/_trim.py +5 -0
  107. inspect_ai/tool/__init__.py +14 -0
  108. inspect_ai/tool/_mcp/_mcp.py +5 -2
  109. inspect_ai/tool/_mcp/sampling.py +19 -3
  110. inspect_ai/tool/_mcp/server.py +1 -1
  111. inspect_ai/tool/_tool.py +10 -1
  112. inspect_ai/tool/_tools/_web_search/_base_http_provider.py +104 -0
  113. inspect_ai/tool/_tools/_web_search/_exa.py +78 -0
  114. inspect_ai/tool/_tools/_web_search/_google.py +22 -25
  115. inspect_ai/tool/_tools/_web_search/_tavily.py +47 -65
  116. inspect_ai/tool/_tools/_web_search/_web_search.py +83 -36
  117. inspect_ai/tool/_tools/_web_search/_web_search_provider.py +7 -0
  118. inspect_ai/util/__init__.py +8 -0
  119. inspect_ai/util/_background.py +64 -0
  120. inspect_ai/util/_display.py +11 -2
  121. inspect_ai/util/_limit.py +72 -5
  122. inspect_ai/util/_sandbox/__init__.py +2 -0
  123. inspect_ai/util/_sandbox/docker/compose.py +2 -2
  124. inspect_ai/util/_sandbox/service.py +28 -7
  125. inspect_ai/util/_span.py +12 -1
  126. inspect_ai/util/_subprocess.py +51 -38
  127. {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.105.dist-info}/METADATA +2 -2
  128. {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.105.dist-info}/RECORD +134 -109
  129. /inspect_ai/model/{_openai_computer_use.py → _providers/_openai_computer_use.py} +0 -0
  130. /inspect_ai/model/{_openai_web_search.py → _providers/_openai_web_search.py} +0 -0
  131. {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.105.dist-info}/WHEEL +0 -0
  132. {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.105.dist-info}/entry_points.txt +0 -0
  133. {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.105.dist-info}/licenses/LICENSE +0 -0
  134. {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.105.dist-info}/top_level.txt +0 -0
inspect_ai/_view/www/src/components/MarkdownDiv.tsx CHANGED
@@ -51,8 +51,11 @@ export const MarkdownDiv = forwardRef<HTMLDivElement, MarkdownDivProps>(
     // For `code` tags, reverse the escaping if we can
     const withCode = unescapeCodeHtmlEntities(unescaped);
 
+    // For `sup` tags, reverse the escaping if we can
+    const withSup = unescapeSupHtmlEntities(withCode);
+
     // Return the rendered markdown
-    const markup = { __html: withCode };
+    const markup = { __html: withSup };
 
     return (
       <div
@@ -65,7 +68,7 @@ export const MarkdownDiv = forwardRef<HTMLDivElement, MarkdownDivProps>(
   },
 );
 
-const kLetterListPattern = /^([a-zA-Z0-9][).]\s.*?)$/gm;
+const kLetterListPattern = /^([a-zA-Z][).]\s.*?)$/gm;
 const kCommonmarkReferenceLinkPattern = /\[([^\]]*)\]: (?!http)(.*)/g;
 
 const protectBackslashesInLatex = (content: string): string => {
@@ -193,6 +196,16 @@ const unprotectMarkdown = (txt: string): string => {
   return txt;
 };
 
+function unescapeSupHtmlEntities(str: string): string {
+  // replace &lt;sup&gt; with <sup>
+  if (!str) {
+    return str;
+  }
+  return str
+    .replace(/&lt;sup&gt;/g, "<sup>")
+    .replace(/&lt;\/sup&gt;/g, "</sup>");
+}
+
 function unescapeCodeHtmlEntities(str: string): string {
   if (!str) return str;
 
inspect_ai/_view/www/src/state/samplePolling.ts CHANGED
@@ -1,6 +1,7 @@
 import { Event } from "../app/types";
 import {
   AttachmentData,
+  ClientAPI,
   EventData,
   SampleData,
   SampleSummary,
@@ -183,6 +184,8 @@ export function createSamplePolling(
       const processedEvents = processEvents(
         sampleDataResponse.sampleData,
         pollingState,
+        api,
+        logFile,
       );
 
       // update max attachment id
@@ -268,7 +271,12 @@ function processAttachments(
   });
 }
 
-function processEvents(sampleData: SampleData, pollingState: PollingState) {
+function processEvents(
+  sampleData: SampleData,
+  pollingState: PollingState,
+  api: ClientAPI,
+  log_file: string,
+) {
   // Go through each event and resolve it, either appending or replacing
   log.debug(`Processing ${sampleData.events.length} events`);
   if (sampleData.events.length === 0) {
@@ -289,6 +297,14 @@ function processEvents(sampleData: SampleData, pollingState: PollingState) {
        attachmentId,
        available_attachments: Object.keys(pollingState.attachments),
      };
+
+      if (api.log_message) {
+        api.log_message(
+          log_file,
+          `Unable to resolve attachment ${attachmentId}\n` +
+            JSON.stringify(snapshot),
+        );
+      }
      console.warn(`Unable to resolve attachment ${attachmentId}`, snapshot);
    },
  );
inspect_ai/_view/www/src/tests/README.md CHANGED
@@ -5,8 +5,8 @@ This directory contains the test files for the application. The test framework i
 ## Directory Structure
 
 - `tests/`: Root directory for all tests
-- `__mocks__/`: Mock files for CSS modules and other assets
-- `setupTests.mjs`: Setup file for Jest tests
+  - `__mocks__/`: Mock files for CSS modules and other assets
+  - `setupTests.mjs`: Setup file for Jest tests
 
 ## Running Tests
 
inspect_ai/_view/www/src/utils/git.ts CHANGED
@@ -2,6 +2,8 @@
  * Generates a GitHub commit URL based on the repository origin URL and the commit hash.
  */
 export const ghCommitUrl = (origin: string, commit: string): string => {
-  const baseUrl = origin.replace(/\.git$/, "");
+  const baseUrl = origin
+    .replace(/\.git$/, "")
+    .replace(/^git@github.com:/, "https://github.com/");
   return `${baseUrl}/commit/${commit}`;
 };
inspect_ai/_view/www/src/utils/html.ts CHANGED
@@ -4,3 +4,9 @@
 export function escapeSelector(id: string): string {
   return id.replace(/([ #.;,?!+*~'":^$[\]()=>|/\\])/g, "\\$1");
 }
+
+export const decodeHtmlEntities = (text: string): string => {
+  const parser = new DOMParser();
+  const doc = parser.parseFromString(text, "text/html");
+  return doc.documentElement.textContent || text;
+};
inspect_ai/agent/_handoff.py CHANGED
@@ -6,7 +6,7 @@ from inspect_ai._util.registry import (
     registry_unqualified_name,
     set_registry_info,
 )
-from inspect_ai.tool._tool import Tool, ToolResult, ToolSource
+from inspect_ai.tool._tool import TOOL_PARALLEL, Tool, ToolResult, ToolSource
 from inspect_ai.tool._tool_def import ToolDef
 from inspect_ai.tool._tool_description import ToolDescription, set_tool_description
 from inspect_ai.util._limit import Limit
@@ -37,9 +37,9 @@ def handoff(
             Use the built-in `last_message` filter to return only the last message
             or alternatively specify a custom `MessageFilter` function.
         tool_name: Alternate tool name (defaults to `transfer_to_{agent_name}`)
-        limits: List of limits to apply to the agent. Should a limit be exceeded,
-            the agent stops and a user message is appended explaining that a limit was
-            exceeded.
+        limits: List of limits to apply to the agent. Limits are scoped to each
+            handoff to the agent. Should a limit be exceeded, the agent stops and a user
+            message is appended explaining that a limit was exceeded.
         **agent_kwargs: Arguments to curry to `Agent` function (arguments provided here
             will not be presented to the model as part of the tool interface).
 
@@ -61,7 +61,10 @@ def handoff(
         agent, tool_info.name, input_filter, output_filter, limits, **agent_kwargs
     )
     tool_name = tool_name or f"transfer_to_{tool_info.name}"
-    set_registry_info(agent_tool, RegistryInfo(type="tool", name=tool_name))
+    set_registry_info(
+        agent_tool,
+        RegistryInfo(type="tool", name=tool_name, metadata={TOOL_PARALLEL: False}),
+    )
     set_tool_description(
         agent_tool,
         ToolDescription(
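The docstring change above clarifies that `limits` are scoped to each individual handoff. A minimal usage sketch (the "researcher" agent is hypothetical; this assumes the public `handoff()`/`react()` helpers from `inspect_ai.agent` and `message_limit()` from `inspect_ai.util`):

```python
# Sketch only: per-handoff limit scoping with a hypothetical agent.
from inspect_ai.agent import handoff, react
from inspect_ai.util import message_limit

researcher = react(
    name="researcher",
    description="Researches questions on the web",
    prompt="You are a web researcher.",
)

# each transfer_to_researcher call applies a fresh copy of these limits,
# so usage does not accumulate across repeated handoffs
tools = [handoff(researcher, limits=[message_limit(20)])]
```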
inspect_ai/agent/_react.py CHANGED
@@ -361,13 +361,13 @@ def _prompt_to_system_message(
             and ("{submit}" not in prompt.assistant_prompt)
             and prompt.submit_prompt
         ):
-            assistant_prompt = f"{prompt.assistant_prompt}\n{prompt.submit_prompt}"
+            assistant_prompt = f"{prompt.assistant_prompt}\n{prompt.submit_prompt.format(submit=submit_tool)}"
         else:
-            assistant_prompt = prompt.assistant_prompt
+            assistant_prompt = prompt.assistant_prompt.format(
+                submit=submit_tool or "submit"
+            )
         prompt_lines.append(assistant_prompt)
-        prompt_content = "\n\n".join(prompt_lines).format(
-            submit=submit_tool or "submit"
-        )
+        prompt_content = "\n\n".join(prompt_lines)
         system_message: ChatMessage | None = ChatMessageSystem(content=prompt_content)
     else:
         system_message = None
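The `{submit}` placeholder is now formatted into the individual prompt pieces rather than the fully joined content, plausibly because other joined lines can contain literal braces that `str.format()` would trip over. A toy illustration (not inspect_ai code):

```python
# Old approach: format the joined prompt; a literal brace elsewhere breaks it.
prompt_lines = ['Respond with {"answer": ...} JSON.', "Call the {submit}() tool."]

try:
    "\n\n".join(prompt_lines).format(submit="submit")
except KeyError as ex:
    print(f"format() raised KeyError: {ex}")  # trips on {"answer": ...}

# New approach: format only the piece known to use {submit}.
print(prompt_lines[1].format(submit="submit"))  # Call the submit() tool.
```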
inspect_ai/dataset/_dataset.py CHANGED
@@ -308,7 +308,7 @@ class MemoryDataset(Dataset):
 
     @override
     def shuffle(self, seed: int | None = None) -> None:
-        if seed:
+        if seed is not None:
             random.Random(seed).shuffle(self.samples)
         else:
             random.shuffle(self.samples)
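The old `if seed:` guard treated `seed=0` as falsy, so a zero seed silently fell through to the unseeded, non-reproducible branch; `is not None` fixes that. A stdlib-only illustration:

```python
import random

# With the fix, seed=0 takes the seeded branch and two shuffles agree:
a, b = list(range(5)), list(range(5))
random.Random(0).shuffle(a)
random.Random(0).shuffle(b)
assert a == b  # reproducible, even with seed=0 (bool(0) is False, so the
               # old truthiness check would have ignored this seed entirely)
```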
inspect_ai/log/_condense.py CHANGED
@@ -9,6 +9,7 @@ from inspect_ai._util.constants import BASE_64_DATA_REMOVED
 from inspect_ai._util.content import (
     Content,
     ContentAudio,
+    ContentData,
     ContentImage,
     ContentReasoning,
     ContentText,
@@ -344,3 +345,7 @@ def walk_content(content: Content, content_fn: Callable[[str], str]) -> Content:
         return content.model_copy(update=dict(video=content_fn(content.video)))
     elif isinstance(content, ContentReasoning):
         return content.model_copy(update=dict(reasoning=content_fn(content.reasoning)))
+    elif isinstance(content, ContentData):
+        return content.model_copy(
+            update=dict(data=walk_json_value(content.data, content_fn))
+        )
inspect_ai/log/_file.py CHANGED
@@ -198,7 +198,10 @@ def write_log_dir_manifest(
     fs = filesystem(output_dir)
     manifest = f"{output_dir}{fs.sep}{filename}"
     manifest_json = to_json(
-        value=manifest_logs, indent=2, exclude_none=True, fallback=lambda _x: None
+        value=jsonable_python(manifest_logs),
+        indent=2,
+        exclude_none=True,
+        fallback=lambda _x: None,
     )
     with file(manifest, mode="wb", fs_options=fs_options) as f:
         f.write(manifest_json)
inspect_ai/log/_log.py CHANGED
@@ -422,7 +422,7 @@ class EvalSample(BaseModel):
             # warning will handle this)
             del values["transcript"]
 
-        return migrate_sandbox_spec(values)
+        return migrate_values(values)
 
     # allow field model_usage
     model_config = ConfigDict(protected_namespaces=())
@@ -707,7 +707,10 @@ class EvalSpec(BaseModel):
     """Attributes of the @task decorator."""
 
     task_args: dict[str, Any] = Field(default_factory=dict)
-    """Arguments used for invoking the task."""
+    """Arguments used for invoking the task (including defaults)."""
+
+    task_args_passed: dict[str, Any] = Field(default_factory=dict)
+    """Arguments explicitly passed by caller for invoking the task."""
 
     solver: str | None = Field(default=None)
     """Solver name."""
@@ -782,16 +785,18 @@
     def read_sandbox_spec(
         cls: Type["EvalSpec"], values: dict[str, Any]
     ) -> dict[str, Any]:
-        return migrate_sandbox_spec(values)
+        return migrate_values(values)
 
 
-def migrate_sandbox_spec(values: dict[str, Any]) -> dict[str, Any]:
+def migrate_values(values: dict[str, Any]) -> dict[str, Any]:
     if "sandbox" in values:
         sandbox = values.get("sandbox")
         if isinstance(sandbox, list):
            values["sandbox"] = SandboxEnvironmentSpec(
                type=sandbox[0], config=sandbox[1]
            )
+    if "task_args_passed" not in values:
+        values["task_args_passed"] = values.get("task_args", {})
    return values
 
 
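The renamed `migrate_values()` now also backfills the new `task_args_passed` field when reading logs written by older versions. A standalone sketch of just that backfill step:

```python
# Mirror of the backfill performed by migrate_values() for pre-0.3.105 logs:
old_values = {"task_args": {"dataset": "demo", "limit": 10}}

if "task_args_passed" not in old_values:
    old_values["task_args_passed"] = old_values.get("task_args", {})

assert old_values["task_args_passed"] == {"dataset": "demo", "limit": 10}
```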
inspect_ai/log/_recorders/json.py CHANGED
@@ -3,6 +3,7 @@ from typing import Any, Literal, get_args
 
 import ijson  # type: ignore
 from ijson import IncompleteJSONError
+from ijson.backends.python import UnexpectedSymbol  # type: ignore
 from pydantic import BaseModel
 from pydantic_core import from_json
 from typing_extensions import override
@@ -129,12 +130,13 @@ class JSONRecorder(FileRecorder):
         # The Python JSON serializer supports NaN and Inf, however
         # this isn't technically part of the JSON spec. The json-stream
         # library shares this limitation, so if we fail with an
-        # invalid character then we move on and and parse w/ pydantic
+        # invalid character (or Unexpected symbol) then we move on and and parse w/ pydantic
         # (which does support NaN and Inf by default)
-    except (ValueError, IncompleteJSONError) as ex:
+    except (ValueError, IncompleteJSONError, UnexpectedSymbol) as ex:
         if (
             str(ex).find("Invalid JSON character") != -1
             or str(ex).find("invalid char in json text") != -1
+            or str(ex).find("Unexpected symbol") != -1
         ):
             pass
         else:
inspect_ai/log/_samples.py CHANGED
@@ -3,6 +3,7 @@ from contextvars import ContextVar
 from datetime import datetime
 from typing import AsyncGenerator, Iterator, Literal
 
+from anyio.abc import TaskGroup
 from shortuuid import uuid
 
 from inspect_ai.dataset._dataset import Sample
@@ -28,6 +29,7 @@ class ActiveSample:
         fails_on_error: bool,
         transcript: Transcript,
         sandboxes: dict[str, SandboxConnection],
+        tg: TaskGroup,
     ) -> None:
         self.id = uuid()
         self.started: float | None = None
@@ -47,6 +49,7 @@ class ActiveSample:
         self.transcript = transcript
         self.sandboxes = sandboxes
         self._interrupt_action: Literal["score", "error"] | None = None
+        self.tg = tg
 
     @property
     def running_time(self) -> float:
@@ -86,6 +89,7 @@ async def active_sample(
     working_limit: int | None,
     fails_on_error: bool,
     transcript: Transcript,
+    tg: TaskGroup,
 ) -> AsyncGenerator[ActiveSample, None]:
     # create the sample
     active = ActiveSample(
@@ -101,6 +105,7 @@
         sandboxes=await sandbox_connections(),
         fails_on_error=fails_on_error,
         transcript=transcript,
+        tg=tg,
     )
 
     _active_samples.append(active)
inspect_ai/log/_util.py CHANGED
@@ -4,6 +4,7 @@ from typing import Any
 
 from inspect_ai._util.content import (
     ContentAudio,
+    ContentData,
     ContentImage,
     ContentReasoning,
     ContentText,
@@ -24,6 +25,7 @@ def text_input_only(inputs: str | list[ChatMessage]) -> str | list[ChatMessage]:
         | ContentImage
         | ContentAudio
         | ContentVideo
+        | ContentData
     ] = []
     for content in message.content:
         if content.type == "text":
inspect_ai/model/__init__.py CHANGED
@@ -1,8 +1,16 @@
 # ruff: noqa: F401 F403 F405
 
+from inspect_ai._util.citation import (
+    Citation,
+    CitationBase,
+    ContentCitation,
+    DocumentCitation,
+    UrlCitation,
+)
 from inspect_ai._util.content import (
     Content,
     ContentAudio,
+    ContentData,
     ContentImage,
     ContentReasoning,
     ContentText,
@@ -59,6 +67,7 @@ __all__ = [
     "ResponseSchema",
     "CachePolicy",
     "ContentAudio",
+    "ContentData",
     "ContentImage",
     "ContentReasoning",
     "ContentText",
@@ -93,6 +102,11 @@ __all__ = [
     "cache_size",
     "get_model",
     "modelapi",
+    "Citation",
+    "CitationBase",
+    "DocumentCitation",
+    "ContentCitation",
+    "UrlCitation",
 ]
 
 _TOOL_MODULE_VERSION = "0.3.18"
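With these exports, citation types can be attached to content from user code. A sketch (this assumes `ContentText` now accepts an optional `citations` list, as the provider hunks below suggest, and that `UrlCitation` fields other than `url` are optional):

```python
# Sketch only: attaching a UrlCitation to ContentText via the new exports.
from inspect_ai.model import ContentText, UrlCitation

content = ContentText(
    text="Inspect is an open-source framework for LLM evals.",
    citations=[UrlCitation(url="https://inspect.aisi.org.uk/", title="Inspect AI")],
)
```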
inspect_ai/model/_call_tools.py CHANGED
@@ -1,7 +1,7 @@
 import inspect
 import json
 import types
-from copy import copy
+from copy import copy, deepcopy
 from dataclasses import is_dataclass
 from datetime import date, datetime, time
 from enum import EnumMeta
@@ -36,6 +36,7 @@ from pydantic import BaseModel
 from inspect_ai._util.content import (
     Content,
     ContentAudio,
+    ContentData,
     ContentImage,
     ContentText,
     ContentVideo,
@@ -188,13 +189,19 @@ async def execute_tools(
             # types to string as that is what the model APIs accept
             truncated: tuple[int, int] | None = None
             if isinstance(
-                result, ContentText | ContentImage | ContentAudio | ContentVideo
+                result,
+                ContentText | ContentImage | ContentAudio | ContentVideo | ContentData,
             ):
                 content: str | list[Content] = [result]
             elif isinstance(result, list) and (
                 len(result) == 0
                 or isinstance(
-                    result[0], ContentText | ContentImage | ContentAudio | ContentVideo
+                    result[0],
+                    ContentText
+                    | ContentImage
+                    | ContentAudio
+                    | ContentVideo
+                    | ContentData,
                 )
             ):
                 content = result
@@ -471,7 +478,9 @@ async def agent_handoff(
     limit_error: LimitExceededError | None = None
     agent_state = AgentState(messages=copy(agent_conversation))
     try:
-        with apply_limits(agent_tool.limits):
+        # The agent_tool's limits will be applied multiple times if the agent is handed
+        # off to multiple times which is not supported, so create a copy of each limit.
+        with apply_limits(deepcopy(agent_tool.limits)):
             async with span(name=agent_name, type="agent"):
                 agent_state = await agent_tool.agent(agent_state, **arguments)
     except LimitExceededError as ex:
@@ -525,11 +534,11 @@ def prepend_agent_name(
         content = copy(message.content)
         for i in range(0, len(content)):
             if isinstance(content[i], ContentText):
-                content[i] = content[i].model_copy(
-                    update=dict(
-                        text=f"[{agent_name}] {cast(ContentText, content[i]).text}"
+                text = cast(ContentText, content[i]).text
+                if text:
+                    content[i] = content[i].model_copy(
+                        update=dict(text=f"[{agent_name}] {text}")
                     )
-                )
                 break
     return message.model_copy(update=dict(content=content))
 
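The `deepcopy` in `agent_handoff()` exists because a `Limit` instance tracks its own usage, so re-entering the very same instances on a second handoff is unsupported. A sketch of the pattern (assuming the public `apply_limits()` and `token_limit()` helpers from `inspect_ai.util`):

```python
# Sketch only: enter fresh limit copies per application, as agent_handoff() now does.
from copy import deepcopy
from inspect_ai.util import apply_limits, token_limit

limits = [token_limit(10_000)]

def run_once() -> None:
    # each application enters copies, so usage never carries over
    with apply_limits(deepcopy(limits)):
        ...  # run the handed-off agent here

run_once()
run_once()  # safe; re-entering the original instances would not be
```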
inspect_ai/model/_chat_message.py CHANGED
@@ -26,6 +26,9 @@ class ChatMessageBase(BaseModel):
     source: Literal["input", "generate"] | None = Field(default=None)
     """Source of message."""
 
+    metadata: dict[str, Any] | None = Field(default=None)
+    """Additional message metadata."""
+
     internal: JsonValue | None = Field(default=None)
     """Model provider specific payload - typically used to aid transformation back to model types."""
 
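Since `metadata` lives on `ChatMessageBase`, every message type gains the field. A sketch (the metadata keys are hypothetical):

```python
from inspect_ai.model import ChatMessageUser

message = ChatMessageUser(
    content="What does the attached report conclude?",
    metadata={"document_id": "report-42"},  # hypothetical key
)
```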
inspect_ai/model/_openai_responses.py CHANGED
@@ -31,9 +31,16 @@ from openai.types.responses.response_create_params import (
     ToolChoice as ResponsesToolChoice,
 )
 from openai.types.responses.response_input_item_param import FunctionCallOutput, Message
+from openai.types.responses.response_output_text import (
+    Annotation,
+    AnnotationFileCitation,
+    AnnotationFilePath,
+    AnnotationURLCitation,
+)
 from openai.types.responses.response_reasoning_item_param import Summary
 from pydantic import JsonValue
 
+from inspect_ai._util.citation import Citation, DocumentCitation, UrlCitation
 from inspect_ai._util.content import (
     Content,
     ContentImage,
@@ -47,29 +54,30 @@ from inspect_ai.model._chat_message import ChatMessage, ChatMessageAssistant
 from inspect_ai.model._generate_config import GenerateConfig
 from inspect_ai.model._model_output import ChatCompletionChoice, StopReason
 from inspect_ai.model._openai import is_o_series
-from inspect_ai.model._openai_computer_use import (
+from inspect_ai.tool._tool_call import ToolCall
+from inspect_ai.tool._tool_choice import ToolChoice
+from inspect_ai.tool._tool_info import ToolInfo
+
+from ._providers._openai_computer_use import (
     computer_call_output,
     maybe_computer_use_preview_tool,
     tool_call_from_openai_computer_tool_call,
 )
-from inspect_ai.model._openai_web_search import maybe_web_search_tool
-from inspect_ai.tool._tool_call import ToolCall
-from inspect_ai.tool._tool_choice import ToolChoice
-from inspect_ai.tool._tool_info import ToolInfo
+from ._providers._openai_web_search import maybe_web_search_tool
 
 
 async def openai_responses_inputs(
-    messages: list[ChatMessage], model: str, store: bool
+    messages: list[ChatMessage], model: str
 ) -> list[ResponseInputItemParam]:
     return [
         item
         for message in messages
-        for item in await _openai_input_item_from_chat_message(message, model, store)
+        for item in await _openai_input_item_from_chat_message(message, model)
     ]
 
 
 async def _openai_input_item_from_chat_message(
-    message: ChatMessage, model: str, store: bool
+    message: ChatMessage, model: str
 ) -> list[ResponseInputItemParam]:
     if message.role == "system":
         content = await _openai_responses_content_list_param(message.content)
@@ -87,7 +95,7 @@ async def _openai_input_item_from_chat_message(
             )
         ]
     elif message.role == "assistant":
-        return _openai_input_items_from_chat_message_assistant(message, store)
+        return _openai_input_items_from_chat_message_assistant(message)
     elif message.role == "tool":
         if message.internal:
             internal = _model_tool_call_for_internal(message.internal)
@@ -252,7 +260,18 @@ def _chat_message_assistant_from_openai_response(
             case ResponseOutputMessage(content=content, id=id):
                 message_content.extend(
                     [
-                        ContentText(text=c.text, internal={"id": id})
+                        ContentText(
+                            text=c.text,
+                            internal={"id": id},
+                            citations=(
+                                [
+                                    _to_inspect_citation(annotation)
+                                    for annotation in c.annotations
+                                ]
+                                if c.annotations
+                                else None
+                            ),
+                        )
                         if isinstance(c, ResponseOutputText)
                         else ContentText(
                             text=c.refusal, refusal=True, internal={"id": id}
@@ -310,7 +329,7 @@ def _chat_message_assistant_from_openai_response(
 
 
 def _openai_input_items_from_chat_message_assistant(
-    message: ChatMessageAssistant, store: bool
+    message: ChatMessageAssistant,
 ) -> list[ResponseInputItemParam]:
     """
     Transform a `ChatMessageAssistant` into OpenAI `ResponseInputItem`'s for playback to the model.
@@ -343,10 +362,6 @@ def _openai_input_items_from_chat_message_assistant(
     )
     suppress_output_message = message.internal is not None and not has_content_with_ids
 
-    # if we are not storing messages on the server then blank these out
-    if not store:
-        tool_message_ids = {}
-
     # items to return
     items: list[ResponseInputItemParam] = []
     # group content by message ID
@@ -354,30 +369,21 @@ def _openai_input_items_from_chat_message_assistant(
         str | None, list[ResponseOutputTextParam | ResponseOutputRefusalParam]
     ] = {}
 
-    for content in (
-        list[ContentText | ContentReasoning]([ContentText(text=message.content)])
-        if isinstance(message.content, str)
-        else [
-            c for c in message.content if isinstance(c, ContentText | ContentReasoning)
-        ]
-    ):
+    for content in _filter_consecutive_reasoning_blocks(content_items):
         match content:
             case ContentReasoning(reasoning=reasoning):
                 assert content.signature is not None, (
                     "reasoning_id must be saved in signature"
                 )
-                # if items are not stored on the server then there is no
-                # sense appending the reasoning item as its just a pointer
-                if store:
-                    items.append(
-                        ResponseReasoningItemParam(
-                            type="reasoning",
-                            id=content.signature,
-                            summary=[Summary(type="summary_text", text=reasoning)]
-                            if reasoning
-                            else [],
-                        )
+                items.append(
+                    ResponseReasoningItemParam(
+                        type="reasoning",
+                        id=content.signature,
+                        summary=[Summary(type="summary_text", text=reasoning)]
+                        if reasoning
+                        else [],
                     )
+                )
             case ContentText(text=text, refusal=refusal):
                 if suppress_output_message:
                     continue
@@ -409,7 +415,7 @@ def _openai_input_items_from_chat_message_assistant(
             role="assistant",
             # this actually can be `None`, and it will in fact be `None` when the
             # assistant message is synthesized by the scaffold as opposed to being
-            # replayed from the model (or when store=False)
+            # replayed from the model
             id=msg_id,  # type: ignore[typeddict-item]
             content=content_list,
             status="completed",
@@ -531,3 +537,43 @@
 
 def _from_responses_tool_alias(name: str) -> str:
     return next((k for k, v in _responses_tool_aliases.items() if v == name), name)
+
+
+def _to_inspect_citation(input: Annotation) -> Citation:
+    match input:
+        case AnnotationURLCitation(
+            end_index=end_index, start_index=start_index, title=title, url=url
+        ):
+            return UrlCitation(
+                cited_text=(start_index, end_index), title=title, url=url
+            )
+
+        case (
+            AnnotationFileCitation(file_id=file_id, index=index)
+            | AnnotationFilePath(file_id=file_id, index=index)
+        ):
+            return DocumentCitation(internal={"file_id": file_id, "index": index})
+    assert False, f"Unexpected citation type: {input.type}"
+
+
+def _filter_consecutive_reasoning_blocks(
+    content_list: list[ContentText | ContentReasoning],
+) -> list[ContentText | ContentReasoning]:
+    return [
+        content
+        for i, content in enumerate(content_list)
+        if _should_keep_content(i, content, content_list)
+    ]
+
+
+def _should_keep_content(
+    i: int,
+    content: ContentText | ContentReasoning,
+    content_list: list[ContentText | ContentReasoning],
+) -> bool:
+    return (
+        True
+        if not isinstance(content, ContentReasoning)
+        else i == len(content_list) - 1
+        or not isinstance(content_list[i + 1], ContentReasoning)
+    )
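`_filter_consecutive_reasoning_blocks()` keeps only the last block of any run of consecutive reasoning blocks; text content always survives. A toy mirror of the logic, using strings in place of content objects:

```python
# "R*" stands in for ContentReasoning, "T*" for ContentText.
def filter_consecutive(blocks: list[str]) -> list[str]:
    return [
        b
        for i, b in enumerate(blocks)
        if not b.startswith("R")              # text is always kept
        or i == len(blocks) - 1               # last item is kept
        or not blocks[i + 1].startswith("R")  # kept unless followed by reasoning
    ]

print(filter_consecutive(["R1", "R2", "T1", "R3"]))  # ['R2', 'T1', 'R3']
```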