inspect-ai 0.3.103__py3-none-any.whl → 0.3.104__py3-none-any.whl

This diff represents the changes between publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (110)
  1. inspect_ai/_cli/common.py +2 -1
  2. inspect_ai/_cli/eval.py +2 -2
  3. inspect_ai/_display/core/active.py +3 -0
  4. inspect_ai/_display/core/config.py +1 -0
  5. inspect_ai/_display/core/panel.py +21 -13
  6. inspect_ai/_display/core/results.py +3 -7
  7. inspect_ai/_display/core/rich.py +3 -5
  8. inspect_ai/_display/log/__init__.py +0 -0
  9. inspect_ai/_display/log/display.py +173 -0
  10. inspect_ai/_display/plain/display.py +2 -2
  11. inspect_ai/_display/rich/display.py +2 -4
  12. inspect_ai/_display/textual/app.py +1 -6
  13. inspect_ai/_display/textual/widgets/task_detail.py +3 -14
  14. inspect_ai/_display/textual/widgets/tasks.py +1 -1
  15. inspect_ai/_eval/eval.py +1 -1
  16. inspect_ai/_eval/evalset.py +2 -2
  17. inspect_ai/_eval/registry.py +6 -1
  18. inspect_ai/_eval/run.py +5 -1
  19. inspect_ai/_eval/task/constants.py +1 -0
  20. inspect_ai/_eval/task/log.py +2 -0
  21. inspect_ai/_eval/task/run.py +1 -1
  22. inspect_ai/_util/citation.py +88 -0
  23. inspect_ai/_util/content.py +24 -2
  24. inspect_ai/_util/json.py +17 -2
  25. inspect_ai/_util/registry.py +19 -4
  26. inspect_ai/_view/schema.py +0 -6
  27. inspect_ai/_view/www/dist/assets/index.css +82 -24
  28. inspect_ai/_view/www/dist/assets/index.js +10124 -9808
  29. inspect_ai/_view/www/log-schema.json +418 -1
  30. inspect_ai/_view/www/node_modules/flatted/python/flatted.py +149 -0
  31. inspect_ai/_view/www/node_modules/katex/src/fonts/generate_fonts.py +58 -0
  32. inspect_ai/_view/www/node_modules/katex/src/metrics/extract_tfms.py +114 -0
  33. inspect_ai/_view/www/node_modules/katex/src/metrics/extract_ttfs.py +122 -0
  34. inspect_ai/_view/www/node_modules/katex/src/metrics/format_json.py +28 -0
  35. inspect_ai/_view/www/node_modules/katex/src/metrics/parse_tfm.py +211 -0
  36. inspect_ai/_view/www/package.json +2 -2
  37. inspect_ai/_view/www/src/@types/log.d.ts +140 -39
  38. inspect_ai/_view/www/src/app/content/RecordTree.tsx +13 -0
  39. inspect_ai/_view/www/src/app/log-view/LogView.tsx +1 -1
  40. inspect_ai/_view/www/src/app/routing/logNavigation.ts +31 -0
  41. inspect_ai/_view/www/src/app/routing/{navigationHooks.ts → sampleNavigation.ts} +39 -86
  42. inspect_ai/_view/www/src/app/samples/SampleDialog.tsx +1 -1
  43. inspect_ai/_view/www/src/app/samples/SampleDisplay.tsx +1 -1
  44. inspect_ai/_view/www/src/app/samples/chat/MessageCitations.module.css +16 -0
  45. inspect_ai/_view/www/src/app/samples/chat/MessageCitations.tsx +63 -0
  46. inspect_ai/_view/www/src/app/samples/chat/MessageContent.module.css +6 -0
  47. inspect_ai/_view/www/src/app/samples/chat/MessageContent.tsx +174 -25
  48. inspect_ai/_view/www/src/app/samples/chat/MessageContents.tsx +21 -3
  49. inspect_ai/_view/www/src/app/samples/chat/content-data/ContentDataView.module.css +7 -0
  50. inspect_ai/_view/www/src/app/samples/chat/content-data/ContentDataView.tsx +111 -0
  51. inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearch.module.css +10 -0
  52. inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearch.tsx +14 -0
  53. inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearchResults.module.css +19 -0
  54. inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearchResults.tsx +49 -0
  55. inspect_ai/_view/www/src/app/samples/chat/messages.ts +7 -1
  56. inspect_ai/_view/www/src/app/samples/chat/tools/ToolCallView.tsx +12 -2
  57. inspect_ai/_view/www/src/app/samples/chat/types.ts +4 -0
  58. inspect_ai/_view/www/src/app/samples/list/SampleList.tsx +1 -1
  59. inspect_ai/_view/www/src/app/samples/sampleLimit.ts +2 -2
  60. inspect_ai/_view/www/src/app/samples/transcript/ModelEventView.tsx +1 -1
  61. inspect_ai/_view/www/src/app/samples/transcript/SampleLimitEventView.tsx +4 -4
  62. inspect_ai/_view/www/src/app/samples/transcript/outline/TranscriptOutline.tsx +1 -1
  63. inspect_ai/_view/www/src/components/MarkdownDiv.tsx +15 -2
  64. inspect_ai/_view/www/src/tests/README.md +2 -2
  65. inspect_ai/_view/www/src/utils/git.ts +3 -1
  66. inspect_ai/_view/www/src/utils/html.ts +6 -0
  67. inspect_ai/agent/_handoff.py +3 -3
  68. inspect_ai/log/_condense.py +5 -0
  69. inspect_ai/log/_file.py +4 -1
  70. inspect_ai/log/_log.py +9 -4
  71. inspect_ai/log/_recorders/json.py +4 -2
  72. inspect_ai/log/_util.py +2 -0
  73. inspect_ai/model/__init__.py +14 -0
  74. inspect_ai/model/_call_tools.py +13 -4
  75. inspect_ai/model/_chat_message.py +3 -0
  76. inspect_ai/model/_openai_responses.py +80 -34
  77. inspect_ai/model/_providers/_anthropic_citations.py +158 -0
  78. inspect_ai/model/_providers/_google_citations.py +100 -0
  79. inspect_ai/model/_providers/anthropic.py +196 -34
  80. inspect_ai/model/_providers/google.py +94 -22
  81. inspect_ai/model/_providers/mistral.py +20 -7
  82. inspect_ai/model/_providers/openai.py +11 -10
  83. inspect_ai/model/_providers/openai_compatible.py +3 -2
  84. inspect_ai/model/_providers/openai_responses.py +2 -5
  85. inspect_ai/model/_providers/perplexity.py +123 -0
  86. inspect_ai/model/_providers/providers.py +13 -2
  87. inspect_ai/model/_providers/vertex.py +3 -0
  88. inspect_ai/model/_trim.py +5 -0
  89. inspect_ai/tool/__init__.py +14 -0
  90. inspect_ai/tool/_mcp/_mcp.py +5 -2
  91. inspect_ai/tool/_mcp/sampling.py +19 -3
  92. inspect_ai/tool/_mcp/server.py +1 -1
  93. inspect_ai/tool/_tool.py +10 -1
  94. inspect_ai/tool/_tools/_web_search/_base_http_provider.py +104 -0
  95. inspect_ai/tool/_tools/_web_search/_exa.py +78 -0
  96. inspect_ai/tool/_tools/_web_search/_google.py +22 -25
  97. inspect_ai/tool/_tools/_web_search/_tavily.py +47 -65
  98. inspect_ai/tool/_tools/_web_search/_web_search.py +83 -36
  99. inspect_ai/tool/_tools/_web_search/_web_search_provider.py +7 -0
  100. inspect_ai/util/_display.py +11 -2
  101. inspect_ai/util/_sandbox/docker/compose.py +2 -2
  102. inspect_ai/util/_span.py +12 -1
  103. {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.104.dist-info}/METADATA +2 -2
  104. {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.104.dist-info}/RECORD +110 -86
  105. /inspect_ai/model/{_openai_computer_use.py → _providers/_openai_computer_use.py} +0 -0
  106. /inspect_ai/model/{_openai_web_search.py → _providers/_openai_web_search.py} +0 -0
  107. {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.104.dist-info}/WHEEL +0 -0
  108. {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.104.dist-info}/entry_points.txt +0 -0
  109. {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.104.dist-info}/licenses/LICENSE +0 -0
  110. {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.104.dist-info}/top_level.txt +0 -0
inspect_ai/model/_providers/anthropic.py

@@ -22,6 +22,8 @@ from anthropic.types import (
     MessageParam,
     RedactedThinkingBlock,
     RedactedThinkingBlockParam,
+    ServerToolUseBlock,
+    ServerToolUseBlockParam,
     TextBlock,
     TextBlockParam,
     ThinkingBlock,
@@ -31,6 +33,9 @@ from anthropic.types import (
     ToolTextEditor20250124Param,
     ToolUseBlock,
     ToolUseBlockParam,
+    WebSearchTool20250305Param,
+    WebSearchToolResultBlock,
+    WebSearchToolResultBlockParam,
     message_create_params,
 )
 from anthropic.types.beta import (
@@ -43,6 +48,7 @@ from typing_extensions import override
 from inspect_ai._util.constants import BASE_64_DATA_REMOVED, NO_CONTENT
 from inspect_ai._util.content import (
     Content,
+    ContentData,
     ContentImage,
     ContentReasoning,
     ContentText,
@@ -61,6 +67,10 @@ from .._generate_config import GenerateConfig
 from .._model import ModelAPI
 from .._model_call import ModelCall
 from .._model_output import ChatCompletionChoice, ModelOutput, ModelUsage, StopReason
+from .._providers._anthropic_citations import (
+    to_anthropic_citation,
+    to_inspect_citation,
+)
 from .util import environment_prerequisite_error, model_base_url
 from .util.hooks import HttpxHooks
 
@@ -70,6 +80,14 @@ ANTHROPIC_API_KEY = "ANTHROPIC_API_KEY"
 
 INTERNAL_COMPUTER_TOOL_NAME = "computer"
 
+WEB_SEARCH_COMPATIBLE_MODELS = [
+    "claude-opus-4-20250514",
+    "claude-sonnet-4-20250514",
+    "claude-3-7-sonnet-20250219",
+    "claude-3-5-sonnet-latest",
+    "claude-3-5-haiku-latest",
+]
+
 
 class AnthropicAPI(ModelAPI):
     def __init__(
@@ -232,27 +250,19 @@ class AnthropicAPI(ModelAPI):
             if self.extra_body is not None:
                 request["extra_body"] = self.extra_body
 
-            # make request (unless overrideen, stream if we are using reasoning)
+            # make request (unless overridden, stream if we are using reasoning)
             streaming = (
                 self.is_using_thinking(config)
                 if self.streaming == "auto"
                 else self.streaming
             )
-            if streaming:
-                async with self.client.messages.stream(**request) as stream:
-                    message = await stream.get_final_message()
-            else:
-                message = await self.client.messages.create(**request, stream=False)
 
-            # set response for ModelCall
-            response = message.model_dump()
-
-            # extract output
-            output = await model_output_from_message(
-                self.client, self.service_model_name(), message, tools
+            message, output = await self._perform_request_and_continuations(
+                request, streaming, tools
             )
 
-            # return output and call
+            response = message.model_dump()
+
             return output, model_call()
 
         except BadRequestError as ex:
@@ -269,6 +279,50 @@ class AnthropicAPI(ModelAPI):
             else:
                 raise ex
 
+    async def _perform_request_and_continuations(
+        self,
+        request: dict[str, Any],
+        streaming: bool,
+        tools: list[ToolInfo],
+    ) -> tuple[Message, ModelOutput]:
+        """
+        This helper function is split out so that it can easily call itself recursively in cases where the model requires a continuation.
+
+        It considers the result from the initial request the "head" and the result
+        from the continuation the "tail".
+        """
+        if streaming:
+            async with self.client.messages.stream(**request) as stream:
+                head_message = await stream.get_final_message()
+        else:
+            head_message = await self.client.messages.create(**request, stream=False)
+
+        head_model_output, continuation_required = await model_output_from_message(
+            self.client, self.service_model_name(), head_message, tools
+        )
+
+        if continuation_required:
+            tail_request = dict(request)
+            tail_request["messages"] = request["messages"] + [
+                MessageParam(role=head_message.role, content=head_message.content)
+            ]
+            _, tail_model_output = await self._perform_request_and_continuations(
+                tail_request, streaming, tools
+            )
+
+            head_content = _content_list(head_model_output.message.content)
+            tail_content = _content_list(tail_model_output.message.content)
+            tail_model_output.message.content = head_content + tail_content
+
+            # TODO:
+            # It looks weird to return the head message with the tail output, but
+            # the contract for this function is that it returns the head message
+            # even when it has needed to recurse. This is because model_call()
+            # above doesn't currently support multiple requests
+            return head_message, tail_model_output
+
+        return head_message, head_model_output
+
     def completion_config(
         self, config: GenerateConfig
     ) -> tuple[dict[str, Any], dict[str, str], list[str]]:
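Note on the continuation contract: Anthropic returns a `pause_turn` stop reason when a server tool turn (for example, web search) is paused mid-flight, and expects the client to send the partial assistant turn back to resume it, which is what the recursion above implements. A minimal sketch of the same pattern against the public `anthropic` SDK, written as a loop instead of recursion (model name and prompt are placeholders):

```python
import anthropic

client = anthropic.Anthropic()

request: dict = {
    "model": "claude-3-5-sonnet-latest",
    "max_tokens": 1024,
    "tools": [{"type": "web_search_20250305", "name": "web_search"}],
    "messages": [{"role": "user", "content": "Summarize today's AI news."}],
}

message = client.messages.create(**request)
while message.stop_reason == "pause_turn":
    # echo the paused assistant turn back so the server resumes where it left off
    request["messages"] = request["messages"] + [
        {"role": message.role, "content": message.content}
    ]
    message = client.messages.create(**request)
```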
@@ -521,7 +575,11 @@ class AnthropicAPI(ModelAPI):
         self, tool: ToolInfo, config: GenerateConfig
     ) -> Optional["ToolParamDef"]:
         return (
-            (self.computer_use_tool_param(tool) or self.text_editor_tool_param(tool))
+            (
+                self.computer_use_tool_param(tool)
+                or self.text_editor_tool_param(tool)
+                or self.web_search_tool_param(tool)
+            )
             if config.internal_tools is not False
             else None
         )
@@ -598,6 +656,49 @@ class AnthropicAPI(ModelAPI):
         else:
             return None
 
+    def web_search_tool_param(
+        self, tool: ToolInfo
+    ) -> WebSearchTool20250305Param | None:
+        if (
+            tool.name == "web_search"
+            and tool.options
+            and "anthropic" in tool.options
+            and self.model_name in WEB_SEARCH_COMPATIBLE_MODELS
+        ):
+            return _web_search_tool_param(tool.options["anthropic"])
+        else:
+            return None
+
+
+def _web_search_tool_param(
+    maybe_anthropic_options: object,
+) -> WebSearchTool20250305Param:
+    if maybe_anthropic_options is not None and not isinstance(
+        maybe_anthropic_options, dict
+    ):
+        raise TypeError(
+            f"Expected a dictionary for anthropic_options, got {type(maybe_anthropic_options)}"
+        )
+
+    result = WebSearchTool20250305Param(
+        name="web_search",
+        type="web_search_20250305",
+    )
+
+    if maybe_anthropic_options:
+        if "allowed_domains" in maybe_anthropic_options:
+            result["allowed_domains"] = maybe_anthropic_options["allowed_domains"]
+        if "blocked_domains" in maybe_anthropic_options:
+            result["blocked_domains"] = maybe_anthropic_options["blocked_domains"]
+        if "cache_control" in maybe_anthropic_options:
+            result["cache_control"] = maybe_anthropic_options["cache_control"]
+        if "max_uses" in maybe_anthropic_options:
+            result["max_uses"] = maybe_anthropic_options["max_uses"]
+        if "user_location" in maybe_anthropic_options:
+            result["user_location"] = maybe_anthropic_options["user_location"]
+
+    return result
+
 
 # tools can be either a stock tool param or a special Anthropic native use tool param
 ToolParamDef = (
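`_web_search_tool_param` only copies the option keys listed above; anything else in the provider options dict is ignored. A hypothetical eval-side sketch of how Anthropic options would flow into `tool.options["anthropic"]` (the exact shape of the `web_search()` argument is assumed from how this diff reads `tool.options`):

```python
from inspect_ai import Task, task
from inspect_ai.dataset import Sample
from inspect_ai.solver import generate, use_tools
from inspect_ai.tool import web_search


@task
def search_news() -> Task:
    return Task(
        dataset=[Sample(input="What changed in inspect_ai 0.3.104?")],
        solver=[
            use_tools(
                web_search(
                    # assumed shape: provider name -> provider-specific options
                    {"anthropic": {"max_uses": 3, "blocked_domains": ["example.com"]}}
                )
            ),
            generate(),
        ],
    )
```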
@@ -605,6 +706,7 @@ ToolParamDef = (
     | BetaToolComputerUse20250124Param
     | ToolTextEditor20250124Param
     | BetaToolTextEditor20241022Param
+    | WebSearchTool20250305Param
 )
 
 
@@ -614,6 +716,7 @@ def add_cache_control(
     | BetaToolComputerUse20250124Param
     | ToolTextEditor20250124Param
     | BetaToolTextEditor20241022Param
+    | WebSearchTool20250305Param
     | dict[str, Any],
 ) -> None:
     cast(dict[str, Any], param)["cache_control"] = {"type": "ephemeral"}
@@ -698,6 +801,8 @@ async def message_param(message: ChatMessage) -> MessageParam:
             | ImageBlockParam
             | ThinkingBlockParam
             | RedactedThinkingBlockParam
+            | ServerToolUseBlockParam
+            | WebSearchToolResultBlockParam
         ]
     ) = message.error.message
     # anthropic requires that content be populated when
@@ -735,6 +840,8 @@ async def message_param(message: ChatMessage) -> MessageParam:
         | RedactedThinkingBlockParam
         | ImageBlockParam
         | ToolUseBlockParam
+        | ServerToolUseBlockParam
+        | WebSearchToolResultBlockParam
     ] = (
         [TextBlockParam(type="text", text=message.content or NO_CONTENT)]
         if isinstance(message.content, str)
@@ -785,7 +892,7 @@ async def model_output_from_message(
     model: str,
     message: Message,
     tools: list[ToolInfo],
-) -> ModelOutput:
+) -> tuple[ModelOutput, bool]:
     # extract content and tool calls
     content: list[Content] = []
     reasoning_tokens = 0
@@ -800,7 +907,20 @@ async def model_output_from_message(
             content_text = content_text.replace("<result>", "").replace(
                 "</result>", ""
             )
-            content.append(ContentText(type="text", text=content_text))
+            content.append(
+                ContentText(
+                    type="text",
+                    text=content_text,
+                    citations=(
+                        [
+                            to_inspect_citation(citation)
+                            for citation in content_block.citations
+                        ]
+                        if content_block.citations
+                        else None
+                    ),
+                )
+            )
         elif isinstance(content_block, ToolUseBlock):
             tool_calls = tool_calls or []
             (tool_name, internal_name) = _names_for_tool_call(content_block.name, tools)
@@ -812,6 +932,10 @@ async def model_output_from_message(
                     internal=internal_name,
                 )
             )
+        elif isinstance(content_block, ServerToolUseBlock):
+            content.append(ContentData(data=content_block.model_dump()))
+        elif isinstance(content_block, WebSearchToolResultBlock):
+            content.append(ContentData(data=content_block.model_dump()))
         elif isinstance(content_block, RedactedThinkingBlock):
             content.append(
                 ContentReasoning(reasoning=content_block.data, redacted=True)
@@ -827,11 +951,12 @@ async def model_output_from_message(
     )
 
     # resolve choice
+    stop_reason, pause_turn = message_stop_reason(message)
     choice = ChatCompletionChoice(
         message=ChatMessageAssistant(
             content=content, tool_calls=tool_calls, model=model, source="generate"
         ),
-        stop_reason=message_stop_reason(message),
+        stop_reason=stop_reason,
     )
 
     # return ModelOutput
@@ -844,17 +969,20 @@ async def model_output_from_message(
         + (input_tokens_cache_read or 0)
         + message.usage.output_tokens  # includes reasoning tokens
     )
-    return ModelOutput(
-        model=message.model,
-        choices=[choice],
-        usage=ModelUsage(
-            input_tokens=message.usage.input_tokens,
-            output_tokens=message.usage.output_tokens,
-            total_tokens=total_tokens,
-            input_tokens_cache_write=input_tokens_cache_write,
-            input_tokens_cache_read=input_tokens_cache_read,
-            reasoning_tokens=reasoning_tokens if reasoning_tokens > 0 else None,
+    return (
+        ModelOutput(
+            model=message.model,
+            choices=[choice],
+            usage=ModelUsage(
+                input_tokens=message.usage.input_tokens,
+                output_tokens=message.usage.output_tokens,
+                total_tokens=total_tokens,
+                input_tokens_cache_write=input_tokens_cache_write,
+                input_tokens_cache_read=input_tokens_cache_read,
+                reasoning_tokens=reasoning_tokens if reasoning_tokens > 0 else None,
+            ),
         ),
+        pause_turn,
     )
 
 
@@ -893,16 +1021,18 @@ def _names_for_tool_call(
     )
 
 
-def message_stop_reason(message: Message) -> StopReason:
+def message_stop_reason(message: Message) -> tuple[StopReason, bool]:
     match message.stop_reason:
         case "end_turn" | "stop_sequence":
-            return "stop"
+            return "stop", False
         case "tool_use":
-            return "tool_calls"
+            return "tool_calls", False
         case "max_tokens":
-            return message.stop_reason
+            return message.stop_reason, False
+        case "refusal":
+            return "content_filter", False
         case _:
-            return "unknown"
+            return "unknown", message.stop_reason == "pause_turn"
 
 
 def split_system_messages(
  def split_system_messages(
@@ -918,9 +1048,24 @@ def split_system_messages(
918
1048
 
919
1049
  async def message_param_content(
920
1050
  content: Content,
921
- ) -> TextBlockParam | ImageBlockParam | ThinkingBlockParam | RedactedThinkingBlockParam:
1051
+ ) -> (
1052
+ TextBlockParam
1053
+ | ImageBlockParam
1054
+ | ThinkingBlockParam
1055
+ | RedactedThinkingBlockParam
1056
+ | ServerToolUseBlockParam
1057
+ | WebSearchToolResultBlockParam
1058
+ ):
922
1059
  if isinstance(content, ContentText):
923
- return TextBlockParam(type="text", text=content.text or NO_CONTENT)
1060
+ citations = (
1061
+ [to_anthropic_citation(citation) for citation in content.citations]
1062
+ if content.citations
1063
+ else None
1064
+ )
1065
+
1066
+ return TextBlockParam(
1067
+ type="text", text=content.text or NO_CONTENT, citations=citations
1068
+ )
924
1069
  elif isinstance(content, ContentImage):
925
1070
  # resolve to url
926
1071
  image = await file_as_data_uri(content.image)
@@ -948,6 +1093,19 @@ async def message_param_content(
948
1093
  return ThinkingBlockParam(
949
1094
  type="thinking", thinking=content.reasoning, signature=content.signature
950
1095
  )
1096
+ elif isinstance(content, ContentData):
1097
+ match content.data.get("type", None):
1098
+ case "server_tool_use":
1099
+ return cast(
1100
+ ServerToolUseBlockParam,
1101
+ ServerToolUseBlock.model_validate(content.data).model_dump(),
1102
+ )
1103
+ case "web_search_tool_result":
1104
+ return cast(
1105
+ WebSearchToolResultBlockParam,
1106
+ WebSearchToolResultBlock.model_validate(content.data).model_dump(),
1107
+ )
1108
+ raise NotImplementedError()
951
1109
  else:
952
1110
  raise RuntimeError(
953
1111
  "Anthropic models do not currently support audio or video inputs."
@@ -990,3 +1148,7 @@ def model_call_filter(key: JsonValue | None, value: JsonValue) -> JsonValue:
         value = copy(value)
         value.update(data=BASE_64_DATA_REMOVED)
     return value
+
+
+def _content_list(input: str | list[Content]) -> list[Content]:
+    return [ContentText(text=input)] if isinstance(input, str) else input
inspect_ai/model/_providers/google.py

@@ -26,6 +26,7 @@ from google.genai.types import (
     GenerateContentResponse,
     GenerateContentResponsePromptFeedback,
     GenerateContentResponseUsageMetadata,
+    GoogleSearch,
     HarmBlockThreshold,
     HarmCategory,
     HttpOptions,
@@ -48,6 +49,7 @@ from inspect_ai._util.content import (
 )
 from inspect_ai._util.content import (
     ContentAudio,
+    ContentData,
     ContentImage,
     ContentReasoning,
     ContentText,
@@ -74,6 +76,7 @@ from inspect_ai.model import (
     TopLogprob,
 )
 from inspect_ai.model._model_call import ModelCall
+from inspect_ai.model._providers._google_citations import get_candidate_citations
 from inspect_ai.tool import (
     ToolCall,
     ToolChoice,
@@ -247,7 +250,7 @@ class GoogleGenAIAPI(ModelAPI):
 
         # Create google-genai types.
         gemini_contents = await as_chat_messages(client, input)
-        gemini_tools = chat_tools(tools) if len(tools) > 0 else None
+        gemini_tools = self.chat_tools(tools) if len(tools) > 0 else None
         gemini_tool_config = chat_tool_config(tool_choice) if len(tools) > 0 else None
         parameters = GenerateContentConfig(
             http_options=HttpOptions(headers={HttpHooks.REQUEST_ID_HEADER: request_id}),
@@ -362,6 +365,61 @@ class GoogleGenAIAPI(ModelAPI):
         else:
             return None
 
+    def _use_native_search(self, tool: ToolInfo) -> bool:
+        return (
+            tool.name == "web_search"
+            and tool.options is not None
+            and "gemini" in tool.options
+            # Support "starts with" Gemini 2.0
+            and (self.is_gemini() and not self.is_gemini_1_5())
+        )
+
+    def _categorize_tool(
+        self, acc: tuple[bool, list[FunctionDeclaration]], tool: ToolInfo
+    ) -> tuple[bool, list[FunctionDeclaration]]:
+        """Reducer function that categorizes tools into native search vs function declarations.
+
+        Returns:
+            Tuple of (has_native_search, function_declarations) where has_native_search
+            is True if any tool uses native search, and function_declarations contains
+            all non-native-search tools converted to FunctionDeclaration objects.
+        """
+        return (
+            (True, acc[1])
+            if self._use_native_search(tool)
+            else (
+                acc[0],
+                acc[1]
+                + [
+                    FunctionDeclaration(
+                        name=tool.name,
+                        description=tool.description,
+                        parameters=schema_from_param(tool.parameters)
+                        if len(tool.parameters.properties) > 0
+                        else None,
+                    )
+                ],
+            )
+        )
+
+    def chat_tools(self, tools: list[ToolInfo]) -> ToolListUnion:
+        has_native_search, function_declarations = functools.reduce(
+            self._categorize_tool, tools, (False, list[FunctionDeclaration]())
+        )
+
+        # TODO: Google doesn't yet support native search concurrently with other tools.
+        # Revisit this from time to time to adapt when they fix it.
+        if has_native_search and function_declarations:
+            raise ValueError(
+                "Gemini does not yet support native search concurrently with other tools."
+            )
+
+        return (
+            [Tool(google_search=GoogleSearch())]
+            if has_native_search
+            else [Tool(function_declarations=function_declarations)]
+        )
+
 
 def safety_settings_to_list(
     safety_settings: list[SafetySettingDict],
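The reducer threads a `(has_native_search, declarations)` accumulator through `functools.reduce`, so a single pass both detects native search and collects the remaining function declarations. A self-contained illustration of the same pattern, with plain strings standing in for `ToolInfo`:

```python
import functools


def categorize(acc: tuple[bool, list[str]], tool: str) -> tuple[bool, list[str]]:
    # "web_search" stands in for a tool that maps to Gemini's native search
    return (True, acc[1]) if tool == "web_search" else (acc[0], acc[1] + [tool])


has_search, declarations = functools.reduce(
    categorize, ["web_search", "bash", "python"], (False, [])
)
assert has_search is True
assert declarations == ["bash", "python"]  # a mixed list like this makes chat_tools() raise
```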
@@ -500,6 +558,8 @@ async def content_part(client: Client, content: InspectContent | str) -> Part:
         return Part.from_text(text=content.text or NO_CONTENT)
     elif isinstance(content, ContentReasoning):
         return Part.from_text(text=content.reasoning or NO_CONTENT)
+    elif isinstance(content, ContentData):
+        assert False, "Google provider should never encounter ContentData"
     else:
         return await chat_content_to_part(client, content)
 
@@ -538,20 +598,6 @@ async def extract_system_message_as_parts(
     return system_parts or None
 
 
-def chat_tools(tools: list[ToolInfo]) -> ToolListUnion:
-    declarations = [
-        FunctionDeclaration(
-            name=tool.name,
-            description=tool.description,
-            parameters=schema_from_param(tool.parameters)
-            if len(tool.parameters.properties) > 0
-            else None,
-        )
-        for tool in tools
-    ]
-    return [Tool(function_declarations=declarations)]
-
-
 # https://ai.google.dev/gemini-api/tutorials/extract_structured_data#define_the_schema
 def schema_from_param(
     param: ToolParam | ToolParams, nullable: bool | None = False
@@ -656,19 +702,36 @@ def completion_choice_from_candidate(
             | ContentImage
             | ContentAudio
             | ContentVideo
+            | ContentData
         ]
     ) = ""
     # content.parts can be None when the finish_reason is MALFORMED_FUNCTION_CALL
     elif candidate.content.parts is None:
         content = ""
     else:
-        content = []
-        for part in candidate.content.parts:
-            if part.text is not None:
-                if part.thought is True:
-                    content.append(ContentReasoning(reasoning=part.text))
-                else:
-                    content.append(ContentText(text=part.text))
+        # Google's grounded search metadata provides start/end indices for cited
+        # text based on the joining of all separate text parts (despite the doc
+        # suggesting that they provide part_index). Thankfully, the doc also says:
+        #
+        #   Exactly one field within a Part should be set, representing the specific type
+        #   of content being conveyed. Using multiple fields within the same `Part`
+        #   instance is considered invalid.
+        #
+        # That means that we can safely collapse adjacent parts with a `text` field
+        # and not fear that we're breaking other types of content parts
+        parts = functools.reduce(
+            _combine_text_parts, candidate.content.parts, list[Part]()
+        )
+
+        content = [
+            ContentReasoning(reasoning=part.text)
+            if part.thought is True
+            else ContentText(
+                text=part.text, citations=get_candidate_citations(candidate)
+            )
+            for part in parts
+            if part.text is not None
+        ]
 
     # now tool calls
     tool_calls: list[ToolCall] = []
@@ -922,3 +985,12 @@ async def file_for_content(
         files_db.put(content_sha256, str(upload.name))
     # return the file
     return upload
+
+
+def _combine_text_parts(acc: list[Part], part: Part) -> list[Part]:
+    """Combine adjacent text parts into a single part."""
+    return (
+        acc + [part]
+        if part.text is None or len(acc) == 0 or acc[-1].text is None
+        else acc[:-1] + [Part(text=acc[-1].text + part.text)]
+    )
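To see what `_combine_text_parts` does, here is a small sketch (assuming the function above is in scope): adjacent text parts merge, while a non-text part breaks the run, so citation offsets computed over the joined text stay valid.

```python
import functools

from google.genai.types import Blob, Part

parts = [
    Part(text="Hello, "),
    Part(text="world."),
    Part(inline_data=Blob(mime_type="image/png", data=b"...")),  # non-text part
    Part(text="Bye."),
]

combined = functools.reduce(_combine_text_parts, parts, list[Part]())
assert [p.text for p in combined] == ["Hello, world.", None, "Bye."]
```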
inspect_ai/model/_providers/mistral.py

@@ -44,9 +44,15 @@ from typing_extensions import override
 # TODO: Migration guide:
 # https://github.com/mistralai/client-python/blob/main/MIGRATION.md
 from inspect_ai._util.constants import NO_CONTENT
-from inspect_ai._util.content import Content, ContentImage, ContentText
+from inspect_ai._util.content import (
+    Content,
+    ContentImage,
+    ContentReasoning,
+    ContentText,
+)
 from inspect_ai._util.http import is_retryable_http_status
 from inspect_ai._util.images import file_as_data_uri
+from inspect_ai.model._reasoning import parse_content_with_reasoning
 from inspect_ai.tool import ToolCall, ToolChoice, ToolFunction, ToolInfo
 
 from ..._util.httpx import httpx_should_retry
@@ -481,26 +487,33 @@ def completion_content(content: str | list[ContentChunk]) -> str | list[Content]
     if isinstance(content, str):
         return content
     else:
-        return [completion_content_chunk(c) for c in content]
+        return [item for c in content for item in completion_content_chunks(c)]
 
 
-def completion_content_chunk(content: ContentChunk) -> Content:
+def completion_content_chunks(content: ContentChunk) -> list[Content]:
     if isinstance(content, ReferenceChunk):
         raise TypeError("ReferenceChunk content is not supported by Inspect.")
     elif isinstance(content, TextChunk):
-        return ContentText(text=content.text)
+        parsed = parse_content_with_reasoning(content.text)
+        if parsed:
+            return [
+                ContentReasoning(reasoning=parsed.reasoning),
+                ContentText(text=parsed.content),
+            ]
+        else:
+            return [ContentText(text=content.text)]
     elif isinstance(content, DocumentURLChunk):
-        return ContentText(text=content.document_url)
+        return [ContentText(text=content.document_url)]
     else:
         if isinstance(content.image_url, str):
-            return ContentImage(image=content.image_url)
+            return [ContentImage(image=content.image_url)]
         else:
            match content.image_url.detail:
                case "low" | "high":
                    detail: Literal["auto", "low", "high"] = content.image_url.detail
                case _:
                    detail = "auto"
-            return ContentImage(image=content.image_url.url, detail=detail)
+            return [ContentImage(image=content.image_url.url, detail=detail)]
 
 
 def completion_choices_from_response(
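The net effect is that a Mistral `TextChunk` carrying inline reasoning is now split into `ContentReasoning` plus `ContentText` instead of a single text block. A sketch, assuming the `<think>` tag format that `parse_content_with_reasoning` targets:

```python
from inspect_ai.model._reasoning import parse_content_with_reasoning

# assumed input format: reasoning wrapped in <think> tags
parsed = parse_content_with_reasoning("<think>Recall 2 + 2.</think>The answer is 4.")
if parsed is not None:
    print(parsed.reasoning)  # "Recall 2 + 2."
    print(parsed.content)    # "The answer is 4."
```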
inspect_ai/model/_providers/openai.py

@@ -13,6 +13,7 @@ from openai._types import NOT_GIVEN
 from openai.types.chat import ChatCompletion
 from typing_extensions import override
 
+from inspect_ai._util.deprecation import deprecation_warning
 from inspect_ai._util.error import PrerequisiteError
 from inspect_ai._util.logger import warn_once
 from inspect_ai.model._openai import chat_choices_from_openai
@@ -64,6 +65,8 @@ class OpenAIAPI(ModelAPI):
         api_key: str | None = None,
         config: GenerateConfig = GenerateConfig(),
         responses_api: bool | None = None,
+        # Can't use the XxxDeprecatedArgs approach since this already has a **param
+        # but responses_store is deprecated and should not be used.
         responses_store: Literal["auto"] | bool = "auto",
         service_tier: str | None = None,
         client_timeout: float | None = None,
@@ -88,19 +91,18 @@ class OpenAIAPI(ModelAPI):
         )
 
         # is this a model we use responses api by default for?
-        responses_model = (
-            (self.is_o_series() and not self.is_o1_early())
-            or self.is_computer_use_preview()
-            or self.is_codex()
-        )
+        responses_preferred = (
+            self.is_o_series() and not self.is_o1_early()
+        ) or self.is_codex()
 
         # resolve whether we are forcing the responses api
-        self.responses_api = responses_api or responses_model
+        self.responses_api = self.is_computer_use_preview() or (
+            responses_api if responses_api is not None else responses_preferred
+        )
 
         # resolve whether we are using the responses store
-        self.responses_store = (
-            responses_store if isinstance(responses_store, bool) else responses_model
-        )
+        if isinstance(responses_store, bool):
+            deprecation_warning("`responses_store` is no longer supported.")
 
         # set service tier if specified
         self.service_tier = service_tier
@@ -260,7 +262,6 @@ class OpenAIAPI(ModelAPI):
                 tool_choice=tool_choice,
                 config=config,
                 service_tier=self.service_tier,
-                store=self.responses_store,
             )
 
             # allocate request_id (so we can see it from ModelCall)
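Net behavior change for callers: `responses_store` is now ignored apart from a deprecation warning, and `store` is no longer sent on Responses API requests. A sketch of what now triggers the warning (the model name is illustrative):

```python
from inspect_ai.model import get_model

# passing an explicit bool now emits a deprecation warning instead of
# configuring response storage
model = get_model("openai/gpt-4o", responses_store=False)
```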