inspect-ai 0.3.103__py3-none-any.whl → 0.3.104__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/common.py +2 -1
- inspect_ai/_cli/eval.py +2 -2
- inspect_ai/_display/core/active.py +3 -0
- inspect_ai/_display/core/config.py +1 -0
- inspect_ai/_display/core/panel.py +21 -13
- inspect_ai/_display/core/results.py +3 -7
- inspect_ai/_display/core/rich.py +3 -5
- inspect_ai/_display/log/__init__.py +0 -0
- inspect_ai/_display/log/display.py +173 -0
- inspect_ai/_display/plain/display.py +2 -2
- inspect_ai/_display/rich/display.py +2 -4
- inspect_ai/_display/textual/app.py +1 -6
- inspect_ai/_display/textual/widgets/task_detail.py +3 -14
- inspect_ai/_display/textual/widgets/tasks.py +1 -1
- inspect_ai/_eval/eval.py +1 -1
- inspect_ai/_eval/evalset.py +2 -2
- inspect_ai/_eval/registry.py +6 -1
- inspect_ai/_eval/run.py +5 -1
- inspect_ai/_eval/task/constants.py +1 -0
- inspect_ai/_eval/task/log.py +2 -0
- inspect_ai/_eval/task/run.py +1 -1
- inspect_ai/_util/citation.py +88 -0
- inspect_ai/_util/content.py +24 -2
- inspect_ai/_util/json.py +17 -2
- inspect_ai/_util/registry.py +19 -4
- inspect_ai/_view/schema.py +0 -6
- inspect_ai/_view/www/dist/assets/index.css +82 -24
- inspect_ai/_view/www/dist/assets/index.js +10124 -9808
- inspect_ai/_view/www/log-schema.json +418 -1
- inspect_ai/_view/www/node_modules/flatted/python/flatted.py +149 -0
- inspect_ai/_view/www/node_modules/katex/src/fonts/generate_fonts.py +58 -0
- inspect_ai/_view/www/node_modules/katex/src/metrics/extract_tfms.py +114 -0
- inspect_ai/_view/www/node_modules/katex/src/metrics/extract_ttfs.py +122 -0
- inspect_ai/_view/www/node_modules/katex/src/metrics/format_json.py +28 -0
- inspect_ai/_view/www/node_modules/katex/src/metrics/parse_tfm.py +211 -0
- inspect_ai/_view/www/package.json +2 -2
- inspect_ai/_view/www/src/@types/log.d.ts +140 -39
- inspect_ai/_view/www/src/app/content/RecordTree.tsx +13 -0
- inspect_ai/_view/www/src/app/log-view/LogView.tsx +1 -1
- inspect_ai/_view/www/src/app/routing/logNavigation.ts +31 -0
- inspect_ai/_view/www/src/app/routing/{navigationHooks.ts → sampleNavigation.ts} +39 -86
- inspect_ai/_view/www/src/app/samples/SampleDialog.tsx +1 -1
- inspect_ai/_view/www/src/app/samples/SampleDisplay.tsx +1 -1
- inspect_ai/_view/www/src/app/samples/chat/MessageCitations.module.css +16 -0
- inspect_ai/_view/www/src/app/samples/chat/MessageCitations.tsx +63 -0
- inspect_ai/_view/www/src/app/samples/chat/MessageContent.module.css +6 -0
- inspect_ai/_view/www/src/app/samples/chat/MessageContent.tsx +174 -25
- inspect_ai/_view/www/src/app/samples/chat/MessageContents.tsx +21 -3
- inspect_ai/_view/www/src/app/samples/chat/content-data/ContentDataView.module.css +7 -0
- inspect_ai/_view/www/src/app/samples/chat/content-data/ContentDataView.tsx +111 -0
- inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearch.module.css +10 -0
- inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearch.tsx +14 -0
- inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearchResults.module.css +19 -0
- inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearchResults.tsx +49 -0
- inspect_ai/_view/www/src/app/samples/chat/messages.ts +7 -1
- inspect_ai/_view/www/src/app/samples/chat/tools/ToolCallView.tsx +12 -2
- inspect_ai/_view/www/src/app/samples/chat/types.ts +4 -0
- inspect_ai/_view/www/src/app/samples/list/SampleList.tsx +1 -1
- inspect_ai/_view/www/src/app/samples/sampleLimit.ts +2 -2
- inspect_ai/_view/www/src/app/samples/transcript/ModelEventView.tsx +1 -1
- inspect_ai/_view/www/src/app/samples/transcript/SampleLimitEventView.tsx +4 -4
- inspect_ai/_view/www/src/app/samples/transcript/outline/TranscriptOutline.tsx +1 -1
- inspect_ai/_view/www/src/components/MarkdownDiv.tsx +15 -2
- inspect_ai/_view/www/src/tests/README.md +2 -2
- inspect_ai/_view/www/src/utils/git.ts +3 -1
- inspect_ai/_view/www/src/utils/html.ts +6 -0
- inspect_ai/agent/_handoff.py +3 -3
- inspect_ai/log/_condense.py +5 -0
- inspect_ai/log/_file.py +4 -1
- inspect_ai/log/_log.py +9 -4
- inspect_ai/log/_recorders/json.py +4 -2
- inspect_ai/log/_util.py +2 -0
- inspect_ai/model/__init__.py +14 -0
- inspect_ai/model/_call_tools.py +13 -4
- inspect_ai/model/_chat_message.py +3 -0
- inspect_ai/model/_openai_responses.py +80 -34
- inspect_ai/model/_providers/_anthropic_citations.py +158 -0
- inspect_ai/model/_providers/_google_citations.py +100 -0
- inspect_ai/model/_providers/anthropic.py +196 -34
- inspect_ai/model/_providers/google.py +94 -22
- inspect_ai/model/_providers/mistral.py +20 -7
- inspect_ai/model/_providers/openai.py +11 -10
- inspect_ai/model/_providers/openai_compatible.py +3 -2
- inspect_ai/model/_providers/openai_responses.py +2 -5
- inspect_ai/model/_providers/perplexity.py +123 -0
- inspect_ai/model/_providers/providers.py +13 -2
- inspect_ai/model/_providers/vertex.py +3 -0
- inspect_ai/model/_trim.py +5 -0
- inspect_ai/tool/__init__.py +14 -0
- inspect_ai/tool/_mcp/_mcp.py +5 -2
- inspect_ai/tool/_mcp/sampling.py +19 -3
- inspect_ai/tool/_mcp/server.py +1 -1
- inspect_ai/tool/_tool.py +10 -1
- inspect_ai/tool/_tools/_web_search/_base_http_provider.py +104 -0
- inspect_ai/tool/_tools/_web_search/_exa.py +78 -0
- inspect_ai/tool/_tools/_web_search/_google.py +22 -25
- inspect_ai/tool/_tools/_web_search/_tavily.py +47 -65
- inspect_ai/tool/_tools/_web_search/_web_search.py +83 -36
- inspect_ai/tool/_tools/_web_search/_web_search_provider.py +7 -0
- inspect_ai/util/_display.py +11 -2
- inspect_ai/util/_sandbox/docker/compose.py +2 -2
- inspect_ai/util/_span.py +12 -1
- {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.104.dist-info}/METADATA +2 -2
- {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.104.dist-info}/RECORD +110 -86
- /inspect_ai/model/{_openai_computer_use.py → _providers/_openai_computer_use.py} +0 -0
- /inspect_ai/model/{_openai_web_search.py → _providers/_openai_web_search.py} +0 -0
- {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.104.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.104.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.104.dist-info}/licenses/LICENSE +0 -0
- {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.104.dist-info}/top_level.txt +0 -0
inspect_ai/model/_providers/anthropic.py

```diff
@@ -22,6 +22,8 @@ from anthropic.types import (
     MessageParam,
     RedactedThinkingBlock,
     RedactedThinkingBlockParam,
+    ServerToolUseBlock,
+    ServerToolUseBlockParam,
     TextBlock,
     TextBlockParam,
     ThinkingBlock,
@@ -31,6 +33,9 @@ from anthropic.types import (
     ToolTextEditor20250124Param,
     ToolUseBlock,
     ToolUseBlockParam,
+    WebSearchTool20250305Param,
+    WebSearchToolResultBlock,
+    WebSearchToolResultBlockParam,
     message_create_params,
 )
 from anthropic.types.beta import (
@@ -43,6 +48,7 @@ from typing_extensions import override
 from inspect_ai._util.constants import BASE_64_DATA_REMOVED, NO_CONTENT
 from inspect_ai._util.content import (
     Content,
+    ContentData,
     ContentImage,
     ContentReasoning,
     ContentText,
@@ -61,6 +67,10 @@ from .._generate_config import GenerateConfig
 from .._model import ModelAPI
 from .._model_call import ModelCall
 from .._model_output import ChatCompletionChoice, ModelOutput, ModelUsage, StopReason
+from .._providers._anthropic_citations import (
+    to_anthropic_citation,
+    to_inspect_citation,
+)
 from .util import environment_prerequisite_error, model_base_url
 from .util.hooks import HttpxHooks
 
@@ -70,6 +80,14 @@ ANTHROPIC_API_KEY = "ANTHROPIC_API_KEY"
 
 INTERNAL_COMPUTER_TOOL_NAME = "computer"
 
+WEB_SEARCH_COMPATIBLE_MODELS = [
+    "claude-opus-4-20250514",
+    "claude-sonnet-4-20250514",
+    "claude-3-7-sonnet-20250219",
+    "claude-3-5-sonnet-latest",
+    "claude-3-5-haiku-latest",
+]
+
 
 class AnthropicAPI(ModelAPI):
     def __init__(
@@ -232,27 +250,19 @@ class AnthropicAPI(ModelAPI):
             if self.extra_body is not None:
                 request["extra_body"] = self.extra_body
 
-            # make request (unless
+            # make request (unless overridden, stream if we are using reasoning)
             streaming = (
                 self.is_using_thinking(config)
                 if self.streaming == "auto"
                 else self.streaming
             )
-            if streaming:
-                async with self.client.messages.stream(**request) as stream:
-                    message = await stream.get_final_message()
-            else:
-                message = await self.client.messages.create(**request, stream=False)
 
-
-
-
-            # extract output
-            output = await model_output_from_message(
-                self.client, self.service_model_name(), message, tools
+            message, output = await self._perform_request_and_continuations(
+                request, streaming, tools
             )
 
-
+            response = message.model_dump()
+
             return output, model_call()
 
         except BadRequestError as ex:
@@ -269,6 +279,50 @@
         else:
             raise ex
 
+    async def _perform_request_and_continuations(
+        self,
+        request: dict[str, Any],
+        streaming: bool,
+        tools: list[ToolInfo],
+    ) -> tuple[Message, ModelOutput]:
+        """
+        This helper function is split out so that it can be easily call itself recursively in cases where the model requires a continuation
+
+        It considers the result from the initial request the "head" and the result
+        from the continuation the "tail".
+        """
+        if streaming:
+            async with self.client.messages.stream(**request) as stream:
+                head_message = await stream.get_final_message()
+        else:
+            head_message = await self.client.messages.create(**request, stream=False)
+
+        head_model_output, continuation_required = await model_output_from_message(
+            self.client, self.service_model_name(), head_message, tools
+        )
+
+        if continuation_required:
+            tail_request = dict(request)
+            tail_request["messages"] = request["messages"] + [
+                MessageParam(role=head_message.role, content=head_message.content)
+            ]
+            _, tail_model_output = await self._perform_request_and_continuations(
+                tail_request, streaming, tools
+            )
+
+            head_content = _content_list(head_model_output.message.content)
+            tail_content = _content_list(tail_model_output.message.content)
+            tail_model_output.message.content = head_content + tail_content
+
+            # TODO:
+            # It looks weird to return the head message with the tail output, but
+            # the contract for this function is that it returns the head message
+            # even when it has needed to recurse. This is because model_call()
+            # above doesn't currently support multiple requests
+            return head_message, tail_model_output
+
+        return head_message, head_model_output
+
     def completion_config(
         self, config: GenerateConfig
     ) -> tuple[dict[str, Any], dict[str, str], list[str]]:
```
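The new `_perform_request_and_continuations` helper is the heart of this change: when a response stops with Anthropic's `pause_turn` reason (surfaced by `message_stop_reason` further down), the helper replays the request with the paused assistant message appended and stitches the head and tail content together. A minimal, self-contained sketch of that head/tail control flow (`FakeMessage` and `fake_api_call` are hypothetical stand-ins for the Anthropic client; only the recursion mirrors the helper):

```python
from dataclasses import dataclass


@dataclass
class FakeMessage:
    role: str
    content: list[str]
    stop_reason: str  # "pause_turn" means the turn needs a continuation


def fake_api_call(messages: list[FakeMessage]) -> FakeMessage:
    # pretend the first call pauses mid server-side tool use, the second finishes
    paused = not any(m.role == "assistant" for m in messages)
    return FakeMessage(
        role="assistant",
        content=["<search results>"] if paused else ["final answer"],
        stop_reason="pause_turn" if paused else "end_turn",
    )


def request_with_continuations(messages: list[FakeMessage]) -> list[str]:
    head = fake_api_call(messages)  # the "head" result
    if head.stop_reason == "pause_turn":
        # echo the paused assistant message back and recurse for the "tail"
        tail = request_with_continuations(messages + [head])
        return head.content + tail  # stitch head and tail content together
    return head.content


print(request_with_continuations([FakeMessage("user", ["search the web"], "n/a")]))
# -> ['<search results>', 'final answer']
```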
```diff
@@ -521,7 +575,11 @@
         self, tool: ToolInfo, config: GenerateConfig
     ) -> Optional["ToolParamDef"]:
         return (
-            (self.computer_use_tool_param(tool) or self.text_editor_tool_param(tool))
+            (
+                self.computer_use_tool_param(tool)
+                or self.text_editor_tool_param(tool)
+                or self.web_search_tool_param(tool)
+            )
             if config.internal_tools is not False
             else None
         )
@@ -598,6 +656,49 @@
         else:
             return None
 
+    def web_search_tool_param(
+        self, tool: ToolInfo
+    ) -> WebSearchTool20250305Param | None:
+        if (
+            tool.name == "web_search"
+            and tool.options
+            and "anthropic" in tool.options
+            and self.model_name in WEB_SEARCH_COMPATIBLE_MODELS
+        ):
+            return _web_search_tool_param(tool.options["anthropic"])
+        else:
+            return None
+
+
+def _web_search_tool_param(
+    maybe_anthropic_options: object,
+) -> WebSearchTool20250305Param:
+    if maybe_anthropic_options is not None and not isinstance(
+        maybe_anthropic_options, dict
+    ):
+        raise TypeError(
+            f"Expected a dictionary for anthropic_options, got {type(maybe_anthropic_options)}"
+        )
+
+    result = WebSearchTool20250305Param(
+        name="web_search",
+        type="web_search_20250305",
+    )
+
+    if maybe_anthropic_options:
+        if "allowed_domains" in maybe_anthropic_options:
+            result["allowed_domains"] = maybe_anthropic_options["allowed_domains"]
+        if "blocked_domains" in maybe_anthropic_options:
+            result["blocked_domains"] = maybe_anthropic_options["blocked_domains"]
+        if "cache_control" in maybe_anthropic_options:
+            result["cache_control"] = maybe_anthropic_options["cache_control"]
+        if "max_uses" in maybe_anthropic_options:
+            result["max_uses"] = maybe_anthropic_options["max_uses"]
+        if "user_location" in maybe_anthropic_options:
+            result["user_location"] = maybe_anthropic_options["user_location"]
+
+    return result
+
 
 # tools can be either a stock tool param or a special Anthropic native use tool param
 ToolParamDef = (
```
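`web_search_tool_param` (above) swaps in Anthropic's server-side `web_search_20250305` tool only when Inspect's `web_search` tool carries an `"anthropic"` options entry and the model appears in `WEB_SEARCH_COMPATIBLE_MODELS`. Based on that gating, opting in from a task looks roughly like this; treat the providers-dict shape as an assumption, since the user-facing signature lives in `inspect_ai/tool/_tools/_web_search/_web_search.py` (also changed in this release):

```python
# Hypothetical usage sketch inferred from web_search_tool_param above; the
# option keys mirror those copied by _web_search_tool_param.
from inspect_ai.tool import web_search

search = web_search(
    {
        "anthropic": {
            "allowed_domains": ["wikipedia.org", "arxiv.org"],  # or blocked_domains
            "max_uses": 3,
            "user_location": {"type": "approximate", "country": "GB"},
        }
    }
)
# On models outside WEB_SEARCH_COMPATIBLE_MODELS the tool falls back to the
# external providers (google, tavily, exa) handled in _web_search.py.
```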
```diff
@@ -605,6 +706,7 @@ ToolParamDef = (
     | BetaToolComputerUse20250124Param
     | ToolTextEditor20250124Param
     | BetaToolTextEditor20241022Param
+    | WebSearchTool20250305Param
 )
 
 
@@ -614,6 +716,7 @@ def add_cache_control(
     | BetaToolComputerUse20250124Param
     | ToolTextEditor20250124Param
     | BetaToolTextEditor20241022Param
+    | WebSearchTool20250305Param
     | dict[str, Any],
 ) -> None:
     cast(dict[str, Any], param)["cache_control"] = {"type": "ephemeral"}
@@ -698,6 +801,8 @@ async def message_param(message: ChatMessage) -> MessageParam:
                 | ImageBlockParam
                 | ThinkingBlockParam
                 | RedactedThinkingBlockParam
+                | ServerToolUseBlockParam
+                | WebSearchToolResultBlockParam
             ]
         ) = message.error.message
         # anthropic requires that content be populated when
@@ -735,6 +840,8 @@ async def message_param(message: ChatMessage) -> MessageParam:
         | RedactedThinkingBlockParam
         | ImageBlockParam
         | ToolUseBlockParam
+        | ServerToolUseBlockParam
+        | WebSearchToolResultBlockParam
     ] = (
         [TextBlockParam(type="text", text=message.content or NO_CONTENT)]
         if isinstance(message.content, str)
@@ -785,7 +892,7 @@ async def model_output_from_message(
     model: str,
     message: Message,
     tools: list[ToolInfo],
-) -> ModelOutput:
+) -> tuple[ModelOutput, bool]:
     # extract content and tool calls
     content: list[Content] = []
     reasoning_tokens = 0
@@ -800,7 +907,20 @@
                 content_text = content_text.replace("<result>", "").replace(
                     "</result>", ""
                 )
-            content.append(ContentText(type="text", text=content_text))
+            content.append(
+                ContentText(
+                    type="text",
+                    text=content_text,
+                    citations=(
+                        [
+                            to_inspect_citation(citation)
+                            for citation in content_block.citations
+                        ]
+                        if content_block.citations
+                        else None
+                    ),
+                )
+            )
         elif isinstance(content_block, ToolUseBlock):
             tool_calls = tool_calls or []
             (tool_name, internal_name) = _names_for_tool_call(content_block.name, tools)
@@ -812,6 +932,10 @@
                     internal=internal_name,
                 )
             )
+        elif isinstance(content_block, ServerToolUseBlock):
+            content.append(ContentData(data=content_block.model_dump()))
+        elif isinstance(content_block, WebSearchToolResultBlock):
+            content.append(ContentData(data=content_block.model_dump()))
         elif isinstance(content_block, RedactedThinkingBlock):
             content.append(
                 ContentReasoning(reasoning=content_block.data, redacted=True)
@@ -827,11 +951,12 @@
     )
 
     # resolve choice
+    stop_reason, pause_turn = message_stop_reason(message)
     choice = ChatCompletionChoice(
         message=ChatMessageAssistant(
             content=content, tool_calls=tool_calls, model=model, source="generate"
         ),
-        stop_reason=message_stop_reason(message),
+        stop_reason=stop_reason,
     )
 
     # return ModelOutput
@@ -844,17 +969,20 @@
         + (input_tokens_cache_read or 0)
         + message.usage.output_tokens  # includes reasoning tokens
     )
-    return ModelOutput(
-        model=message.model,
-        choices=[choice],
-        usage=ModelUsage(
-            input_tokens=message.usage.input_tokens,
-            output_tokens=message.usage.output_tokens,
-            total_tokens=total_tokens,
-            input_tokens_cache_write=input_tokens_cache_write,
-            input_tokens_cache_read=input_tokens_cache_read,
-            reasoning_tokens=reasoning_tokens if reasoning_tokens > 0 else None,
+    return (
+        ModelOutput(
+            model=message.model,
+            choices=[choice],
+            usage=ModelUsage(
+                input_tokens=message.usage.input_tokens,
+                output_tokens=message.usage.output_tokens,
+                total_tokens=total_tokens,
+                input_tokens_cache_write=input_tokens_cache_write,
+                input_tokens_cache_read=input_tokens_cache_read,
+                reasoning_tokens=reasoning_tokens if reasoning_tokens > 0 else None,
+            ),
         ),
+        pause_turn,
     )
 
 
@@ -893,16 +1021,18 @@ def _names_for_tool_call(
     )
 
 
-def message_stop_reason(message: Message) -> StopReason:
+def message_stop_reason(message: Message) -> tuple[StopReason, bool]:
     match message.stop_reason:
         case "end_turn" | "stop_sequence":
-            return "stop"
+            return "stop", False
         case "tool_use":
-            return "tool_calls"
+            return "tool_calls", False
         case "max_tokens":
-            return message.stop_reason
+            return message.stop_reason, False
+        case "refusal":
+            return "content_filter", False
         case _:
-            return "unknown"
+            return "unknown", message.stop_reason == "pause_turn"
 
 
 def split_system_messages(
@@ -918,9 +1048,24 @@ def split_system_messages(
 
 async def message_param_content(
     content: Content,
-) -> TextBlockParam | ImageBlockParam | ThinkingBlockParam | RedactedThinkingBlockParam:
+) -> (
+    TextBlockParam
+    | ImageBlockParam
+    | ThinkingBlockParam
+    | RedactedThinkingBlockParam
+    | ServerToolUseBlockParam
+    | WebSearchToolResultBlockParam
+):
     if isinstance(content, ContentText):
-        return TextBlockParam(type="text", text=content.text or NO_CONTENT)
+        citations = (
+            [to_anthropic_citation(citation) for citation in content.citations]
+            if content.citations
+            else None
+        )
+
+        return TextBlockParam(
+            type="text", text=content.text or NO_CONTENT, citations=citations
+        )
     elif isinstance(content, ContentImage):
         # resolve to url
         image = await file_as_data_uri(content.image)
@@ -948,6 +1093,19 @@ async def message_param_content(
         return ThinkingBlockParam(
             type="thinking", thinking=content.reasoning, signature=content.signature
         )
+    elif isinstance(content, ContentData):
+        match content.data.get("type", None):
+            case "server_tool_use":
+                return cast(
+                    ServerToolUseBlockParam,
+                    ServerToolUseBlock.model_validate(content.data).model_dump(),
+                )
+            case "web_search_tool_result":
+                return cast(
+                    WebSearchToolResultBlockParam,
+                    WebSearchToolResultBlock.model_validate(content.data).model_dump(),
+                )
+        raise NotImplementedError()
     else:
         raise RuntimeError(
             "Anthropic models do not currently support audio or video inputs."
@@ -990,3 +1148,7 @@ def model_call_filter(key: JsonValue | None, value: JsonValue) -> JsonValue:
     value = copy(value)
     value.update(data=BASE_64_DATA_REMOVED)
     return value
+
+
+def _content_list(input: str | list[Content]) -> list[Content]:
+    return [ContentText(text=input)] if isinstance(input, str) else input
```
inspect_ai/model/_providers/google.py

```diff
@@ -26,6 +26,7 @@ from google.genai.types import (
     GenerateContentResponse,
     GenerateContentResponsePromptFeedback,
     GenerateContentResponseUsageMetadata,
+    GoogleSearch,
     HarmBlockThreshold,
     HarmCategory,
     HttpOptions,
@@ -48,6 +49,7 @@ from inspect_ai._util.content import (
 )
 from inspect_ai._util.content import (
     ContentAudio,
+    ContentData,
     ContentImage,
     ContentReasoning,
     ContentText,
@@ -74,6 +76,7 @@ from inspect_ai.model import (
     TopLogprob,
 )
 from inspect_ai.model._model_call import ModelCall
+from inspect_ai.model._providers._google_citations import get_candidate_citations
 from inspect_ai.tool import (
     ToolCall,
     ToolChoice,
@@ -247,7 +250,7 @@ class GoogleGenAIAPI(ModelAPI):
 
         # Create google-genai types.
         gemini_contents = await as_chat_messages(client, input)
-        gemini_tools = chat_tools(tools) if len(tools) > 0 else None
+        gemini_tools = self.chat_tools(tools) if len(tools) > 0 else None
         gemini_tool_config = chat_tool_config(tool_choice) if len(tools) > 0 else None
         parameters = GenerateContentConfig(
             http_options=HttpOptions(headers={HttpHooks.REQUEST_ID_HEADER: request_id}),
@@ -362,6 +365,61 @@ class GoogleGenAIAPI(ModelAPI):
         else:
             return None
 
+    def _use_native_search(self, tool: ToolInfo) -> bool:
+        return (
+            tool.name == "web_search"
+            and tool.options is not None
+            and "gemini" in tool.options
+            # Support "starts with" Gemini 2.0
+            and (self.is_gemini() and not self.is_gemini_1_5())
+        )
+
+    def _categorize_tool(
+        self, acc: tuple[bool, list[FunctionDeclaration]], tool: ToolInfo
+    ) -> tuple[bool, list[FunctionDeclaration]]:
+        """Reducer function that categorizes tools into native search vs function declarations.
+
+        Returns:
+            Tuple of (has_native_search, function_declarations) where has_native_search
+            is True if any tool uses native search, and function_declarations contains
+            all non-native-search tools converted to FunctionDeclaration objects.
+        """
+        return (
+            (True, acc[1])
+            if self._use_native_search(tool)
+            else (
+                acc[0],
+                acc[1]
+                + [
+                    FunctionDeclaration(
+                        name=tool.name,
+                        description=tool.description,
+                        parameters=schema_from_param(tool.parameters)
+                        if len(tool.parameters.properties) > 0
+                        else None,
+                    )
+                ],
+            )
+        )
+
+    def chat_tools(self, tools: list[ToolInfo]) -> ToolListUnion:
+        has_native_search, function_declarations = functools.reduce(
+            self._categorize_tool, tools, (False, list[FunctionDeclaration]())
+        )
+
+        # TODO: Google doesn't yet support native search concurrently with other tools.
+        # Revisit this from time to time to adapt when they fix it.
+        if has_native_search and function_declarations:
+            raise ValueError(
+                "Gemini does not yet support native search concurrently with other tools."
+            )
+
+        return (
+            [Tool(google_search=GoogleSearch())]
+            if has_native_search
+            else [Tool(function_declarations=function_declarations)]
+        )
+
 
 def safety_settings_to_list(
     safety_settings: list[SafetySettingDict],
```
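`chat_tools` is now a method: a `functools.reduce` over the tool list routes `web_search` (when it has `"gemini"` options on a Gemini 2.0+ model) to the native `GoogleSearch` tool and converts everything else to `FunctionDeclaration`s, raising if a task mixes the two. A self-contained sketch of the reducer pattern, with `SimpleTool` standing in for Inspect's `ToolInfo`:

```python
import functools
from dataclasses import dataclass


@dataclass
class SimpleTool:  # stand-in for inspect_ai's ToolInfo
    name: str
    options: dict | None = None


def uses_native_search(tool: SimpleTool) -> bool:
    return tool.name == "web_search" and bool(tool.options) and "gemini" in tool.options


def categorize(acc: tuple[bool, list[str]], t: SimpleTool) -> tuple[bool, list[str]]:
    # accumulate (has_native_search, other_tool_names), as _categorize_tool does
    return (True, acc[1]) if uses_native_search(t) else (acc[0], acc[1] + [t.name])


native, rest = functools.reduce(
    categorize, [SimpleTool("web_search", {"gemini": {}})], (False, [])
)
assert native and rest == []

native, rest = functools.reduce(
    categorize, [SimpleTool("bash"), SimpleTool("python")], (False, [])
)
assert not native and rest == ["bash", "python"]
```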
```diff
@@ -500,6 +558,8 @@ async def content_part(client: Client, content: InspectContent | str) -> Part:
         return Part.from_text(text=content.text or NO_CONTENT)
     elif isinstance(content, ContentReasoning):
         return Part.from_text(text=content.reasoning or NO_CONTENT)
+    elif isinstance(content, ContentData):
+        assert False, "Google provider should never encounter ContentData"
     else:
         return await chat_content_to_part(client, content)
 
@@ -538,20 +598,6 @@ async def extract_system_message_as_parts(
     return system_parts or None
 
 
-def chat_tools(tools: list[ToolInfo]) -> ToolListUnion:
-    declarations = [
-        FunctionDeclaration(
-            name=tool.name,
-            description=tool.description,
-            parameters=schema_from_param(tool.parameters)
-            if len(tool.parameters.properties) > 0
-            else None,
-        )
-        for tool in tools
-    ]
-    return [Tool(function_declarations=declarations)]
-
-
 # https://ai.google.dev/gemini-api/tutorials/extract_structured_data#define_the_schema
 def schema_from_param(
     param: ToolParam | ToolParams, nullable: bool | None = False
@@ -656,19 +702,36 @@ def completion_choice_from_candidate(
             | ContentImage
             | ContentAudio
             | ContentVideo
+            | ContentData
         ]
     ) = ""
     # content.parts can be None when the finish_reason is MALFORMED_FUNCTION_CALL
     elif candidate.content.parts is None:
         content = ""
     else:
-        content = [
-            ContentReasoning(reasoning=part.text)
-            if part.thought is True
-            else ContentText(text=part.text)
-            for part in candidate.content.parts
-            if part.text is not None
-        ]
+        # Google's grounded search metadata provides start/end indices for cited
+        # text based on the joining of all separate text parts (despite the doc
+        # suggesting that they provide part_index). Thankfully, the doc also says:
+        #
+        # Exactly one field within a Part should be set, representing the specific type
+        # of content being conveyed. Using multiple fields within the same `Part`
+        # instance is considered invalid.
+        #
+        # That means that we can safely collapse adjacent parts with a `text` field
+        # and not fear that we're breaking other types of content parts
+        parts = functools.reduce(
+            _combine_text_parts, candidate.content.parts, list[Part]()
+        )
+
+        content = [
+            ContentReasoning(reasoning=part.text)
+            if part.thought is True
+            else ContentText(
+                text=part.text, citations=get_candidate_citations(candidate)
+            )
+            for part in parts
+            if part.text is not None
+        ]
 
     # now tool calls
     tool_calls: list[ToolCall] = []
@@ -922,3 +985,12 @@
     files_db.put(content_sha256, str(upload.name))
     # return the file
     return upload
+
+
+def _combine_text_parts(acc: list[Part], part: Part) -> list[Part]:
+    """Combine adjacent text parts into a single part."""
+    return (
+        acc + [part]
+        if part.text is None or len(acc) == 0 or acc[-1].text is None
+        else acc[:-1] + [Part(text=acc[-1].text + part.text)]
+    )
```
inspect_ai/model/_providers/mistral.py

```diff
@@ -44,9 +44,15 @@ from typing_extensions import override
 # TODO: Migration guide:
 # https://github.com/mistralai/client-python/blob/main/MIGRATION.md
 from inspect_ai._util.constants import NO_CONTENT
-from inspect_ai._util.content import Content, ContentImage, ContentText
+from inspect_ai._util.content import (
+    Content,
+    ContentImage,
+    ContentReasoning,
+    ContentText,
+)
 from inspect_ai._util.http import is_retryable_http_status
 from inspect_ai._util.images import file_as_data_uri
+from inspect_ai.model._reasoning import parse_content_with_reasoning
 from inspect_ai.tool import ToolCall, ToolChoice, ToolFunction, ToolInfo
 
 from ..._util.httpx import httpx_should_retry
@@ -481,26 +487,33 @@ def completion_content(content: str | list[ContentChunk]) -> str | list[Content]
     if isinstance(content, str):
         return content
     else:
-        return [
+        return [item for c in content for item in completion_content_chunks(c)]
 
 
-def
+def completion_content_chunks(content: ContentChunk) -> list[Content]:
     if isinstance(content, ReferenceChunk):
         raise TypeError("ReferenceChunk content is not supported by Inspect.")
     elif isinstance(content, TextChunk):
-        return ContentText(text=content.text)
+        parsed = parse_content_with_reasoning(content.text)
+        if parsed:
+            return [
+                ContentReasoning(reasoning=parsed.reasoning),
+                ContentText(text=parsed.content),
+            ]
+        else:
+            return [ContentText(text=content.text)]
     elif isinstance(content, DocumentURLChunk):
-        return ContentText(text=content.document_url)
+        return [ContentText(text=content.document_url)]
     else:
         if isinstance(content.image_url, str):
-            return ContentImage(image=content.image_url)
+            return [ContentImage(image=content.image_url)]
         else:
             match content.image_url.detail:
                 case "low" | "high":
                     detail: Literal["auto", "low", "high"] = content.image_url.detail
                 case _:
                     detail = "auto"
-            return ContentImage(image=content.image_url.url, detail=detail)
+            return [ContentImage(image=content.image_url.url, detail=detail)]
 
 
 def completion_choices_from_response(
```
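Each Mistral `ContentChunk` now flattens to a `list[Content]`, so a single `TextChunk` can expand into a reasoning part plus a text part. A sketch of that expansion, assuming (as the code above implies) that `parse_content_with_reasoning` recognizes `<think>...</think>` blocks and returns `None` for plain text:

```python
from inspect_ai._util.content import Content, ContentReasoning, ContentText
from inspect_ai.model._reasoning import parse_content_with_reasoning


def text_to_content(text: str) -> list[Content]:
    # mirrors the TextChunk branch of completion_content_chunks above
    parsed = parse_content_with_reasoning(text)
    if parsed:
        return [
            ContentReasoning(reasoning=parsed.reasoning),
            ContentText(text=parsed.content),
        ]
    return [ContentText(text=text)]


parts = text_to_content("<think>check the SDK changelog</think>Use the v2 client.")
# -> [ContentReasoning(reasoning="check the SDK changelog"),
#     ContentText(text="Use the v2 client.")]
```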
inspect_ai/model/_providers/openai.py

```diff
@@ -13,6 +13,7 @@ from openai._types import NOT_GIVEN
 from openai.types.chat import ChatCompletion
 from typing_extensions import override
 
+from inspect_ai._util.deprecation import deprecation_warning
 from inspect_ai._util.error import PrerequisiteError
 from inspect_ai._util.logger import warn_once
 from inspect_ai.model._openai import chat_choices_from_openai
@@ -64,6 +65,8 @@ class OpenAIAPI(ModelAPI):
         api_key: str | None = None,
         config: GenerateConfig = GenerateConfig(),
         responses_api: bool | None = None,
+        # Can't use the XxxDeprecatedArgs approach since this already has a **param
+        # but responses_store is deprecated and should not be used.
         responses_store: Literal["auto"] | bool = "auto",
         service_tier: str | None = None,
         client_timeout: float | None = None,
@@ -88,19 +91,18 @@
         )
 
         # is this a model we use responses api by default for?
-
-
-
-            or self.is_codex()
-        )
+        responses_preferred = (
+            self.is_o_series() and not self.is_o1_early()
+        ) or self.is_codex()
 
         # resolve whether we are forcing the responses api
-        self.responses_api =
+        self.responses_api = self.is_computer_use_preview() or (
+            responses_api if responses_api is not None else responses_preferred
+        )
 
         # resolve whether we are using the responses store
-
-            responses_store
-        )
+        if isinstance(responses_store, bool):
+            deprecation_warning("`responses_store` is no longer supported.")
 
         # set service tier if specified
         self.service_tier = service_tier
@@ -260,7 +262,6 @@
             tool_choice=tool_choice,
             config=config,
             service_tier=self.service_tier,
-            store=self.responses_store,
         )
 
         # allocate request_id (so we can see it from ModelCall)
```