inspect-ai 0.3.103__py3-none-any.whl → 0.3.105__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/common.py +2 -1
- inspect_ai/_cli/eval.py +2 -2
- inspect_ai/_display/core/active.py +3 -0
- inspect_ai/_display/core/config.py +1 -0
- inspect_ai/_display/core/panel.py +21 -13
- inspect_ai/_display/core/results.py +3 -7
- inspect_ai/_display/core/rich.py +3 -5
- inspect_ai/_display/log/__init__.py +0 -0
- inspect_ai/_display/log/display.py +173 -0
- inspect_ai/_display/plain/display.py +2 -2
- inspect_ai/_display/rich/display.py +2 -4
- inspect_ai/_display/textual/app.py +1 -6
- inspect_ai/_display/textual/widgets/task_detail.py +3 -14
- inspect_ai/_display/textual/widgets/tasks.py +1 -1
- inspect_ai/_eval/eval.py +1 -1
- inspect_ai/_eval/evalset.py +3 -3
- inspect_ai/_eval/registry.py +6 -1
- inspect_ai/_eval/run.py +5 -1
- inspect_ai/_eval/task/constants.py +1 -0
- inspect_ai/_eval/task/log.py +2 -0
- inspect_ai/_eval/task/run.py +65 -39
- inspect_ai/_util/citation.py +88 -0
- inspect_ai/_util/content.py +24 -2
- inspect_ai/_util/json.py +17 -2
- inspect_ai/_util/registry.py +19 -4
- inspect_ai/_view/schema.py +0 -6
- inspect_ai/_view/server.py +17 -0
- inspect_ai/_view/www/dist/assets/index.css +93 -31
- inspect_ai/_view/www/dist/assets/index.js +10639 -10011
- inspect_ai/_view/www/log-schema.json +418 -1
- inspect_ai/_view/www/node_modules/flatted/python/flatted.py +149 -0
- inspect_ai/_view/www/node_modules/katex/src/fonts/generate_fonts.py +58 -0
- inspect_ai/_view/www/node_modules/katex/src/metrics/extract_tfms.py +114 -0
- inspect_ai/_view/www/node_modules/katex/src/metrics/extract_ttfs.py +122 -0
- inspect_ai/_view/www/node_modules/katex/src/metrics/format_json.py +28 -0
- inspect_ai/_view/www/node_modules/katex/src/metrics/parse_tfm.py +211 -0
- inspect_ai/_view/www/package.json +2 -2
- inspect_ai/_view/www/src/@types/log.d.ts +140 -39
- inspect_ai/_view/www/src/app/content/RecordTree.tsx +13 -0
- inspect_ai/_view/www/src/app/log-view/LogView.tsx +1 -1
- inspect_ai/_view/www/src/app/routing/logNavigation.ts +31 -0
- inspect_ai/_view/www/src/app/routing/{navigationHooks.ts → sampleNavigation.ts} +39 -86
- inspect_ai/_view/www/src/app/samples/SampleDialog.tsx +1 -1
- inspect_ai/_view/www/src/app/samples/SampleDisplay.tsx +1 -1
- inspect_ai/_view/www/src/app/samples/chat/ChatMessage.module.css +4 -0
- inspect_ai/_view/www/src/app/samples/chat/ChatMessage.tsx +17 -0
- inspect_ai/_view/www/src/app/samples/chat/MessageCitations.module.css +16 -0
- inspect_ai/_view/www/src/app/samples/chat/MessageCitations.tsx +63 -0
- inspect_ai/_view/www/src/app/samples/chat/MessageContent.module.css +6 -0
- inspect_ai/_view/www/src/app/samples/chat/MessageContent.tsx +174 -25
- inspect_ai/_view/www/src/app/samples/chat/MessageContents.tsx +21 -3
- inspect_ai/_view/www/src/app/samples/chat/content-data/ContentDataView.module.css +7 -0
- inspect_ai/_view/www/src/app/samples/chat/content-data/ContentDataView.tsx +111 -0
- inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearch.module.css +10 -0
- inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearch.tsx +14 -0
- inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearchResults.module.css +19 -0
- inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearchResults.tsx +49 -0
- inspect_ai/_view/www/src/app/samples/chat/messages.ts +7 -1
- inspect_ai/_view/www/src/app/samples/chat/tools/ToolCallView.tsx +12 -2
- inspect_ai/_view/www/src/app/samples/chat/types.ts +4 -0
- inspect_ai/_view/www/src/app/samples/list/SampleList.tsx +1 -1
- inspect_ai/_view/www/src/app/samples/sample-tools/filters.ts +26 -0
- inspect_ai/_view/www/src/app/samples/sample-tools/sample-filter/SampleFilter.tsx +14 -3
- inspect_ai/_view/www/src/app/samples/sample-tools/sample-filter/completions.ts +359 -7
- inspect_ai/_view/www/src/app/samples/sample-tools/sample-filter/language.ts +6 -0
- inspect_ai/_view/www/src/app/samples/sampleLimit.ts +2 -2
- inspect_ai/_view/www/src/app/samples/transcript/ModelEventView.tsx +1 -1
- inspect_ai/_view/www/src/app/samples/transcript/SampleLimitEventView.tsx +4 -4
- inspect_ai/_view/www/src/app/samples/transcript/outline/OutlineRow.tsx +1 -1
- inspect_ai/_view/www/src/app/samples/transcript/outline/TranscriptOutline.tsx +1 -1
- inspect_ai/_view/www/src/client/api/api-browser.ts +25 -0
- inspect_ai/_view/www/src/client/api/api-http.ts +3 -0
- inspect_ai/_view/www/src/client/api/api-vscode.ts +6 -0
- inspect_ai/_view/www/src/client/api/client-api.ts +3 -0
- inspect_ai/_view/www/src/client/api/jsonrpc.ts +1 -0
- inspect_ai/_view/www/src/client/api/types.ts +3 -0
- inspect_ai/_view/www/src/components/MarkdownDiv.tsx +15 -2
- inspect_ai/_view/www/src/state/samplePolling.ts +17 -1
- inspect_ai/_view/www/src/tests/README.md +2 -2
- inspect_ai/_view/www/src/utils/git.ts +3 -1
- inspect_ai/_view/www/src/utils/html.ts +6 -0
- inspect_ai/agent/_handoff.py +8 -5
- inspect_ai/agent/_react.py +5 -5
- inspect_ai/dataset/_dataset.py +1 -1
- inspect_ai/log/_condense.py +5 -0
- inspect_ai/log/_file.py +4 -1
- inspect_ai/log/_log.py +9 -4
- inspect_ai/log/_recorders/json.py +4 -2
- inspect_ai/log/_samples.py +5 -0
- inspect_ai/log/_util.py +2 -0
- inspect_ai/model/__init__.py +14 -0
- inspect_ai/model/_call_tools.py +17 -8
- inspect_ai/model/_chat_message.py +3 -0
- inspect_ai/model/_openai_responses.py +80 -34
- inspect_ai/model/_providers/_anthropic_citations.py +158 -0
- inspect_ai/model/_providers/_google_citations.py +100 -0
- inspect_ai/model/_providers/anthropic.py +219 -36
- inspect_ai/model/_providers/google.py +98 -22
- inspect_ai/model/_providers/mistral.py +20 -7
- inspect_ai/model/_providers/openai.py +11 -10
- inspect_ai/model/_providers/openai_compatible.py +3 -2
- inspect_ai/model/_providers/openai_responses.py +2 -5
- inspect_ai/model/_providers/perplexity.py +123 -0
- inspect_ai/model/_providers/providers.py +13 -2
- inspect_ai/model/_providers/vertex.py +3 -0
- inspect_ai/model/_trim.py +5 -0
- inspect_ai/tool/__init__.py +14 -0
- inspect_ai/tool/_mcp/_mcp.py +5 -2
- inspect_ai/tool/_mcp/sampling.py +19 -3
- inspect_ai/tool/_mcp/server.py +1 -1
- inspect_ai/tool/_tool.py +10 -1
- inspect_ai/tool/_tools/_web_search/_base_http_provider.py +104 -0
- inspect_ai/tool/_tools/_web_search/_exa.py +78 -0
- inspect_ai/tool/_tools/_web_search/_google.py +22 -25
- inspect_ai/tool/_tools/_web_search/_tavily.py +47 -65
- inspect_ai/tool/_tools/_web_search/_web_search.py +83 -36
- inspect_ai/tool/_tools/_web_search/_web_search_provider.py +7 -0
- inspect_ai/util/__init__.py +8 -0
- inspect_ai/util/_background.py +64 -0
- inspect_ai/util/_display.py +11 -2
- inspect_ai/util/_limit.py +72 -5
- inspect_ai/util/_sandbox/__init__.py +2 -0
- inspect_ai/util/_sandbox/docker/compose.py +2 -2
- inspect_ai/util/_sandbox/service.py +28 -7
- inspect_ai/util/_span.py +12 -1
- inspect_ai/util/_subprocess.py +51 -38
- {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.105.dist-info}/METADATA +2 -2
- {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.105.dist-info}/RECORD +134 -109
- /inspect_ai/model/{_openai_computer_use.py → _providers/_openai_computer_use.py} +0 -0
- /inspect_ai/model/{_openai_web_search.py → _providers/_openai_web_search.py} +0 -0
- {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.105.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.105.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.105.dist-info}/licenses/LICENSE +0 -0
- {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.105.dist-info}/top_level.txt +0 -0
@@ -22,6 +22,8 @@ from anthropic.types import (
|
|
22
22
|
MessageParam,
|
23
23
|
RedactedThinkingBlock,
|
24
24
|
RedactedThinkingBlockParam,
|
25
|
+
ServerToolUseBlock,
|
26
|
+
ServerToolUseBlockParam,
|
25
27
|
TextBlock,
|
26
28
|
TextBlockParam,
|
27
29
|
ThinkingBlock,
|
@@ -31,11 +33,15 @@ from anthropic.types import (
|
|
31
33
|
ToolTextEditor20250124Param,
|
32
34
|
ToolUseBlock,
|
33
35
|
ToolUseBlockParam,
|
36
|
+
WebSearchTool20250305Param,
|
37
|
+
WebSearchToolResultBlock,
|
38
|
+
WebSearchToolResultBlockParam,
|
34
39
|
message_create_params,
|
35
40
|
)
|
36
41
|
from anthropic.types.beta import (
|
37
42
|
BetaToolComputerUse20250124Param,
|
38
43
|
BetaToolTextEditor20241022Param,
|
44
|
+
BetaToolTextEditor20250429Param,
|
39
45
|
)
|
40
46
|
from pydantic import JsonValue
|
41
47
|
from typing_extensions import override
|
@@ -43,6 +49,7 @@ from typing_extensions import override
|
|
43
49
|
from inspect_ai._util.constants import BASE_64_DATA_REMOVED, NO_CONTENT
|
44
50
|
from inspect_ai._util.content import (
|
45
51
|
Content,
|
52
|
+
ContentData,
|
46
53
|
ContentImage,
|
47
54
|
ContentReasoning,
|
48
55
|
ContentText,
|
@@ -61,6 +68,10 @@ from .._generate_config import GenerateConfig
|
|
61
68
|
from .._model import ModelAPI
|
62
69
|
from .._model_call import ModelCall
|
63
70
|
from .._model_output import ChatCompletionChoice, ModelOutput, ModelUsage, StopReason
|
71
|
+
from .._providers._anthropic_citations import (
|
72
|
+
to_anthropic_citation,
|
73
|
+
to_inspect_citation,
|
74
|
+
)
|
64
75
|
from .util import environment_prerequisite_error, model_base_url
|
65
76
|
from .util.hooks import HttpxHooks
|
66
77
|
|
@@ -70,6 +81,14 @@ ANTHROPIC_API_KEY = "ANTHROPIC_API_KEY"
|
|
70
81
|
|
71
82
|
INTERNAL_COMPUTER_TOOL_NAME = "computer"
|
72
83
|
|
84
|
+
WEB_SEARCH_COMPATIBLE_MODELS = [
|
85
|
+
"claude-opus-4-20250514",
|
86
|
+
"claude-sonnet-4-20250514",
|
87
|
+
"claude-3-7-sonnet-20250219",
|
88
|
+
"claude-3-5-sonnet-latest",
|
89
|
+
"claude-3-5-haiku-latest",
|
90
|
+
]
|
91
|
+
|
73
92
|
|
74
93
|
class AnthropicAPI(ModelAPI):
|
75
94
|
def __init__(
|
@@ -232,27 +251,19 @@ class AnthropicAPI(ModelAPI):
|
|
232
251
|
if self.extra_body is not None:
|
233
252
|
request["extra_body"] = self.extra_body
|
234
253
|
|
235
|
-
# make request (unless
|
254
|
+
# make request (unless overridden, stream if we are using reasoning)
|
236
255
|
streaming = (
|
237
256
|
self.is_using_thinking(config)
|
238
257
|
if self.streaming == "auto"
|
239
258
|
else self.streaming
|
240
259
|
)
|
241
|
-
if streaming:
|
242
|
-
async with self.client.messages.stream(**request) as stream:
|
243
|
-
message = await stream.get_final_message()
|
244
|
-
else:
|
245
|
-
message = await self.client.messages.create(**request, stream=False)
|
246
260
|
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
# extract output
|
251
|
-
output = await model_output_from_message(
|
252
|
-
self.client, self.service_model_name(), message, tools
|
261
|
+
message, output = await self._perform_request_and_continuations(
|
262
|
+
request, streaming, tools
|
253
263
|
)
|
254
264
|
|
255
|
-
|
265
|
+
response = message.model_dump()
|
266
|
+
|
256
267
|
return output, model_call()
|
257
268
|
|
258
269
|
except BadRequestError as ex:
|
@@ -269,6 +280,50 @@ class AnthropicAPI(ModelAPI):
|
|
269
280
|
else:
|
270
281
|
raise ex
|
271
282
|
|
283
|
+
async def _perform_request_and_continuations(
    self,
    request: dict[str, Any],
    streaming: bool,
    tools: list[ToolInfo],
) -> tuple[Message, ModelOutput]:
    """Perform a request, recursing while the model asks for a continuation.

    This helper is split out so that it can easily call itself recursively
    in cases where the model requires a continuation. The result of the
    initial request is the "head" and the result of the continuation is
    the "tail".

    Args:
        request: Keyword arguments for the Anthropic messages API call.
        streaming: Use the streaming endpoint when True, otherwise a plain
            `create` call.
        tools: Tool definitions passed through to `model_output_from_message`.

    Returns:
        The head `Message` together with the `ModelOutput`. When a
        continuation was needed, the output is the tail output with the head
        content prepended and the head usage added in.
    """

    def _add_optional(left: int | None, right: int | None) -> int | None:
        # Keep None only when neither request reported the counter.
        if left is None and right is None:
            return None
        return (left or 0) + (right or 0)

    if streaming:
        async with self.client.messages.stream(**request) as stream:
            head_message = await stream.get_final_message()
    else:
        head_message = await self.client.messages.create(**request, stream=False)

    head_model_output, continuation_required = await model_output_from_message(
        self.client, self.service_model_name(), head_message, tools
    )

    if not continuation_required:
        return head_message, head_model_output

    # Replay the conversation with the head response appended so the model
    # can pick up where it left off.
    tail_request = dict(request)
    tail_request["messages"] = request["messages"] + [
        MessageParam(role=head_message.role, content=head_message.content)
    ]
    _, tail_model_output = await self._perform_request_and_continuations(
        tail_request, streaming, tools
    )

    head_content = _content_list(head_model_output.message.content)
    tail_content = _content_list(tail_model_output.message.content)
    tail_model_output.message.content = head_content + tail_content

    # Every request in the chain consumed tokens; fold the head usage into
    # the tail so the first request is not dropped from the reported total.
    head_usage = head_model_output.usage
    tail_usage = tail_model_output.usage
    if head_usage is not None and tail_usage is not None:
        tail_model_output.usage = ModelUsage(
            input_tokens=head_usage.input_tokens + tail_usage.input_tokens,
            output_tokens=head_usage.output_tokens + tail_usage.output_tokens,
            total_tokens=head_usage.total_tokens + tail_usage.total_tokens,
            input_tokens_cache_write=_add_optional(
                head_usage.input_tokens_cache_write,
                tail_usage.input_tokens_cache_write,
            ),
            input_tokens_cache_read=_add_optional(
                head_usage.input_tokens_cache_read,
                tail_usage.input_tokens_cache_read,
            ),
            reasoning_tokens=_add_optional(
                head_usage.reasoning_tokens, tail_usage.reasoning_tokens
            ),
        )
    elif head_usage is not None:
        tail_model_output.usage = head_usage

    # TODO:
    # It looks weird to return the head message with the tail output, but
    # the contract for this function is that it returns the head message
    # even when it has needed to recurse. This is because model_call()
    # in the caller doesn't currently support multiple requests.
    return head_message, tail_model_output
|
326
|
+
|
272
327
|
def completion_config(
|
273
328
|
self, config: GenerateConfig
|
274
329
|
) -> tuple[dict[str, Any], dict[str, str], list[str]]:
|
@@ -343,6 +398,9 @@ class AnthropicAPI(ModelAPI):
|
|
343
398
|
def is_claude_3_7(self) -> bool:
|
344
399
|
return "claude-3-7-" in self.service_model_name()
|
345
400
|
|
401
|
+
def is_claude_4(self) -> bool:
    """Return True when the service model name identifies a Claude 4 model.

    Two naming schemes are recognised: the version-first form
    (`claude-4-<name>`) and the family-first form used by the ids listed in
    WEB_SEARCH_COMPATIBLE_MODELS (for example `claude-sonnet-4-20250514`).
    """
    # The lookahead stops a longer version number such as "-40" from matching.
    return (
        re.search(
            r"claude-(?:4-[a-zA-Z]|[a-zA-Z]+-4(?![0-9]))",
            self.service_model_name(),
        )
        is not None
    )
|
403
|
+
|
346
404
|
@override
|
347
405
|
def connection_key(self) -> str:
|
348
406
|
return str(self.api_key)
|
@@ -521,7 +579,11 @@ class AnthropicAPI(ModelAPI):
|
|
521
579
|
self, tool: ToolInfo, config: GenerateConfig
|
522
580
|
) -> Optional["ToolParamDef"]:
|
523
581
|
return (
|
524
|
-
(
|
582
|
+
(
|
583
|
+
self.computer_use_tool_param(tool)
|
584
|
+
or self.text_editor_tool_param(tool)
|
585
|
+
or self.web_search_tool_param(tool)
|
586
|
+
)
|
525
587
|
if config.internal_tools is not False
|
526
588
|
else None
|
527
589
|
)
|
@@ -569,7 +631,17 @@ class AnthropicAPI(ModelAPI):
|
|
569
631
|
|
570
632
|
def text_editor_tool_param(
|
571
633
|
self, tool: ToolInfo
|
572
|
-
) ->
|
634
|
+
) -> (
|
635
|
+
ToolTextEditor20250124Param
|
636
|
+
| BetaToolTextEditor20241022Param
|
637
|
+
| BetaToolTextEditor20250429Param
|
638
|
+
| None
|
639
|
+
):
|
640
|
+
# See: https://docs.anthropic.com/en/docs/agents-and-tools/tool-use/text-editor-tool#before-using-the-text-editor-tool
|
641
|
+
# TODO: It would be great to enhance our `is_claude_xxx` functions to help here.
|
642
|
+
if self.model_name.startswith(("claude-3-5-haiku", "claude-3-opus")):
|
643
|
+
return None
|
644
|
+
|
573
645
|
# check for compatible 'text editor' tool
|
574
646
|
if tool.name == "text_editor" and (
|
575
647
|
sorted(tool.parameters.properties.keys())
|
@@ -586,7 +658,11 @@ class AnthropicAPI(ModelAPI):
|
|
586
658
|
)
|
587
659
|
):
|
588
660
|
return (
|
589
|
-
|
661
|
+
BetaToolTextEditor20250429Param(
|
662
|
+
type="text_editor_20250429", name="str_replace_based_edit_tool"
|
663
|
+
)
|
664
|
+
if self.is_claude_4()
|
665
|
+
else BetaToolTextEditor20241022Param(
|
590
666
|
type="text_editor_20241022", name="str_replace_editor"
|
591
667
|
)
|
592
668
|
if self.is_claude_3_5()
|
@@ -598,6 +674,49 @@ class AnthropicAPI(ModelAPI):
|
|
598
674
|
else:
|
599
675
|
return None
|
600
676
|
|
677
|
+
def web_search_tool_param(
    self, tool: ToolInfo
) -> WebSearchTool20250305Param | None:
    """Return the native Anthropic web search tool param for `tool`, if applicable.

    The native param is produced only when the tool is named "web_search",
    carries an "anthropic" entry in its options, and this model's name is
    listed in WEB_SEARCH_COMPATIBLE_MODELS. Otherwise None is returned.
    """
    if tool.name != "web_search":
        return None
    if not tool.options or "anthropic" not in tool.options:
        return None
    if self.model_name not in WEB_SEARCH_COMPATIBLE_MODELS:
        return None
    return _web_search_tool_param(tool.options["anthropic"])
|
689
|
+
|
690
|
+
|
691
|
+
def _web_search_tool_param(
    maybe_anthropic_options: object,
) -> WebSearchTool20250305Param:
    """Build a WebSearchTool20250305Param from optional provider options.

    Args:
        maybe_anthropic_options: None or a dict. The recognised keys
            (allowed_domains, blocked_domains, cache_control, max_uses,
            user_location) are copied onto the result when present.

    Raises:
        TypeError: If the options are neither None nor a dict.
    """
    if not (
        maybe_anthropic_options is None
        or isinstance(maybe_anthropic_options, dict)
    ):
        raise TypeError(
            f"Expected a dictionary for anthropic_options, got {type(maybe_anthropic_options)}"
        )

    result = WebSearchTool20250305Param(
        name="web_search",
        type="web_search_20250305",
    )

    # None and an empty dict both mean "no extra settings".
    if not maybe_anthropic_options:
        return result

    # Write through a plain-dict view so the keys can be set in a loop.
    settings = cast(dict[str, Any], result)
    for option_name in (
        "allowed_domains",
        "blocked_domains",
        "cache_control",
        "max_uses",
        "user_location",
    ):
        if option_name in maybe_anthropic_options:
            settings[option_name] = maybe_anthropic_options[option_name]

    return result
|
719
|
+
|
601
720
|
|
602
721
|
# tools can be either a stock tool param or a special Anthropic native use tool param
|
603
722
|
ToolParamDef = (
|
@@ -605,6 +724,8 @@ ToolParamDef = (
|
|
605
724
|
| BetaToolComputerUse20250124Param
|
606
725
|
| ToolTextEditor20250124Param
|
607
726
|
| BetaToolTextEditor20241022Param
|
727
|
+
| BetaToolTextEditor20250429Param
|
728
|
+
| WebSearchTool20250305Param
|
608
729
|
)
|
609
730
|
|
610
731
|
|
@@ -614,6 +735,8 @@ def add_cache_control(
|
|
614
735
|
| BetaToolComputerUse20250124Param
|
615
736
|
| ToolTextEditor20250124Param
|
616
737
|
| BetaToolTextEditor20241022Param
|
738
|
+
| BetaToolTextEditor20250429Param
|
739
|
+
| WebSearchTool20250305Param
|
617
740
|
| dict[str, Any],
|
618
741
|
) -> None:
|
619
742
|
cast(dict[str, Any], param)["cache_control"] = {"type": "ephemeral"}
|
@@ -698,6 +821,8 @@ async def message_param(message: ChatMessage) -> MessageParam:
|
|
698
821
|
| ImageBlockParam
|
699
822
|
| ThinkingBlockParam
|
700
823
|
| RedactedThinkingBlockParam
|
824
|
+
| ServerToolUseBlockParam
|
825
|
+
| WebSearchToolResultBlockParam
|
701
826
|
]
|
702
827
|
) = message.error.message
|
703
828
|
# anthropic requires that content be populated when
|
@@ -735,6 +860,8 @@ async def message_param(message: ChatMessage) -> MessageParam:
|
|
735
860
|
| RedactedThinkingBlockParam
|
736
861
|
| ImageBlockParam
|
737
862
|
| ToolUseBlockParam
|
863
|
+
| ServerToolUseBlockParam
|
864
|
+
| WebSearchToolResultBlockParam
|
738
865
|
] = (
|
739
866
|
[TextBlockParam(type="text", text=message.content or NO_CONTENT)]
|
740
867
|
if isinstance(message.content, str)
|
@@ -785,7 +912,7 @@ async def model_output_from_message(
|
|
785
912
|
model: str,
|
786
913
|
message: Message,
|
787
914
|
tools: list[ToolInfo],
|
788
|
-
) -> ModelOutput:
|
915
|
+
) -> tuple[ModelOutput, bool]:
|
789
916
|
# extract content and tool calls
|
790
917
|
content: list[Content] = []
|
791
918
|
reasoning_tokens = 0
|
@@ -800,7 +927,20 @@ async def model_output_from_message(
|
|
800
927
|
content_text = content_text.replace("<result>", "").replace(
|
801
928
|
"</result>", ""
|
802
929
|
)
|
803
|
-
content.append(
|
930
|
+
content.append(
|
931
|
+
ContentText(
|
932
|
+
type="text",
|
933
|
+
text=content_text,
|
934
|
+
citations=(
|
935
|
+
[
|
936
|
+
to_inspect_citation(citation)
|
937
|
+
for citation in content_block.citations
|
938
|
+
]
|
939
|
+
if content_block.citations
|
940
|
+
else None
|
941
|
+
),
|
942
|
+
)
|
943
|
+
)
|
804
944
|
elif isinstance(content_block, ToolUseBlock):
|
805
945
|
tool_calls = tool_calls or []
|
806
946
|
(tool_name, internal_name) = _names_for_tool_call(content_block.name, tools)
|
@@ -812,6 +952,10 @@ async def model_output_from_message(
|
|
812
952
|
internal=internal_name,
|
813
953
|
)
|
814
954
|
)
|
955
|
+
elif isinstance(content_block, ServerToolUseBlock):
|
956
|
+
content.append(ContentData(data=content_block.model_dump()))
|
957
|
+
elif isinstance(content_block, WebSearchToolResultBlock):
|
958
|
+
content.append(ContentData(data=content_block.model_dump()))
|
815
959
|
elif isinstance(content_block, RedactedThinkingBlock):
|
816
960
|
content.append(
|
817
961
|
ContentReasoning(reasoning=content_block.data, redacted=True)
|
@@ -827,11 +971,12 @@ async def model_output_from_message(
|
|
827
971
|
)
|
828
972
|
|
829
973
|
# resolve choice
|
974
|
+
stop_reason, pause_turn = message_stop_reason(message)
|
830
975
|
choice = ChatCompletionChoice(
|
831
976
|
message=ChatMessageAssistant(
|
832
977
|
content=content, tool_calls=tool_calls, model=model, source="generate"
|
833
978
|
),
|
834
|
-
stop_reason=
|
979
|
+
stop_reason=stop_reason,
|
835
980
|
)
|
836
981
|
|
837
982
|
# return ModelOutput
|
@@ -844,17 +989,20 @@ async def model_output_from_message(
|
|
844
989
|
+ (input_tokens_cache_read or 0)
|
845
990
|
+ message.usage.output_tokens # includes reasoning tokens
|
846
991
|
)
|
847
|
-
return
|
848
|
-
|
849
|
-
|
850
|
-
|
851
|
-
|
852
|
-
|
853
|
-
|
854
|
-
|
855
|
-
|
856
|
-
|
992
|
+
return (
|
993
|
+
ModelOutput(
|
994
|
+
model=message.model,
|
995
|
+
choices=[choice],
|
996
|
+
usage=ModelUsage(
|
997
|
+
input_tokens=message.usage.input_tokens,
|
998
|
+
output_tokens=message.usage.output_tokens,
|
999
|
+
total_tokens=total_tokens,
|
1000
|
+
input_tokens_cache_write=input_tokens_cache_write,
|
1001
|
+
input_tokens_cache_read=input_tokens_cache_read,
|
1002
|
+
reasoning_tokens=reasoning_tokens if reasoning_tokens > 0 else None,
|
1003
|
+
),
|
857
1004
|
),
|
1005
|
+
pause_turn,
|
858
1006
|
)
|
859
1007
|
|
860
1008
|
|
@@ -880,6 +1028,7 @@ def _names_for_tool_call(
|
|
880
1028
|
(INTERNAL_COMPUTER_TOOL_NAME, "computer_20250124", "computer"),
|
881
1029
|
("str_replace_editor", "text_editor_20241022", "text_editor"),
|
882
1030
|
("str_replace_editor", "text_editor_20250124", "text_editor"),
|
1031
|
+
("str_replace_based_edit_tool", "text_editor_20250429", "text_editor"),
|
883
1032
|
("bash", "bash_20250124", "bash_session"),
|
884
1033
|
)
|
885
1034
|
|
@@ -893,16 +1042,18 @@ def _names_for_tool_call(
|
|
893
1042
|
)
|
894
1043
|
|
895
1044
|
|
896
|
-
def message_stop_reason(message: Message) -> tuple[StopReason, bool]:
    """Translate an Anthropic stop reason into an Inspect stop reason.

    Returns:
        A pair of (stop reason, pause flag). The flag is True only when the
        message's stop reason is "pause_turn", which is otherwise reported
        as "unknown".
    """
    reason = message.stop_reason
    if reason == "end_turn" or reason == "stop_sequence":
        return "stop", False
    if reason == "tool_use":
        return "tool_calls", False
    if reason == "max_tokens":
        return "max_tokens", False
    if reason == "refusal":
        return "content_filter", False
    # Anything else (including no stop reason) is "unknown".
    return "unknown", reason == "pause_turn"
|
906
1057
|
|
907
1058
|
|
908
1059
|
def split_system_messages(
|
@@ -918,9 +1069,24 @@ def split_system_messages(
|
|
918
1069
|
|
919
1070
|
async def message_param_content(
|
920
1071
|
content: Content,
|
921
|
-
) ->
|
1072
|
+
) -> (
|
1073
|
+
TextBlockParam
|
1074
|
+
| ImageBlockParam
|
1075
|
+
| ThinkingBlockParam
|
1076
|
+
| RedactedThinkingBlockParam
|
1077
|
+
| ServerToolUseBlockParam
|
1078
|
+
| WebSearchToolResultBlockParam
|
1079
|
+
):
|
922
1080
|
if isinstance(content, ContentText):
|
923
|
-
|
1081
|
+
citations = (
|
1082
|
+
[to_anthropic_citation(citation) for citation in content.citations]
|
1083
|
+
if content.citations
|
1084
|
+
else None
|
1085
|
+
)
|
1086
|
+
|
1087
|
+
return TextBlockParam(
|
1088
|
+
type="text", text=content.text or NO_CONTENT, citations=citations
|
1089
|
+
)
|
924
1090
|
elif isinstance(content, ContentImage):
|
925
1091
|
# resolve to url
|
926
1092
|
image = await file_as_data_uri(content.image)
|
@@ -948,6 +1114,19 @@ async def message_param_content(
|
|
948
1114
|
return ThinkingBlockParam(
|
949
1115
|
type="thinking", thinking=content.reasoning, signature=content.signature
|
950
1116
|
)
|
1117
|
+
elif isinstance(content, ContentData):
|
1118
|
+
match content.data.get("type", None):
|
1119
|
+
case "server_tool_use":
|
1120
|
+
return cast(
|
1121
|
+
ServerToolUseBlockParam,
|
1122
|
+
ServerToolUseBlock.model_validate(content.data).model_dump(),
|
1123
|
+
)
|
1124
|
+
case "web_search_tool_result":
|
1125
|
+
return cast(
|
1126
|
+
WebSearchToolResultBlockParam,
|
1127
|
+
WebSearchToolResultBlock.model_validate(content.data).model_dump(),
|
1128
|
+
)
|
1129
|
+
raise NotImplementedError()
|
951
1130
|
else:
|
952
1131
|
raise RuntimeError(
|
953
1132
|
"Anthropic models do not currently support audio or video inputs."
|
@@ -990,3 +1169,7 @@ def model_call_filter(key: JsonValue | None, value: JsonValue) -> JsonValue:
|
|
990
1169
|
value = copy(value)
|
991
1170
|
value.update(data=BASE_64_DATA_REMOVED)
|
992
1171
|
return value
|
1172
|
+
|
1173
|
+
|
1174
|
+
def _content_list(input: str | list[Content]) -> list[Content]:
    """Return `input` as a content list, wrapping a plain string in one ContentText."""
    if isinstance(input, str):
        return [ContentText(text=input)]
    return input
|
@@ -26,6 +26,7 @@ from google.genai.types import (
|
|
26
26
|
GenerateContentResponse,
|
27
27
|
GenerateContentResponsePromptFeedback,
|
28
28
|
GenerateContentResponseUsageMetadata,
|
29
|
+
GoogleSearch,
|
29
30
|
HarmBlockThreshold,
|
30
31
|
HarmCategory,
|
31
32
|
HttpOptions,
|
@@ -48,6 +49,7 @@ from inspect_ai._util.content import (
|
|
48
49
|
)
|
49
50
|
from inspect_ai._util.content import (
|
50
51
|
ContentAudio,
|
52
|
+
ContentData,
|
51
53
|
ContentImage,
|
52
54
|
ContentReasoning,
|
53
55
|
ContentText,
|
@@ -74,6 +76,7 @@ from inspect_ai.model import (
|
|
74
76
|
TopLogprob,
|
75
77
|
)
|
76
78
|
from inspect_ai.model._model_call import ModelCall
|
79
|
+
from inspect_ai.model._providers._google_citations import get_candidate_citations
|
77
80
|
from inspect_ai.tool import (
|
78
81
|
ToolCall,
|
79
82
|
ToolChoice,
|
@@ -247,7 +250,7 @@ class GoogleGenAIAPI(ModelAPI):
|
|
247
250
|
|
248
251
|
# Create google-genai types.
|
249
252
|
gemini_contents = await as_chat_messages(client, input)
|
250
|
-
gemini_tools = chat_tools(tools) if len(tools) > 0 else None
|
253
|
+
gemini_tools = self.chat_tools(tools) if len(tools) > 0 else None
|
251
254
|
gemini_tool_config = chat_tool_config(tool_choice) if len(tools) > 0 else None
|
252
255
|
parameters = GenerateContentConfig(
|
253
256
|
http_options=HttpOptions(headers={HttpHooks.REQUEST_ID_HEADER: request_id}),
|
@@ -362,6 +365,61 @@ class GoogleGenAIAPI(ModelAPI):
|
|
362
365
|
else:
|
363
366
|
return None
|
364
367
|
|
368
|
+
def _use_native_search(self, tool: ToolInfo) -> bool:
    """Decide whether `tool` should be served by Gemini's native search.

    True requires a tool named "web_search" whose options contain a "gemini"
    entry, on a model for which `is_gemini()` holds and `is_gemini_1_5()`
    does not.
    """
    if tool.name != "web_search":
        return False
    if tool.options is None or "gemini" not in tool.options:
        return False
    # Support "starts with" Gemini 2.0
    return self.is_gemini() and not self.is_gemini_1_5()
|
376
|
+
|
377
|
+
def _categorize_tool(
    self, acc: tuple[bool, list[FunctionDeclaration]], tool: ToolInfo
) -> tuple[bool, list[FunctionDeclaration]]:
    """Fold one tool into the (has_native_search, function_declarations) accumulator.

    A tool that uses native search sets the flag and leaves the declaration
    list unchanged. Any other tool is converted to a FunctionDeclaration and
    appended to a new list, so the incoming accumulator is never mutated.

    Returns:
        The updated (has_native_search, function_declarations) tuple.
    """
    if self._use_native_search(tool):
        return (True, acc[1])

    # Tools without parameters are declared with no schema at all.
    if len(tool.parameters.properties) > 0:
        parameters = schema_from_param(tool.parameters)
    else:
        parameters = None

    declaration = FunctionDeclaration(
        name=tool.name,
        description=tool.description,
        parameters=parameters,
    )
    return (acc[0], acc[1] + [declaration])
|
404
|
+
|
405
|
+
def chat_tools(self, tools: list[ToolInfo]) -> ToolListUnion:
    """Convert Inspect tool definitions into the Gemini tool list.

    Returns a single native-search Tool when a tool uses native search,
    otherwise a single Tool carrying all function declarations.

    Raises:
        ValueError: If native search is requested alongside other tools.
    """
    state: tuple[bool, list[FunctionDeclaration]] = (False, [])
    for tool in tools:
        state = self._categorize_tool(state, tool)
    has_native_search, function_declarations = state

    # TODO: Google doesn't yet support native search concurrently with other tools.
    # Revisit this from time to time to adapt when they fix it.
    if has_native_search and function_declarations:
        raise ValueError(
            "Gemini does not yet support native search concurrently with other tools."
        )

    if has_native_search:
        return [Tool(google_search=GoogleSearch())]
    return [Tool(function_declarations=function_declarations)]
|
422
|
+
|
365
423
|
|
366
424
|
def safety_settings_to_list(
|
367
425
|
safety_settings: list[SafetySettingDict],
|
@@ -500,6 +558,8 @@ async def content_part(client: Client, content: InspectContent | str) -> Part:
|
|
500
558
|
return Part.from_text(text=content.text or NO_CONTENT)
|
501
559
|
elif isinstance(content, ContentReasoning):
|
502
560
|
return Part.from_text(text=content.reasoning or NO_CONTENT)
|
561
|
+
elif isinstance(content, ContentData):
|
562
|
+
assert False, "Google provider should never encounter ContentData"
|
503
563
|
else:
|
504
564
|
return await chat_content_to_part(client, content)
|
505
565
|
|
@@ -538,20 +598,6 @@ async def extract_system_message_as_parts(
|
|
538
598
|
return system_parts or None
|
539
599
|
|
540
600
|
|
541
|
-
def chat_tools(tools: list[ToolInfo]) -> ToolListUnion:
|
542
|
-
declarations = [
|
543
|
-
FunctionDeclaration(
|
544
|
-
name=tool.name,
|
545
|
-
description=tool.description,
|
546
|
-
parameters=schema_from_param(tool.parameters)
|
547
|
-
if len(tool.parameters.properties) > 0
|
548
|
-
else None,
|
549
|
-
)
|
550
|
-
for tool in tools
|
551
|
-
]
|
552
|
-
return [Tool(function_declarations=declarations)]
|
553
|
-
|
554
|
-
|
555
601
|
# https://ai.google.dev/gemini-api/tutorials/extract_structured_data#define_the_schema
|
556
602
|
def schema_from_param(
|
557
603
|
param: ToolParam | ToolParams, nullable: bool | None = False
|
@@ -656,19 +702,36 @@ def completion_choice_from_candidate(
|
|
656
702
|
| ContentImage
|
657
703
|
| ContentAudio
|
658
704
|
| ContentVideo
|
705
|
+
| ContentData
|
659
706
|
]
|
660
707
|
) = ""
|
661
708
|
# content.parts can be None when the finish_reason is MALFORMED_FUNCTION_CALL
|
662
709
|
elif candidate.content.parts is None:
|
663
710
|
content = ""
|
664
711
|
else:
|
665
|
-
|
666
|
-
|
667
|
-
|
668
|
-
|
669
|
-
|
670
|
-
|
671
|
-
|
712
|
+
# Google's grounded search metadata provides start/end indices for cited
|
713
|
+
# text based on the joining of all separate text parts (despite the doc
|
714
|
+
# suggesting that they provide part_index). Thankfully, the doc also says:
|
715
|
+
#
|
716
|
+
# Exactly one field within a Part should be set, representing the specific type
|
717
|
+
# of content being conveyed. Using multiple fields within the same `Part`
|
718
|
+
# instance is considered invalid.
|
719
|
+
#
|
720
|
+
# That means that we can safely collapse adjacent parts with a `text` field
|
721
|
+
# and not fear that we're breaking other types of content parts
|
722
|
+
parts = functools.reduce(
|
723
|
+
_combine_text_parts, candidate.content.parts, list[Part]()
|
724
|
+
)
|
725
|
+
|
726
|
+
content = [
|
727
|
+
ContentReasoning(reasoning=part.text)
|
728
|
+
if part.thought is True
|
729
|
+
else ContentText(
|
730
|
+
text=part.text, citations=get_candidate_citations(candidate)
|
731
|
+
)
|
732
|
+
for part in parts
|
733
|
+
if part.text is not None
|
734
|
+
]
|
672
735
|
|
673
736
|
# now tool calls
|
674
737
|
tool_calls: list[ToolCall] = []
|
@@ -922,3 +985,16 @@ async def file_for_content(
|
|
922
985
|
files_db.put(content_sha256, str(upload.name))
|
923
986
|
# return the file
|
924
987
|
return upload
|
988
|
+
|
989
|
+
|
990
|
+
def _combine_text_parts(acc: list[Part], part: Part) -> list[Part]:
    """Reducer that merges `part` into the previous part when both are plain text.

    A part is mergeable when it has text and is not flagged as a thought.
    Otherwise `part` is appended unchanged. A new list is returned either way.
    """

    def _mergeable(candidate: Part) -> bool:
        return candidate.text is not None and candidate.thought is not True

    if not acc or not _mergeable(part) or not _mergeable(acc[-1]):
        return acc + [part]

    merged = Part(text=acc[-1].text + part.text)
    return acc[:-1] + [merged]
|
@@ -44,9 +44,15 @@ from typing_extensions import override
|
|
44
44
|
# TODO: Migration guide:
|
45
45
|
# https://github.com/mistralai/client-python/blob/main/MIGRATION.md
|
46
46
|
from inspect_ai._util.constants import NO_CONTENT
|
47
|
-
from inspect_ai._util.content import
|
47
|
+
from inspect_ai._util.content import (
|
48
|
+
Content,
|
49
|
+
ContentImage,
|
50
|
+
ContentReasoning,
|
51
|
+
ContentText,
|
52
|
+
)
|
48
53
|
from inspect_ai._util.http import is_retryable_http_status
|
49
54
|
from inspect_ai._util.images import file_as_data_uri
|
55
|
+
from inspect_ai.model._reasoning import parse_content_with_reasoning
|
50
56
|
from inspect_ai.tool import ToolCall, ToolChoice, ToolFunction, ToolInfo
|
51
57
|
|
52
58
|
from ..._util.httpx import httpx_should_retry
|
@@ -481,26 +487,33 @@ def completion_content(content: str | list[ContentChunk]) -> str | list[Content]
|
|
481
487
|
if isinstance(content, str):
|
482
488
|
return content
|
483
489
|
else:
|
484
|
-
return [
|
490
|
+
return [item for c in content for item in completion_content_chunks(c)]
|
485
491
|
|
486
492
|
|
487
|
-
def
|
493
|
+
def completion_content_chunks(content: ContentChunk) -> list[Content]:
|
488
494
|
if isinstance(content, ReferenceChunk):
|
489
495
|
raise TypeError("ReferenceChunk content is not supported by Inspect.")
|
490
496
|
elif isinstance(content, TextChunk):
|
491
|
-
|
497
|
+
parsed = parse_content_with_reasoning(content.text)
|
498
|
+
if parsed:
|
499
|
+
return [
|
500
|
+
ContentReasoning(reasoning=parsed.reasoning),
|
501
|
+
ContentText(text=parsed.content),
|
502
|
+
]
|
503
|
+
else:
|
504
|
+
return [ContentText(text=content.text)]
|
492
505
|
elif isinstance(content, DocumentURLChunk):
|
493
|
-
return ContentText(text=content.document_url)
|
506
|
+
return [ContentText(text=content.document_url)]
|
494
507
|
else:
|
495
508
|
if isinstance(content.image_url, str):
|
496
|
-
return ContentImage(image=content.image_url)
|
509
|
+
return [ContentImage(image=content.image_url)]
|
497
510
|
else:
|
498
511
|
match content.image_url.detail:
|
499
512
|
case "low" | "high":
|
500
513
|
detail: Literal["auto", "low", "high"] = content.image_url.detail
|
501
514
|
case _:
|
502
515
|
detail = "auto"
|
503
|
-
return ContentImage(image=content.image_url.url, detail=detail)
|
516
|
+
return [ContentImage(image=content.image_url.url, detail=detail)]
|
504
517
|
|
505
518
|
|
506
519
|
def completion_choices_from_response(
|