inspect-ai 0.3.102__py3-none-any.whl → 0.3.104__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/common.py +2 -1
- inspect_ai/_cli/eval.py +2 -1
- inspect_ai/_display/core/active.py +3 -0
- inspect_ai/_display/core/config.py +1 -0
- inspect_ai/_display/core/panel.py +21 -13
- inspect_ai/_display/core/results.py +3 -7
- inspect_ai/_display/core/rich.py +3 -5
- inspect_ai/_display/log/__init__.py +0 -0
- inspect_ai/_display/log/display.py +173 -0
- inspect_ai/_display/plain/display.py +2 -2
- inspect_ai/_display/rich/display.py +2 -4
- inspect_ai/_display/textual/app.py +1 -6
- inspect_ai/_display/textual/widgets/task_detail.py +3 -14
- inspect_ai/_display/textual/widgets/tasks.py +1 -1
- inspect_ai/_eval/eval.py +14 -2
- inspect_ai/_eval/evalset.py +3 -2
- inspect_ai/_eval/registry.py +6 -1
- inspect_ai/_eval/run.py +7 -1
- inspect_ai/_eval/task/constants.py +1 -0
- inspect_ai/_eval/task/log.py +5 -1
- inspect_ai/_eval/task/run.py +1 -1
- inspect_ai/_util/citation.py +88 -0
- inspect_ai/_util/content.py +24 -2
- inspect_ai/_util/json.py +17 -2
- inspect_ai/_util/registry.py +19 -4
- inspect_ai/_view/schema.py +0 -6
- inspect_ai/_view/www/dist/assets/index.css +82 -24
- inspect_ai/_view/www/dist/assets/index.js +10124 -9808
- inspect_ai/_view/www/log-schema.json +418 -1
- inspect_ai/_view/www/node_modules/flatted/python/flatted.py +149 -0
- inspect_ai/_view/www/node_modules/katex/src/fonts/generate_fonts.py +58 -0
- inspect_ai/_view/www/node_modules/katex/src/metrics/extract_tfms.py +114 -0
- inspect_ai/_view/www/node_modules/katex/src/metrics/extract_ttfs.py +122 -0
- inspect_ai/_view/www/node_modules/katex/src/metrics/format_json.py +28 -0
- inspect_ai/_view/www/node_modules/katex/src/metrics/parse_tfm.py +211 -0
- inspect_ai/_view/www/package.json +2 -2
- inspect_ai/_view/www/src/@types/log.d.ts +140 -39
- inspect_ai/_view/www/src/app/content/RecordTree.tsx +13 -0
- inspect_ai/_view/www/src/app/log-view/LogView.tsx +1 -1
- inspect_ai/_view/www/src/app/routing/logNavigation.ts +31 -0
- inspect_ai/_view/www/src/app/routing/{navigationHooks.ts → sampleNavigation.ts} +39 -86
- inspect_ai/_view/www/src/app/samples/SampleDialog.tsx +1 -1
- inspect_ai/_view/www/src/app/samples/SampleDisplay.tsx +1 -1
- inspect_ai/_view/www/src/app/samples/chat/MessageCitations.module.css +16 -0
- inspect_ai/_view/www/src/app/samples/chat/MessageCitations.tsx +63 -0
- inspect_ai/_view/www/src/app/samples/chat/MessageContent.module.css +6 -0
- inspect_ai/_view/www/src/app/samples/chat/MessageContent.tsx +174 -25
- inspect_ai/_view/www/src/app/samples/chat/MessageContents.tsx +21 -3
- inspect_ai/_view/www/src/app/samples/chat/content-data/ContentDataView.module.css +7 -0
- inspect_ai/_view/www/src/app/samples/chat/content-data/ContentDataView.tsx +111 -0
- inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearch.module.css +10 -0
- inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearch.tsx +14 -0
- inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearchResults.module.css +19 -0
- inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearchResults.tsx +49 -0
- inspect_ai/_view/www/src/app/samples/chat/messages.ts +7 -1
- inspect_ai/_view/www/src/app/samples/chat/tools/ToolCallView.tsx +12 -2
- inspect_ai/_view/www/src/app/samples/chat/types.ts +4 -0
- inspect_ai/_view/www/src/app/samples/list/SampleList.tsx +1 -1
- inspect_ai/_view/www/src/app/samples/sampleLimit.ts +2 -2
- inspect_ai/_view/www/src/app/samples/transcript/ModelEventView.tsx +1 -1
- inspect_ai/_view/www/src/app/samples/transcript/SampleLimitEventView.tsx +4 -4
- inspect_ai/_view/www/src/app/samples/transcript/outline/TranscriptOutline.tsx +1 -1
- inspect_ai/_view/www/src/components/MarkdownDiv.tsx +15 -2
- inspect_ai/_view/www/src/tests/README.md +2 -2
- inspect_ai/_view/www/src/utils/git.ts +3 -1
- inspect_ai/_view/www/src/utils/html.ts +6 -0
- inspect_ai/agent/_handoff.py +3 -3
- inspect_ai/log/_condense.py +5 -0
- inspect_ai/log/_file.py +4 -1
- inspect_ai/log/_log.py +9 -4
- inspect_ai/log/_recorders/eval.py +4 -3
- inspect_ai/log/_recorders/json.py +5 -2
- inspect_ai/log/_recorders/recorder.py +1 -0
- inspect_ai/log/_util.py +2 -0
- inspect_ai/model/__init__.py +14 -0
- inspect_ai/model/_call_tools.py +13 -4
- inspect_ai/model/_chat_message.py +3 -0
- inspect_ai/model/_openai_responses.py +80 -34
- inspect_ai/model/_providers/_anthropic_citations.py +158 -0
- inspect_ai/model/_providers/_google_citations.py +100 -0
- inspect_ai/model/_providers/anthropic.py +196 -34
- inspect_ai/model/_providers/google.py +94 -22
- inspect_ai/model/_providers/mistral.py +20 -7
- inspect_ai/model/_providers/openai.py +11 -10
- inspect_ai/model/_providers/openai_compatible.py +3 -2
- inspect_ai/model/_providers/openai_responses.py +2 -5
- inspect_ai/model/_providers/perplexity.py +123 -0
- inspect_ai/model/_providers/providers.py +13 -2
- inspect_ai/model/_providers/vertex.py +3 -0
- inspect_ai/model/_trim.py +5 -0
- inspect_ai/tool/__init__.py +14 -0
- inspect_ai/tool/_mcp/_mcp.py +5 -2
- inspect_ai/tool/_mcp/sampling.py +19 -3
- inspect_ai/tool/_mcp/server.py +1 -1
- inspect_ai/tool/_tool.py +10 -1
- inspect_ai/tool/_tools/_web_search/_base_http_provider.py +104 -0
- inspect_ai/tool/_tools/_web_search/_exa.py +78 -0
- inspect_ai/tool/_tools/_web_search/_google.py +22 -25
- inspect_ai/tool/_tools/_web_search/_tavily.py +47 -65
- inspect_ai/tool/_tools/_web_search/_web_search.py +83 -36
- inspect_ai/tool/_tools/_web_search/_web_search_provider.py +7 -0
- inspect_ai/util/_display.py +11 -2
- inspect_ai/util/_sandbox/docker/compose.py +2 -2
- inspect_ai/util/_span.py +12 -1
- {inspect_ai-0.3.102.dist-info → inspect_ai-0.3.104.dist-info}/METADATA +2 -2
- {inspect_ai-0.3.102.dist-info → inspect_ai-0.3.104.dist-info}/RECORD +112 -88
- /inspect_ai/model/{_openai_computer_use.py → _providers/_openai_computer_use.py} +0 -0
- /inspect_ai/model/{_openai_web_search.py → _providers/_openai_web_search.py} +0 -0
- {inspect_ai-0.3.102.dist-info → inspect_ai-0.3.104.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.102.dist-info → inspect_ai-0.3.104.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.102.dist-info → inspect_ai-0.3.104.dist-info}/licenses/LICENSE +0 -0
- {inspect_ai-0.3.102.dist-info → inspect_ai-0.3.104.dist-info}/top_level.txt +0 -0
inspect_ai/model/__init__.py
CHANGED
@@ -1,8 +1,16 @@
 # ruff: noqa: F401 F403 F405
 
+from inspect_ai._util.citation import (
+    Citation,
+    CitationBase,
+    ContentCitation,
+    DocumentCitation,
+    UrlCitation,
+)
 from inspect_ai._util.content import (
     Content,
     ContentAudio,
+    ContentData,
     ContentImage,
     ContentReasoning,
     ContentText,
@@ -59,6 +67,7 @@ __all__ = [
     "ResponseSchema",
     "CachePolicy",
     "ContentAudio",
+    "ContentData",
     "ContentImage",
     "ContentReasoning",
     "ContentText",
@@ -93,6 +102,11 @@ __all__ = [
     "cache_size",
     "get_model",
     "modelapi",
+    "Citation",
+    "CitationBase",
+    "DocumentCitation",
+    "ContentCitation",
+    "UrlCitation",
 ]
 
 _TOOL_MODULE_VERSION = "0.3.18"
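The headline change in this release is first-class citation support: Citation, CitationBase, ContentCitation, DocumentCitation, and UrlCitation are now exported from inspect_ai.model, and ContentText gains a citations field. A rough sketch of how the new exports fit together (not taken from the package; the text, title, and URL below are placeholder values):

from inspect_ai.model import ContentText, UrlCitation

# attach a citation to a text content block; per this diff, cited_text may be
# either the quoted string itself or a (start, end) span into the reply text
text = ContentText(
    text="Inspect now records provider citations alongside model output.",
    citations=[
        UrlCitation(
            cited_text="records provider citations",
            title="Example source",
            url="https://example.com/article",
        )
    ],
)
print(text.citations[0].url)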
inspect_ai/model/_call_tools.py
CHANGED
@@ -1,7 +1,7 @@
 import inspect
 import json
 import types
-from copy import copy
+from copy import copy, deepcopy
 from dataclasses import is_dataclass
 from datetime import date, datetime, time
 from enum import EnumMeta
@@ -36,6 +36,7 @@ from pydantic import BaseModel
 from inspect_ai._util.content import (
     Content,
     ContentAudio,
+    ContentData,
     ContentImage,
     ContentText,
     ContentVideo,
@@ -188,13 +189,19 @@ async def execute_tools(
     # types to string as that is what the model APIs accept
     truncated: tuple[int, int] | None = None
     if isinstance(
-        result,
+        result,
+        ContentText | ContentImage | ContentAudio | ContentVideo | ContentData,
     ):
         content: str | list[Content] = [result]
     elif isinstance(result, list) and (
         len(result) == 0
         or isinstance(
-            result[0],
+            result[0],
+            ContentText
+            | ContentImage
+            | ContentAudio
+            | ContentVideo
+            | ContentData,
         )
     ):
         content = result
@@ -471,7 +478,9 @@ async def agent_handoff(
     limit_error: LimitExceededError | None = None
     agent_state = AgentState(messages=copy(agent_conversation))
     try:
-
+        # The agent_tool's limits will be applied multiple times if the agent is handed
+        # off to multiple times which is not supported, so create a copy of each limit.
+        with apply_limits(deepcopy(agent_tool.limits)):
            async with span(name=agent_name, type="agent"):
                agent_state = await agent_tool.agent(agent_state, **arguments)
     except LimitExceededError as ex:
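execute_tools now also accepts the new ContentData type in tool results, using the PEP 604 union form of isinstance shown above. A standalone sketch of that pattern (illustrative only, not code from the package):

from inspect_ai.model import ContentData, ContentText

def is_single_content(value: object) -> bool:
    # on Python 3.10+, isinstance accepts X | Y unions directly, which is how
    # execute_tools recognises single content objects, including ContentData
    return isinstance(value, ContentText | ContentData)

print(is_single_content(ContentText(text="hello")))  # True
print(is_single_content("plain string"))             # False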
inspect_ai/model/_chat_message.py
CHANGED
@@ -26,6 +26,9 @@ class ChatMessageBase(BaseModel):
     source: Literal["input", "generate"] | None = Field(default=None)
     """Source of message."""
 
+    metadata: dict[str, Any] | None = Field(default=None)
+    """Additional message metadata."""
+
     internal: JsonValue | None = Field(default=None)
     """Model provider specific payload - typically used to aid transformation back to model types."""
 
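Every chat message class inherits the new metadata field from ChatMessageBase. A minimal sketch (the keys are illustrative, not from the package):

from inspect_ai.model import ChatMessageUser

message = ChatMessageUser(
    content="Summarise the attached report.",
    # free-form, optional metadata carried on the message
    metadata={"source": "ingest-pipeline", "doc_id": "rpt-042"},
)
print(message.metadata)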
inspect_ai/model/_openai_responses.py
CHANGED
@@ -31,9 +31,16 @@ from openai.types.responses.response_create_params import (
     ToolChoice as ResponsesToolChoice,
 )
 from openai.types.responses.response_input_item_param import FunctionCallOutput, Message
+from openai.types.responses.response_output_text import (
+    Annotation,
+    AnnotationFileCitation,
+    AnnotationFilePath,
+    AnnotationURLCitation,
+)
 from openai.types.responses.response_reasoning_item_param import Summary
 from pydantic import JsonValue
 
+from inspect_ai._util.citation import Citation, DocumentCitation, UrlCitation
 from inspect_ai._util.content import (
     Content,
     ContentImage,
@@ -47,29 +54,30 @@ from inspect_ai.model._chat_message import ChatMessage, ChatMessageAssistant
 from inspect_ai.model._generate_config import GenerateConfig
 from inspect_ai.model._model_output import ChatCompletionChoice, StopReason
 from inspect_ai.model._openai import is_o_series
-from inspect_ai.
+from inspect_ai.tool._tool_call import ToolCall
+from inspect_ai.tool._tool_choice import ToolChoice
+from inspect_ai.tool._tool_info import ToolInfo
+
+from ._providers._openai_computer_use import (
     computer_call_output,
     maybe_computer_use_preview_tool,
     tool_call_from_openai_computer_tool_call,
 )
-from
-from inspect_ai.tool._tool_call import ToolCall
-from inspect_ai.tool._tool_choice import ToolChoice
-from inspect_ai.tool._tool_info import ToolInfo
+from ._providers._openai_web_search import maybe_web_search_tool
 
 
 async def openai_responses_inputs(
-    messages: list[ChatMessage], model: str
+    messages: list[ChatMessage], model: str
 ) -> list[ResponseInputItemParam]:
     return [
         item
         for message in messages
-        for item in await _openai_input_item_from_chat_message(message, model
+        for item in await _openai_input_item_from_chat_message(message, model)
     ]
 
 
 async def _openai_input_item_from_chat_message(
-    message: ChatMessage, model: str
+    message: ChatMessage, model: str
 ) -> list[ResponseInputItemParam]:
     if message.role == "system":
         content = await _openai_responses_content_list_param(message.content)
@@ -87,7 +95,7 @@ async def _openai_input_item_from_chat_message(
             )
         ]
     elif message.role == "assistant":
-        return _openai_input_items_from_chat_message_assistant(message
+        return _openai_input_items_from_chat_message_assistant(message)
     elif message.role == "tool":
         if message.internal:
             internal = _model_tool_call_for_internal(message.internal)
@@ -252,7 +260,18 @@ def _chat_message_assistant_from_openai_response(
         case ResponseOutputMessage(content=content, id=id):
             message_content.extend(
                 [
-                    ContentText(
+                    ContentText(
+                        text=c.text,
+                        internal={"id": id},
+                        citations=(
+                            [
+                                _to_inspect_citation(annotation)
+                                for annotation in c.annotations
+                            ]
+                            if c.annotations
+                            else None
+                        ),
+                    )
                     if isinstance(c, ResponseOutputText)
                     else ContentText(
                         text=c.refusal, refusal=True, internal={"id": id}
@@ -310,7 +329,7 @@ def _chat_message_assistant_from_openai_response(
 
 
 def _openai_input_items_from_chat_message_assistant(
-    message: ChatMessageAssistant,
+    message: ChatMessageAssistant,
 ) -> list[ResponseInputItemParam]:
     """
     Transform a `ChatMessageAssistant` into OpenAI `ResponseInputItem`'s for playback to the model.
@@ -343,10 +362,6 @@ def _openai_input_items_from_chat_message_assistant(
     )
     suppress_output_message = message.internal is not None and not has_content_with_ids
 
-    # if we are not storing messages on the server then blank these out
-    if not store:
-        tool_message_ids = {}
-
     # items to return
     items: list[ResponseInputItemParam] = []
     # group content by message ID
@@ -354,30 +369,21 @@ def _openai_input_items_from_chat_message_assistant(
         str | None, list[ResponseOutputTextParam | ResponseOutputRefusalParam]
     ] = {}
 
-    for content in (
-        list[ContentText | ContentReasoning]([ContentText(text=message.content)])
-        if isinstance(message.content, str)
-        else [
-            c for c in message.content if isinstance(c, ContentText | ContentReasoning)
-        ]
-    ):
+    for content in _filter_consecutive_reasoning_blocks(content_items):
         match content:
             case ContentReasoning(reasoning=reasoning):
                 assert content.signature is not None, (
                     "reasoning_id must be saved in signature"
                 )
-
-
-
-
-
-
-
-                    summary=[Summary(type="summary_text", text=reasoning)]
-                    if reasoning
-                    else [],
-                )
+                items.append(
+                    ResponseReasoningItemParam(
+                        type="reasoning",
+                        id=content.signature,
+                        summary=[Summary(type="summary_text", text=reasoning)]
+                        if reasoning
+                        else [],
                     )
+                )
             case ContentText(text=text, refusal=refusal):
                 if suppress_output_message:
                     continue
@@ -409,7 +415,7 @@ def _openai_input_items_from_chat_message_assistant(
            role="assistant",
            # this actually can be `None`, and it will in fact be `None` when the
            # assistant message is synthesized by the scaffold as opposed to being
-            # replayed from the model
+            # replayed from the model
            id=msg_id,  # type: ignore[typeddict-item]
            content=content_list,
            status="completed",
@@ -531,3 +537,43 @@ def _responses_tool_alias(name: str) -> str:
 
 def _from_responses_tool_alias(name: str) -> str:
     return next((k for k, v in _responses_tool_aliases.items() if v == name), name)
+
+
+def _to_inspect_citation(input: Annotation) -> Citation:
+    match input:
+        case AnnotationURLCitation(
+            end_index=end_index, start_index=start_index, title=title, url=url
+        ):
+            return UrlCitation(
+                cited_text=(start_index, end_index), title=title, url=url
+            )
+
+        case (
+            AnnotationFileCitation(file_id=file_id, index=index)
+            | AnnotationFilePath(file_id=file_id, index=index)
+        ):
+            return DocumentCitation(internal={"file_id": file_id, "index": index})
+    assert False, f"Unexpected citation type: {input.type}"
+
+
+def _filter_consecutive_reasoning_blocks(
+    content_list: list[ContentText | ContentReasoning],
+) -> list[ContentText | ContentReasoning]:
+    return [
+        content
+        for i, content in enumerate(content_list)
+        if _should_keep_content(i, content, content_list)
+    ]
+
+
+def _should_keep_content(
+    i: int,
+    content: ContentText | ContentReasoning,
+    content_list: list[ContentText | ContentReasoning],
+) -> bool:
+    return (
+        True
+        if not isinstance(content, ContentReasoning)
+        else i == len(content_list) - 1
+        or not isinstance(content_list[i + 1], ContentReasoning)
+    )
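The new _filter_consecutive_reasoning_blocks helper keeps only the last reasoning block of any consecutive run before the conversation is replayed to the Responses API. A standalone sketch of that rule, with plain strings standing in for ContentText/ContentReasoning (illustrative only):

def filter_consecutive(items: list[str]) -> list[str]:
    def keep(i: int, item: str) -> bool:
        # non-reasoning items are always kept
        if not item.startswith("reasoning"):
            return True
        # a reasoning item survives only if no reasoning item follows it directly
        return i == len(items) - 1 or not items[i + 1].startswith("reasoning")

    return [item for i, item in enumerate(items) if keep(i, item)]

print(filter_consecutive(["reasoning-1", "reasoning-2", "text-a", "reasoning-3"]))
# -> ['reasoning-2', 'text-a', 'reasoning-3']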
inspect_ai/model/_providers/_anthropic_citations.py
ADDED
@@ -0,0 +1,158 @@
+from anthropic.types import (
+    CitationCharLocation,
+    CitationCharLocationParam,
+    CitationContentBlockLocation,
+    CitationContentBlockLocationParam,
+    CitationPageLocation,
+    CitationPageLocationParam,
+    CitationsWebSearchResultLocation,
+    CitationWebSearchResultLocationParam,
+    TextCitation,
+    TextCitationParam,
+)
+
+from inspect_ai._util.citation import (
+    Citation,
+    DocumentCitation,
+    DocumentRange,
+    UrlCitation,
+)
+
+
+def to_inspect_citation(input: TextCitation) -> Citation:
+    match input:
+        case CitationsWebSearchResultLocation(
+            cited_text=cited_text,
+            title=title,
+            url=url,
+            encrypted_index=encrypted_index,
+        ):
+            # Sanitize a citation to work around https://github.com/anthropics/anthropic-sdk-python/issues/965.
+            return UrlCitation(
+                cited_text=cited_text,
+                title=title
+                if title is None or len(title) <= 255
+                else title[:254] + "…",
+                url=url,
+                internal={"encrypted_index": encrypted_index},
+            )
+
+        case CitationCharLocation(
+            cited_text=cited_text,
+            document_index=document_index,
+            document_title=title,
+            end_char_index=end_char_index,
+            start_char_index=start_char_index,
+        ):
+            return DocumentCitation(
+                cited_text=cited_text,
+                title=title,
+                range=DocumentRange(
+                    type="char", start_index=start_char_index, end_index=end_char_index
+                ),
+                internal={"document_index": document_index},
+            )
+
+        case CitationContentBlockLocation(
+            cited_text=cited_text,
+            document_index=document_index,
+            document_title=title,
+            end_block_index=end_block_index,
+            start_block_index=start_block_index,
+        ):
+            return DocumentCitation(
+                cited_text=cited_text,
+                title=title,
+                range=DocumentRange(
+                    type="block",
+                    start_index=start_block_index,
+                    end_index=end_block_index,
+                ),
+                internal={"document_index": document_index},
+            )
+
+        case CitationPageLocation(
+            cited_text=cited_text,
+            document_index=document_index,
+            document_title=title,
+            end_page_number=end_page_number,
+            start_page_number=start_page_number,
+        ):
+            return DocumentCitation(
+                cited_text=cited_text,
+                title=title,
+                range=DocumentRange(
+                    type="page",
+                    start_index=start_page_number - 1,
+                    end_index=end_page_number - 1,
+                ),
+                internal={"document_index": document_index},
+            )
+
+    assert False, f"Unexpected citation type: {input.type}"
+
+
+def to_anthropic_citation(input: Citation) -> TextCitationParam:
+    cited_text = input.cited_text
+    assert isinstance(cited_text, str), (
+        "anthropic citations must have a string cited_text"
+    )
+
+    match input:
+        case UrlCitation(title=title, url=url, internal=internal):
+            assert internal, "UrlCitation must have internal field"
+            encrypted_index = internal.get("encrypted_index", None)
+            assert isinstance(encrypted_index, str), (
+                "URL citations require encrypted_index in internal field"
+            )
+
+            return CitationWebSearchResultLocationParam(
+                type="web_search_result_location",
+                cited_text=cited_text,
+                title=title,
+                url=url,
+                encrypted_index=encrypted_index,
+            )
+
+        case DocumentCitation(title=title, range=range, internal=internal):
+            assert internal, "DocumentCharCitation must have internal field"
+            document_index = internal.get("document_index", None)
+            assert isinstance(document_index, int), (
+                "DocumentCharCitation require encrypted_index in internal field"
+            )
+            assert range, "DocumentCitation must have a range"
+
+            start_index = range.start_index
+            end_index = range.end_index
+
+            match range.type:
+                case "char":
+                    return CitationCharLocationParam(
+                        type="char_location",
+                        cited_text=cited_text,
+                        document_title=title,
+                        document_index=document_index,
+                        start_char_index=start_index,
+                        end_char_index=end_index,
+                    )
+                case "block":
+                    return CitationContentBlockLocationParam(
+                        type="content_block_location",
+                        cited_text=cited_text,
+                        document_title=title,
+                        document_index=document_index,
+                        start_block_index=start_index,
+                        end_block_index=end_index,
+                    )
+                case "page":
+                    return CitationPageLocationParam(
+                        type="page_location",
+                        cited_text=cited_text,
+                        document_title=title,
+                        document_index=document_index,
+                        start_page_number=start_index + 1,
+                        end_page_number=end_index + 1,
+                    )
+
+    # If we can't handle this citation type, raise an error
+    raise ValueError(f"Unsupported citation type: {input.type}")
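Note the page cases: Anthropic page locations are 1-based, while DocumentRange stores 0-based indices, so the converters subtract or add one at the boundary. A small sketch of that convention (values are illustrative; inspect_ai._util.citation is the internal module these converters import from):

from inspect_ai._util.citation import DocumentCitation, DocumentRange

# an Anthropic CitationPageLocation reporting pages 3-5 (1-based) ...
start_page_number, end_page_number = 3, 5

# ... is stored as a 0-based DocumentRange on the Inspect side
citation = DocumentCitation(
    cited_text="quoted passage",
    title="Example document",
    range=DocumentRange(
        type="page",
        start_index=start_page_number - 1,
        end_index=end_page_number - 1,
    ),
    internal={"document_index": 0},
)
print(citation.range.start_index, citation.range.end_index)  # 2 4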
inspect_ai/model/_providers/_google_citations.py
ADDED
@@ -0,0 +1,100 @@
+from typing import Sequence
+
+from google.genai.types import (
+    Candidate,
+    GroundingChunk,
+    GroundingSupport,
+    Segment,
+)
+
+from inspect_ai._util.citation import Citation, UrlCitation
+
+
+def get_candidate_citations(candidate: Candidate) -> list[Citation]:
+    """Extract citations from Google AI candidate grounding metadata.
+
+    Understanding Google API Grounding Citations: `GroundingChunk`'s, `GroundingSupport`, and `Segment`'s
+
+    1. Grounding Chunks (`GroundingChunk`)
+    What they are: The raw source material that the AI retrieved to support its response.
+    Structure:
+    - Web (`GroundingChunkWeb`): Content from web searches
+      - domain: The website domain
+      - title: Page title
+      - uri: Web page URL
+    Think of chunks as: The library books or web pages that contain the information.
+
+    2. Segments (`Segment`)
+    What they are: Specific portions of the AI's generated response text.
+    Structure:
+    - start_index & end_index: Byte positions in the response text
+    - text: The actual text from the response that this segment represents
+    Think of segments as: Specific sentences or paragraphs in the AI's response that need citations.
+
+    3. Grounding Support (`GroundingSupport`)
+    What they are: The bridge that connects segments of the AI's response to the chunks that support them.
+    Structure:
+    - grounding_chunk_indices: Array of integers pointing to specific chunks (e.g., [1,3,4] means chunks 1, 3, and 4 support this claim)
+    - segment: Which part of the response this support applies to
+    Think of support as: The footnotes that say "this claim in my response is backed up by these specific sources."
+
+    Args:
+        candidate: The Google AI candidate response containing grounding metadata
+
+    Returns:
+        A list of `Citation` objects linking response segments to their web sources.
+        Currently only handles `GroundingChunkWeb` sources.
+    """
+    return (
+        []
+        if (
+            not candidate.content
+            or not candidate.content.parts
+            or not (metadata := candidate.grounding_metadata)
+            or not (chunks := metadata.grounding_chunks)
+            or not (supports := metadata.grounding_supports)
+        )
+        else [
+            citation
+            for support in supports
+            for citation in _citations_from_support(support, chunks)
+        ]
+    )
+
+
+def _create_citation_from_chunk_and_segment(
+    chunk: GroundingChunk, segment: Segment
+) -> UrlCitation | None:
+    """Create a citation from a chunk and segment, returning None if chunk is not web-based."""
+    return (
+        UrlCitation(
+            url=chunk.web.uri,
+            title=chunk.web.title,
+            cited_text=(
+                (segment.start_index or 0, segment.end_index)
+                if segment.end_index is not None
+                else None
+            ),
+        )
+        if (chunk.web and chunk.web.uri)
+        else None
+    )
+
+
+def _citations_from_support(
+    support: GroundingSupport, chunks: Sequence[GroundingChunk]
+) -> list[Citation]:
+    return (
+        []
+        if support.segment is None or support.grounding_chunk_indices is None
+        else [
+            citation
+            for chunk_index in support.grounding_chunk_indices
+            if chunk_index < len(chunks)
+            if (
+                citation := _create_citation_from_chunk_and_segment(
+                    chunks[chunk_index], support.segment
+                )
+            )
+        ]
+    )
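get_candidate_citations walks each GroundingSupport, resolves its chunk indices, and emits one UrlCitation per web chunk, recording the supported span of the reply as a (start, end) pair. A standalone sketch of that mapping, with plain dicts standing in for the google.genai types (illustrative only):

from inspect_ai.model import UrlCitation

chunks = [
    {"uri": "https://example.com/a", "title": "Source A"},
    {"uri": "https://example.com/b", "title": "Source B"},
]
supports = [{"chunk_indices": [0, 1], "start": 10, "end": 42}]

citations = [
    UrlCitation(
        url=chunks[i]["uri"],
        title=chunks[i]["title"],
        # the supported span of the reply text, as byte offsets
        cited_text=(support["start"], support["end"]),
    )
    for support in supports
    for i in support["chunk_indices"]
    if i < len(chunks)
]
print([c.url for c in citations])  # ['https://example.com/a', 'https://example.com/b']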