inspect-ai 0.3.70__py3-none-any.whl → 0.3.72__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- inspect_ai/_cli/eval.py +14 -8
- inspect_ai/_display/core/display.py +2 -0
- inspect_ai/_display/core/footer.py +13 -3
- inspect_ai/_display/plain/display.py +6 -2
- inspect_ai/_display/rich/display.py +19 -6
- inspect_ai/_display/textual/app.py +6 -1
- inspect_ai/_display/textual/display.py +4 -0
- inspect_ai/_display/textual/widgets/transcript.py +10 -6
- inspect_ai/_eval/task/run.py +5 -8
- inspect_ai/_util/content.py +20 -1
- inspect_ai/_util/transcript.py +10 -4
- inspect_ai/_util/working.py +4 -0
- inspect_ai/_view/www/App.css +6 -0
- inspect_ai/_view/www/dist/assets/index.css +115 -87
- inspect_ai/_view/www/dist/assets/index.js +5324 -2276
- inspect_ai/_view/www/eslint.config.mjs +24 -1
- inspect_ai/_view/www/log-schema.json +283 -20
- inspect_ai/_view/www/package.json +8 -3
- inspect_ai/_view/www/src/App.tsx +2 -2
- inspect_ai/_view/www/src/components/AnsiDisplay.tsx +4 -3
- inspect_ai/_view/www/src/components/Card.tsx +9 -8
- inspect_ai/_view/www/src/components/DownloadButton.tsx +2 -1
- inspect_ai/_view/www/src/components/EmptyPanel.tsx +2 -2
- inspect_ai/_view/www/src/components/ErrorPanel.tsx +4 -3
- inspect_ai/_view/www/src/components/ExpandablePanel.tsx +13 -5
- inspect_ai/_view/www/src/components/FindBand.tsx +3 -3
- inspect_ai/_view/www/src/components/HumanBaselineView.tsx +3 -3
- inspect_ai/_view/www/src/components/LabeledValue.tsx +5 -4
- inspect_ai/_view/www/src/components/LargeModal.tsx +18 -13
- inspect_ai/_view/www/src/components/{LightboxCarousel.css → LightboxCarousel.module.css} +22 -18
- inspect_ai/_view/www/src/components/LightboxCarousel.tsx +36 -27
- inspect_ai/_view/www/src/components/MessageBand.tsx +2 -1
- inspect_ai/_view/www/src/components/NavPills.tsx +9 -8
- inspect_ai/_view/www/src/components/ProgressBar.tsx +2 -1
- inspect_ai/_view/www/src/components/TabSet.tsx +21 -15
- inspect_ai/_view/www/src/index.tsx +2 -2
- inspect_ai/_view/www/src/metadata/MetaDataGrid.tsx +11 -9
- inspect_ai/_view/www/src/metadata/MetaDataView.tsx +3 -2
- inspect_ai/_view/www/src/metadata/MetadataGrid.module.css +1 -0
- inspect_ai/_view/www/src/metadata/RenderedContent.tsx +16 -0
- inspect_ai/_view/www/src/plan/DatasetDetailView.tsx +3 -2
- inspect_ai/_view/www/src/plan/DetailStep.tsx +2 -1
- inspect_ai/_view/www/src/plan/PlanCard.tsx +2 -5
- inspect_ai/_view/www/src/plan/PlanDetailView.tsx +6 -9
- inspect_ai/_view/www/src/plan/ScorerDetailView.tsx +2 -1
- inspect_ai/_view/www/src/plan/SolverDetailView.tsx +3 -3
- inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +2 -2
- inspect_ai/_view/www/src/samples/SampleDialog.tsx +3 -3
- inspect_ai/_view/www/src/samples/SampleDisplay.tsx +2 -2
- inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +2 -2
- inspect_ai/_view/www/src/samples/SamplesTools.tsx +2 -1
- inspect_ai/_view/www/src/samples/chat/ChatMessage.tsx +3 -19
- inspect_ai/_view/www/src/samples/chat/ChatMessageRenderer.tsx +2 -1
- inspect_ai/_view/www/src/samples/chat/ChatMessageRow.tsx +2 -1
- inspect_ai/_view/www/src/samples/chat/ChatView.tsx +2 -1
- inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.tsx +22 -7
- inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +35 -6
- inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +2 -2
- inspect_ai/_view/www/src/samples/chat/messages.ts +15 -2
- inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +13 -4
- inspect_ai/_view/www/src/samples/chat/tools/ToolInput.module.css +2 -2
- inspect_ai/_view/www/src/samples/chat/tools/ToolInput.tsx +18 -19
- inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.module.css +1 -1
- inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.tsx +4 -3
- inspect_ai/_view/www/src/samples/chat/tools/ToolTitle.tsx +2 -2
- inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.tsx +2 -3
- inspect_ai/_view/www/src/samples/error/SampleErrorView.tsx +3 -2
- inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +2 -1
- inspect_ai/_view/www/src/samples/list/SampleHeader.tsx +2 -1
- inspect_ai/_view/www/src/samples/list/SampleList.tsx +57 -45
- inspect_ai/_view/www/src/samples/list/SampleRow.tsx +2 -1
- inspect_ai/_view/www/src/samples/list/SampleSeparator.tsx +2 -1
- inspect_ai/_view/www/src/samples/sample-tools/EpochFilter.tsx +2 -2
- inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.tsx +4 -3
- inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +2 -5
- inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +2 -2
- inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +2 -1
- inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +2 -2
- inspect_ai/_view/www/src/samples/transcript/ApprovalEventView.tsx +2 -1
- inspect_ai/_view/www/src/samples/transcript/ErrorEventView.tsx +2 -1
- inspect_ai/_view/www/src/samples/transcript/InfoEventView.tsx +2 -1
- inspect_ai/_view/www/src/samples/transcript/InputEventView.tsx +2 -1
- inspect_ai/_view/www/src/samples/transcript/LoggerEventView.module.css +4 -0
- inspect_ai/_view/www/src/samples/transcript/LoggerEventView.tsx +12 -2
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +1 -1
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +25 -28
- inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.tsx +2 -1
- inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +5 -4
- inspect_ai/_view/www/src/samples/transcript/SampleTranscript.tsx +2 -2
- inspect_ai/_view/www/src/samples/transcript/SandboxEventView.tsx +8 -7
- inspect_ai/_view/www/src/samples/transcript/ScoreEventView.tsx +2 -2
- inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +3 -3
- inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.tsx +18 -14
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +5 -5
- inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +34 -15
- inspect_ai/_view/www/src/samples/transcript/event/EventNav.tsx +2 -1
- inspect_ai/_view/www/src/samples/transcript/event/EventNavs.tsx +2 -1
- inspect_ai/_view/www/src/samples/transcript/event/EventRow.tsx +3 -2
- inspect_ai/_view/www/src/samples/transcript/event/EventSection.tsx +2 -2
- inspect_ai/_view/www/src/samples/transcript/event/EventTimingPanel.module.css +28 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventTimingPanel.tsx +115 -0
- inspect_ai/_view/www/src/samples/transcript/event/utils.ts +29 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateDiffView.tsx +2 -1
- inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +3 -3
- inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +11 -8
- inspect_ai/_view/www/src/types/log.d.ts +129 -34
- inspect_ai/_view/www/src/usage/ModelTokenTable.tsx +6 -10
- inspect_ai/_view/www/src/usage/ModelUsagePanel.module.css +4 -0
- inspect_ai/_view/www/src/usage/ModelUsagePanel.tsx +32 -9
- inspect_ai/_view/www/src/usage/TokenTable.tsx +4 -6
- inspect_ai/_view/www/src/usage/UsageCard.tsx +2 -1
- inspect_ai/_view/www/src/utils/format.ts +1 -1
- inspect_ai/_view/www/src/utils/json.ts +24 -0
- inspect_ai/_view/www/src/workspace/WorkSpace.tsx +6 -5
- inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +9 -2
- inspect_ai/_view/www/src/workspace/error/TaskErrorPanel.tsx +2 -1
- inspect_ai/_view/www/src/workspace/navbar/Navbar.tsx +2 -1
- inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +3 -3
- inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +4 -3
- inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +5 -4
- inspect_ai/_view/www/src/workspace/navbar/StatusPanel.tsx +5 -8
- inspect_ai/_view/www/src/workspace/sidebar/EvalStatus.tsx +5 -4
- inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +2 -1
- inspect_ai/_view/www/src/workspace/sidebar/Sidebar.tsx +2 -1
- inspect_ai/_view/www/src/workspace/sidebar/SidebarLogEntry.tsx +2 -2
- inspect_ai/_view/www/src/workspace/sidebar/SidebarScoreView.tsx +2 -1
- inspect_ai/_view/www/src/workspace/sidebar/SidebarScoresView.tsx +2 -2
- inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +2 -2
- inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +2 -5
- inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +12 -11
- inspect_ai/_view/www/yarn.lock +241 -5
- inspect_ai/log/_condense.py +3 -0
- inspect_ai/log/_recorders/eval.py +6 -1
- inspect_ai/log/_transcript.py +58 -1
- inspect_ai/model/__init__.py +2 -0
- inspect_ai/model/_call_tools.py +7 -0
- inspect_ai/model/_chat_message.py +22 -7
- inspect_ai/model/_conversation.py +10 -8
- inspect_ai/model/_generate_config.py +25 -4
- inspect_ai/model/_model.py +133 -57
- inspect_ai/model/_model_output.py +3 -0
- inspect_ai/model/_openai.py +106 -40
- inspect_ai/model/_providers/anthropic.py +281 -153
- inspect_ai/model/_providers/google.py +27 -8
- inspect_ai/model/_providers/groq.py +9 -4
- inspect_ai/model/_providers/openai.py +57 -4
- inspect_ai/model/_providers/openai_o1.py +10 -0
- inspect_ai/model/_providers/providers.py +1 -1
- inspect_ai/model/_reasoning.py +15 -2
- inspect_ai/scorer/_model.py +23 -19
- inspect_ai/solver/_human_agent/agent.py +14 -10
- inspect_ai/solver/_human_agent/commands/__init__.py +7 -3
- inspect_ai/solver/_human_agent/commands/submit.py +76 -30
- inspect_ai/tool/__init__.py +2 -0
- inspect_ai/tool/_tool.py +3 -1
- inspect_ai/tool/_tools/_computer/_common.py +117 -58
- inspect_ai/tool/_tools/_computer/_computer.py +80 -57
- inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/Code/User/settings.json +7 -1
- inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfwm4.xml +91 -0
- inspect_ai/tool/_tools/_computer/_resources/tool/.pylintrc +8 -0
- inspect_ai/tool/_tools/_computer/_resources/tool/.vscode/settings.json +12 -0
- inspect_ai/tool/_tools/_computer/_resources/tool/_args.py +78 -0
- inspect_ai/tool/_tools/_computer/_resources/tool/_constants.py +20 -0
- inspect_ai/tool/_tools/_computer/_resources/tool/_run.py +1 -1
- inspect_ai/tool/_tools/_computer/_resources/tool/_x11_client.py +175 -113
- inspect_ai/tool/_tools/_computer/_resources/tool/computer_tool.py +76 -20
- inspect_ai/tool/_tools/_computer/_resources/tool/pyproject.toml +65 -0
- inspect_ai/tool/_tools/_computer/test_args.py +151 -0
- inspect_ai/tool/_tools/_web_browser/_resources/.pylintrc +8 -0
- inspect_ai/tool/_tools/_web_browser/_resources/.vscode/launch.json +24 -0
- inspect_ai/tool/_tools/_web_browser/_resources/.vscode/settings.json +25 -0
- inspect_ai/tool/_tools/_web_browser/_resources/Dockerfile +5 -6
- inspect_ai/tool/_tools/_web_browser/_resources/README.md +10 -11
- inspect_ai/tool/_tools/_web_browser/_resources/accessibility_tree.py +71 -0
- inspect_ai/tool/_tools/_web_browser/_resources/accessibility_tree_node.py +323 -0
- inspect_ai/tool/_tools/_web_browser/_resources/cdp/__init__.py +5 -0
- inspect_ai/tool/_tools/_web_browser/_resources/cdp/a11y.py +279 -0
- inspect_ai/tool/_tools/_web_browser/_resources/cdp/dom.py +9 -0
- inspect_ai/tool/_tools/_web_browser/_resources/cdp/dom_snapshot.py +293 -0
- inspect_ai/tool/_tools/_web_browser/_resources/cdp/page.py +94 -0
- inspect_ai/tool/_tools/_web_browser/_resources/constants.py +2 -0
- inspect_ai/tool/_tools/_web_browser/_resources/images/usage_diagram.svg +2 -0
- inspect_ai/tool/_tools/_web_browser/_resources/playwright_browser.py +50 -0
- inspect_ai/tool/_tools/_web_browser/_resources/playwright_crawler.py +31 -359
- inspect_ai/tool/_tools/_web_browser/_resources/playwright_page_crawler.py +280 -0
- inspect_ai/tool/_tools/_web_browser/_resources/pyproject.toml +65 -0
- inspect_ai/tool/_tools/_web_browser/_resources/rectangle.py +64 -0
- inspect_ai/tool/_tools/_web_browser/_resources/rpc_client_helpers.py +146 -0
- inspect_ai/tool/_tools/_web_browser/_resources/scale_factor.py +64 -0
- inspect_ai/tool/_tools/_web_browser/_resources/test_accessibility_tree_node.py +180 -0
- inspect_ai/tool/_tools/_web_browser/_resources/test_playwright_crawler.py +15 -9
- inspect_ai/tool/_tools/_web_browser/_resources/test_rectangle.py +15 -0
- inspect_ai/tool/_tools/_web_browser/_resources/test_web_client.py +44 -0
- inspect_ai/tool/_tools/_web_browser/_resources/web_browser_rpc_types.py +39 -0
- inspect_ai/tool/_tools/_web_browser/_resources/web_client.py +198 -48
- inspect_ai/tool/_tools/_web_browser/_resources/web_client_new_session.py +26 -25
- inspect_ai/tool/_tools/_web_browser/_resources/web_server.py +178 -39
- inspect_ai/tool/_tools/_web_browser/_web_browser.py +38 -19
- inspect_ai/util/__init__.py +2 -1
- inspect_ai/util/_display.py +12 -0
- inspect_ai/util/_sandbox/events.py +55 -21
- inspect_ai/util/_sandbox/self_check.py +131 -43
- inspect_ai/util/_subtask.py +11 -0
- {inspect_ai-0.3.70.dist-info → inspect_ai-0.3.72.dist-info}/METADATA +1 -1
- {inspect_ai-0.3.70.dist-info → inspect_ai-0.3.72.dist-info}/RECORD +209 -186
- {inspect_ai-0.3.70.dist-info → inspect_ai-0.3.72.dist-info}/WHEEL +1 -1
- inspect_ai/_view/www/src/components/VirtualList.module.css +0 -19
- inspect_ai/_view/www/src/components/VirtualList.tsx +0 -292
- inspect_ai/tool/_tools/_computer/_computer_split.py +0 -198
- inspect_ai/tool/_tools/_web_browser/_resources/accessibility_node.py +0 -312
- inspect_ai/tool/_tools/_web_browser/_resources/dm_env_servicer.py +0 -275
- inspect_ai/tool/_tools/_web_browser/_resources/images/usage_diagram.png +0 -0
- inspect_ai/tool/_tools/_web_browser/_resources/test_accessibility_node.py +0 -176
- inspect_ai/tool/_tools/_web_browser/_resources/test_dm_env_servicer.py +0 -135
- inspect_ai/tool/_tools/_web_browser/_resources/test_web_environment.py +0 -71
- inspect_ai/tool/_tools/_web_browser/_resources/web_environment.py +0 -184
- {inspect_ai-0.3.70.dist-info → inspect_ai-0.3.72.dist-info}/LICENSE +0 -0
- {inspect_ai-0.3.70.dist-info → inspect_ai-0.3.72.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.70.dist-info → inspect_ai-0.3.72.dist-info}/top_level.txt +0 -0
inspect_ai/model/_providers/anthropic.py

```diff
@@ -1,9 +1,10 @@
 import functools
 import os
+import re
 import sys
 from copy import copy
 from logging import getLogger
-from typing import Any, Literal, Tuple, TypedDict, cast
+from typing import Any, Literal, Optional, Tuple, TypedDict, cast
 
 from .util.tracker import HttpxTimeTracker
 
@@ -28,8 +29,12 @@ from anthropic.types import (
     ImageBlockParam,
     Message,
     MessageParam,
+    RedactedThinkingBlock,
+    RedactedThinkingBlockParam,
     TextBlock,
     TextBlockParam,
+    ThinkingBlock,
+    ThinkingBlockParam,
     ToolParam,
     ToolResultBlockParam,
     ToolUseBlock,
@@ -44,7 +49,12 @@ from inspect_ai._util.constants import (
     DEFAULT_MAX_RETRIES,
     NO_CONTENT,
 )
-from inspect_ai._util.content import
+from inspect_ai._util.content import (
+    Content,
+    ContentImage,
+    ContentReasoning,
+    ContentText,
+)
 from inspect_ai._util.error import exception_message
 from inspect_ai._util.images import file_as_data_uri
 from inspect_ai._util.logger import warn_once
@@ -194,7 +204,7 @@ class AnthropicAPI(ModelAPI):
             tools_param,
             messages,
             computer_use,
-        ) = await resolve_chat_input(
+        ) = await self.resolve_chat_input(input, tools, config)
 
         # prepare request params (assembed this way so we can log the raw model call)
         request = dict(messages=messages)
@@ -204,23 +214,33 @@ class AnthropicAPI(ModelAPI):
             request["system"] = system_param
         request["tools"] = tools_param
         if len(tools) > 0:
-            request["tool_choice"] = message_tool_choice(
+            request["tool_choice"] = message_tool_choice(
+                tool_choice, self.is_using_thinking(config)
+            )
 
         # additional options
-
+        req, headers, betas = self.completion_config(config)
+        request = request | req
 
         # extra headers (for time tracker and computer use)
-        extra_headers = {HttpxTimeTracker.REQUEST_ID_HEADER: request_id}
+        extra_headers = headers | {HttpxTimeTracker.REQUEST_ID_HEADER: request_id}
         if computer_use:
-
+            betas.append("computer-use-2025-01-24")
+        if len(betas) > 0:
+            extra_headers["anthropic-beta"] = ",".join(betas)
+
         request["extra_headers"] = extra_headers
 
         # extra_body
         if self.extra_body is not None:
             request["extra_body"] = self.extra_body
 
-        # make request
-
+        # make request (stream if we are using reasoning)
+        if self.is_using_thinking(config):
+            async with self.client.messages.stream(**request) as stream:
+                message = await stream.get_final_message()
+        else:
+            message = await self.client.messages.create(**request, stream=False)
 
         # set response for ModelCall
         response = message.model_dump()
@@ -245,27 +265,70 @@
         else:
             raise ex
 
-    def
-
-
-
-
-
-
-
+    def completion_config(
+        self, config: GenerateConfig
+    ) -> tuple[dict[str, Any], dict[str, str], list[str]]:
+        max_tokens = cast(int, config.max_tokens)
+        params = dict(model=self.model_name, max_tokens=max_tokens)
+        headers: dict[str, str] = {}
+        betas: list[str] = []
+        # some params not compatible with thinking models
+        if not self.is_using_thinking(config):
+            if config.temperature is not None:
+                params["temperature"] = config.temperature
+            if config.top_p is not None:
+                params["top_p"] = config.top_p
+            if config.top_k is not None:
+                params["top_k"] = config.top_k
+
+        # some thinking-only stuff
+        if self.is_using_thinking(config):
+            params["thinking"] = dict(
+                type="enabled", budget_tokens=config.reasoning_tokens
+            )
+            headers["anthropic-version"] = "2023-06-01"
+            if max_tokens > 8192:
+                betas.append("output-128k-2025-02-19")
+
+        # config that applies to all models
         if config.timeout is not None:
             params["timeout"] = float(config.timeout)
         if config.stop_seqs is not None:
             params["stop_sequences"] = config.stop_seqs
-
+
+        # return config
+        return params, headers, betas
 
     @override
     def max_tokens(self) -> int | None:
         # anthropic requires you to explicitly specify max_tokens (most others
         # set it to the maximum allowable output tokens for the model).
-        # set to 4096 which is the
+        # set to 4096 which is the highest possible for claude 3 (claude 3.5
+        # allows up to 8192)
         return 4096
 
+    @override
+    def max_tokens_for_config(self, config: GenerateConfig) -> int | None:
+        max_tokens = cast(int, self.max_tokens())
+        if self.is_thinking_model() and config.reasoning_tokens is not None:
+            max_tokens = max_tokens + config.reasoning_tokens
+        return max_tokens
+
+    def is_using_thinking(self, config: GenerateConfig) -> bool:
+        return self.is_thinking_model() and config.reasoning_tokens is not None
+
+    def is_thinking_model(self) -> bool:
+        return not self.is_claude_3() and not self.is_claude_3_5()
+
+    def is_claude_3(self) -> bool:
+        return re.search(r"claude-3-[a-zA-Z]", self.model_name) is not None
+
+    def is_claude_3_5(self) -> bool:
+        return "claude-3-5-" in self.model_name
+
+    def is_claude_3_7(self) -> bool:
+        return "claude-3-7-" in self.model_name
+
     @override
     def connection_key(self) -> str:
         return str(self.api_key)
```
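The model-detection helpers added above hinge on a regex subtlety: `is_claude_3` must match claude 3 model names without also matching claude 3.5 or 3.7 ones, which is why it requires a letter immediately after `claude-3-`. A quick standalone illustration (the model names here are examples, not taken from the diff):

```python
import re

# "claude-3-" must be followed by a letter for is_claude_3 to match,
# so the versioned families claude-3-5 and claude-3-7 are excluded
for name in [
    "claude-3-opus-20240229",      # matches  -> treated as claude 3
    "claude-3-5-sonnet-latest",    # no match -> claude 3.5
    "claude-3-7-sonnet-20250219",  # no match -> claude 3.7 (thinking-capable)
]:
    print(name, re.search(r"claude-3-[a-zA-Z]", name) is not None)
```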
inspect_ai/model/_providers/anthropic.py (continued)

```diff
@@ -295,6 +358,14 @@ class AnthropicAPI(ModelAPI):
     def tool_result_images(self) -> bool:
         return True
 
+    @override
+    def emulate_reasoning_history(self) -> bool:
+        return False
+
+    @override
+    def force_reasoning_history(self) -> Literal["none", "all", "last"] | None:
+        return "all"
+
     # convert some common BadRequestError states into 'refusal' model output
     def handle_bad_request(self, ex: BadRequestError) -> ModelOutput | Exception:
         error = exception_message(ex).lower()
@@ -329,6 +400,148 @@
         else:
             return ex
 
+    async def resolve_chat_input(
+        self,
+        input: list[ChatMessage],
+        tools: list[ToolInfo],
+        config: GenerateConfig,
+    ) -> Tuple[
+        list[TextBlockParam] | None, list["ToolParamDef"], list[MessageParam], bool
+    ]:
+        # extract system message
+        system_messages, messages = split_system_messages(input, config)
+
+        # messages
+        message_params = [(await message_param(message)) for message in messages]
+
+        # collapse user messages (as Inspect 'tool' messages become Claude 'user' messages)
+        message_params = functools.reduce(
+            consecutive_user_message_reducer, message_params, []
+        )
+
+        # tools
+        tools_params, computer_use = self.tool_params_for_tools(tools, config)
+
+        # system messages
+        if len(system_messages) > 0:
+            system_param: list[TextBlockParam] | None = [
+                TextBlockParam(type="text", text=message.text)
+                for message in system_messages
+            ]
+        else:
+            system_param = None
+
+        # add caching directives if necessary
+        cache_prompt = (
+            config.cache_prompt
+            if isinstance(config.cache_prompt, bool)
+            else True
+            if len(tools_params)
+            else False
+        )
+
+        # only certain claude models qualify
+        if cache_prompt:
+            if (
+                "claude-3-sonnet" in self.model_name
+                or "claude-2" in self.model_name
+                or "claude-instant" in self.model_name
+            ):
+                cache_prompt = False
+
+        if cache_prompt:
+            # system
+            if system_param:
+                add_cache_control(system_param[-1])
+            # tools
+            if tools_params:
+                add_cache_control(tools_params[-1])
+            # last 2 user messages
+            user_message_params = list(
+                filter(lambda m: m["role"] == "user", reversed(message_params))
+            )
+            for message in user_message_params[:2]:
+                if isinstance(message["content"], str):
+                    text_param = TextBlockParam(type="text", text=message["content"])
+                    add_cache_control(text_param)
+                    message["content"] = [text_param]
+                else:
+                    content = list(message["content"])
+                    add_cache_control(cast(dict[str, Any], content[-1]))
+
+        # return chat input
+        return system_param, tools_params, message_params, computer_use
+
+    def tool_params_for_tools(
+        self, tools: list[ToolInfo], config: GenerateConfig
+    ) -> tuple[list["ToolParamDef"], bool]:
+        # tool params and computer_use bit to return
+        tool_params: list["ToolParamDef"] = []
+        computer_use = False
+
+        # for each tool, check if it has a native computer use implementation and use that
+        # when available (noting that we need to set the computer use request header)
+        for tool in tools:
+            computer_use_tool = (
+                self.computer_use_tool_param(tool)
+                if config.internal_tools is not False
+                else None
+            )
+            if computer_use_tool:
+                tool_params.append(computer_use_tool)
+                computer_use = True
+            else:
+                tool_params.append(
+                    ToolParam(
+                        name=tool.name,
+                        description=tool.description,
+                        input_schema=tool.parameters.model_dump(exclude_none=True),
+                    )
+                )
+
+        return tool_params, computer_use
+
+    def computer_use_tool_param(
+        self, tool: ToolInfo
+    ) -> Optional["ComputerUseToolParam"]:
+        # check for compatible 'computer' tool
+        if tool.name == "computer" and (
+            sorted(tool.parameters.properties.keys())
+            == sorted(
+                [
+                    "action",
+                    "coordinate",
+                    "duration",
+                    "scroll_amount",
+                    "scroll_direction",
+                    "start_coordinate",
+                    "text",
+                ]
+            )
+        ):
+            if self.is_claude_3_5():
+                warn_once(
+                    logger,
+                    "Use of Anthropic's native computer use support is not enabled in Claude 3.5. Please use 3.7 or later to leverage the native support.",
+                )
+                return None
+            return ComputerUseToolParam(
+                type="computer_20250124",
+                name="computer",
+                # Note: The dimensions passed here for display_width_px and display_height_px should
+                # match the dimensions of screenshots returned by the tool.
+                # Those dimensions will always be one of the values in MAX_SCALING_TARGETS
+                # in _x11_client.py.
+                # TODO: enhance this code to calculate the dimensions based on the scaled screen
+                # size used by the container.
+                display_width_px=1366,
+                display_height_px=768,
+                display_number=1,
+            )
+        # not a computer_use tool
+        else:
+            return None
+
 
 # native anthropic tool definitions for computer use beta
 # https://docs.anthropic.com/en/docs/build-with-claude/computer-use
@@ -344,131 +557,6 @@ class ComputerUseToolParam(TypedDict):
 ToolParamDef = ToolParam | ComputerUseToolParam
 
 
-async def resolve_chat_input(
-    model: str,
-    input: list[ChatMessage],
-    tools: list[ToolInfo],
-    config: GenerateConfig,
-) -> Tuple[list[TextBlockParam] | None, list[ToolParamDef], list[MessageParam], bool]:
-    # extract system message
-    system_messages, messages = split_system_messages(input, config)
-
-    # messages
-    message_params = [(await message_param(message)) for message in messages]
-
-    # collapse user messages (as Inspect 'tool' messages become Claude 'user' messages)
-    message_params = functools.reduce(
-        consecutive_user_message_reducer, message_params, []
-    )
-
-    # tools
-    tools_params, computer_use = tool_params_for_tools(tools, config)
-
-    # system messages
-    if len(system_messages) > 0:
-        system_param: list[TextBlockParam] | None = [
-            TextBlockParam(type="text", text=message.text)
-            for message in system_messages
-        ]
-    else:
-        system_param = None
-
-    # add caching directives if necessary
-    cache_prompt = (
-        config.cache_prompt
-        if isinstance(config.cache_prompt, bool)
-        else True
-        if len(tools_params)
-        else False
-    )
-
-    # only certain claude models qualify
-    if cache_prompt:
-        if (
-            "claude-3-sonnet" in model
-            or "claude-2" in model
-            or "claude-instant" in model
-        ):
-            cache_prompt = False
-
-    if cache_prompt:
-        # system
-        if system_param:
-            add_cache_control(system_param[-1])
-        # tools
-        if tools_params:
-            add_cache_control(tools_params[-1])
-        # last 2 user messages
-        user_message_params = list(
-            filter(lambda m: m["role"] == "user", reversed(message_params))
-        )
-        for message in user_message_params[:2]:
-            if isinstance(message["content"], str):
-                text_param = TextBlockParam(type="text", text=message["content"])
-                add_cache_control(text_param)
-                message["content"] = [text_param]
-            else:
-                content = list(message["content"])
-                add_cache_control(cast(dict[str, Any], content[-1]))
-
-    # return chat input
-    return system_param, tools_params, message_params, computer_use
-
-
-def tool_params_for_tools(
-    tools: list[ToolInfo], config: GenerateConfig
-) -> tuple[list[ToolParamDef], bool]:
-    # tool params and computer_use bit to return
-    tool_params: list[ToolParamDef] = []
-    computer_use = False
-
-    # for each tool, check if it has a native computer use implementation and use that
-    # when available (noting that we need to set the computer use request header)
-    for tool in tools:
-        computer_use_tool = (
-            computer_use_tool_param(tool)
-            if config.internal_tools is not False
-            else None
-        )
-        if computer_use_tool:
-            tool_params.append(computer_use_tool)
-            computer_use = True
-        else:
-            tool_params.append(
-                ToolParam(
-                    name=tool.name,
-                    description=tool.description,
-                    input_schema=tool.parameters.model_dump(exclude_none=True),
-                )
-            )
-
-    return tool_params, computer_use
-
-
-def computer_use_tool_param(tool: ToolInfo) -> ComputerUseToolParam | None:
-    # check for compatible 'computer' tool
-    if tool.name == "computer" and (
-        sorted(tool.parameters.properties.keys())
-        == sorted(["action", "coordinate", "text"])
-    ):
-        return ComputerUseToolParam(
-            type="computer_20241022",
-            name="computer",
-            # Note: The dimensions passed here for display_width_px and display_height_px should
-            # match the dimensions of screenshots returned by the tool.
-            # Those dimensions will always be one of the values in MAX_SCALING_TARGETS
-            # in _x11_client.py.
-            # TODO: enhance this code to calculate the dimensions based on the scaled screen
-            # size used by the container.
-            display_width_px=1366,
-            display_height_px=768,
-            display_number=1,
-        )
-    # not a computer_use tool
-    else:
-        return None
-
-
 def add_cache_control(
     param: TextBlockParam | ToolParam | ComputerUseToolParam | dict[str, Any],
 ) -> None:
@@ -498,7 +586,7 @@ def combine_messages(a: MessageParam, b: MessageParam) -> MessageParam:
     role = a["role"]
     a_content = a["content"]
     b_content = b["content"]
-    if isinstance(a_content, str) and isinstance(
+    if isinstance(a_content, str) and isinstance(b_content, str):
         return MessageParam(role=role, content=f"{a_content}\n{b_content}")
     elif isinstance(a_content, list) and isinstance(b_content, list):
         return MessageParam(role=role, content=a_content + b_content)
@@ -514,9 +602,15 @@ def combine_messages(a: MessageParam, b: MessageParam) -> MessageParam:
     raise ValueError(f"Unexpected content types for messages: {a}, {b}")
 
 
-def message_tool_choice(
+def message_tool_choice(
+    tool_choice: ToolChoice, thinking_model: bool
+) -> message_create_params.ToolChoice:
     if isinstance(tool_choice, ToolFunction):
-
+        # forced tool use not compatible with thinking models
+        if thinking_model:
+            return {"type": "any"}
+        else:
+            return {"type": "tool", "name": tool_choice.name}
     elif tool_choice == "any":
         return {"type": "any"}
     elif tool_choice == "none":
@@ -544,9 +638,15 @@ async def message_param(message: ChatMessage) -> MessageParam:
     # "tool" means serving a tool call result back to claude
     elif message.role == "tool":
         if message.error is not None:
-            content:
-
-
+            content: (
+                str
+                | list[
+                    TextBlockParam
+                    | ImageBlockParam
+                    | ThinkingBlockParam
+                    | RedactedThinkingBlockParam
+                ]
+            ) = message.error.message
             # anthropic requires that content be populated when
             # is_error is true (throws bad_request_error when not)
             # so make sure this precondition is met
@@ -567,7 +667,7 @@ async def message_param(message: ChatMessage) -> MessageParam:
                 ToolResultBlockParam(
                     tool_use_id=str(message.tool_call_id),
                     type="tool_result",
-                    content=content,
+                    content=cast(list[TextBlockParam | ImageBlockParam], content),
                     is_error=message.error is not None,
                 )
             ],
@@ -576,7 +676,13 @@ async def message_param(message: ChatMessage) -> MessageParam:
     # tool_calls means claude is attempting to call our tools
     elif message.role == "assistant" and message.tool_calls:
         # first include content (claude <thinking>)
-        tools_content: list[
+        tools_content: list[
+            TextBlockParam
+            | ThinkingBlockParam
+            | RedactedThinkingBlockParam
+            | ImageBlockParam
+            | ToolUseBlockParam
+        ] = (
             [TextBlockParam(type="text", text=message.content or NO_CONTENT)]
             if isinstance(message.content, str)
             else (
@@ -645,6 +751,16 @@ def model_output_from_message(message: Message, tools: list[ToolInfo]) -> ModelO
                     arguments=content_block.model_dump().get("input", {}),
                 )
             )
+        elif isinstance(content_block, RedactedThinkingBlock):
+            content.append(
+                ContentReasoning(reasoning=content_block.data, redacted=True)
+            )
+        elif isinstance(content_block, ThinkingBlock):
+            content.append(
+                ContentReasoning(
+                    reasoning=content_block.thinking, signature=content_block.signature
+                )
+            )
 
     # resolve choice
     choice = ChatCompletionChoice(
@@ -702,7 +818,7 @@ def split_system_messages(
 
 async def message_param_content(
     content: Content,
-) -> TextBlockParam | ImageBlockParam:
+) -> TextBlockParam | ImageBlockParam | ThinkingBlockParam | RedactedThinkingBlockParam:
     if isinstance(content, ContentText):
         return TextBlockParam(type="text", text=content.text or NO_CONTENT)
     elif isinstance(content, ContentImage):
@@ -720,6 +836,18 @@
             type="image",
             source=dict(type="base64", media_type=cast(Any, media_type), data=image),
         )
+    elif isinstance(content, ContentReasoning):
+        if content.redacted:
+            return RedactedThinkingBlockParam(
+                type="redacted_thinking",
+                data=content.reasoning,
+            )
+        else:
+            if content.signature is None:
+                raise ValueError("Thinking content without signature.")
+            return ThinkingBlockParam(
+                type="thinking", thinking=content.reasoning, signature=content.signature
+            )
     else:
         raise RuntimeError(
            "Anthropic models do not currently support audio or video inputs."
```
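The thinking changes above are driven entirely by `GenerateConfig.reasoning_tokens`: when set for a thinking-capable model, the provider streams the response, adds the `thinking` request param, and surfaces reasoning as `ContentReasoning` blocks in the assistant message. A minimal sketch of exercising that path from user code (the model name and token budget are illustrative, and it assumes `ContentReasoning` is exported from `inspect_ai.model` as of this release):

```python
import asyncio

from inspect_ai.model import ContentReasoning, GenerateConfig, get_model


async def main() -> None:
    model = get_model("anthropic/claude-3-7-sonnet-20250219")  # illustrative model
    output = await model.generate(
        "What is 17 * 24?",
        config=GenerateConfig(reasoning_tokens=4096),  # enables the streaming path
    )
    content = output.message.content
    if not isinstance(content, str):
        # reasoning now arrives as ContentReasoning blocks inside content
        for block in content:
            if isinstance(block, ContentReasoning):
                print("thinking:", block.reasoning[:100])


asyncio.run(main())
```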
inspect_ai/model/_providers/google.py

```diff
@@ -38,10 +38,13 @@ from pydantic import JsonValue
 from typing_extensions import override
 
 from inspect_ai._util.constants import BASE_64_DATA_REMOVED, NO_CONTENT
-from inspect_ai._util.content import
+from inspect_ai._util.content import (
+    Content as InspectContent,
+)
 from inspect_ai._util.content import (
     ContentAudio,
     ContentImage,
+    ContentReasoning,
     ContentText,
     ContentVideo,
 )
@@ -250,7 +253,10 @@ class GoogleGenAIAPI(ModelAPI):
 
     @override
     def is_rate_limit(self, ex: BaseException) -> bool:
-
+        # see https://cloud.google.com/storage/docs/retry-strategy
+        return isinstance(ex, APIError) and (
+            ex.code in (408, 429, 429) or ex.code >= 500
+        )
 
     @override
     def connection_key(self) -> str:
@@ -405,6 +411,8 @@ async def content_part(client: Client, content: InspectContent | str) -> Part:
         return Part.from_text(text=content or NO_CONTENT)
     elif isinstance(content, ContentText):
         return Part.from_text(text=content.text or NO_CONTENT)
+    elif isinstance(content, ContentReasoning):
+        return Part.from_text(text=content.reasoning or NO_CONTENT)
     else:
         return await chat_content_to_part(client, content)
 
@@ -417,7 +425,8 @@ async def chat_content_to_part(
         content_bytes, mime_type = await file_as_data(content.image)
         return Part.from_bytes(mime_type=mime_type, data=content_bytes)
     else:
-
+        file = await file_for_content(client, content)
+        return Part.from_uri(file_uri=file.uri, mime_type=file.mime_type)
 
 
 async def extract_system_message_as_parts(
@@ -552,11 +561,19 @@ def completion_choice_from_candidate(candidate: Candidate) -> ChatCompletionChoi
     # stop reason
     stop_reason = finish_reason_to_stop_reason(candidate.finish_reason)
 
+    # choice content may include reasoning
+    if reasoning:
+        choice_content: str | list[Content] = [
+            ContentReasoning(reasoning=reasoning),
+            ContentText(text=content),
+        ]
+    else:
+        choice_content = content
+
     # build choice
     choice = ChatCompletionChoice(
         message=ChatMessageAssistant(
-            content=
-            reasoning=reasoning,
+            content=choice_content,
             tool_calls=tool_calls if len(tool_calls) > 0 else None,
             source="generate",
         ),
@@ -742,7 +759,7 @@ async def file_for_content(
     uploaded_file = files_db.get(content_sha256)
     if uploaded_file:
         try:
-            upload: File = client.files.get(uploaded_file)
+            upload: File = client.files.get(name=uploaded_file)
             if upload.state.name == "ACTIVE":
                 trace(f"Using uploaded file: {uploaded_file}")
                 return upload
@@ -754,10 +771,12 @@
             trace(f"Error attempting to access uploaded file: {ex}")
             files_db.delete(content_sha256)
     # do the upload (and record it)
-    upload = client.files.upload(
+    upload = client.files.upload(
+        file=BytesIO(content_bytes), config=dict(mime_type=mime_type)
+    )
    while upload.state.name == "PROCESSING":
         await asyncio.sleep(3)
-        upload = client.files.get(upload.name)
+        upload = client.files.get(name=upload.name)
     if upload.state.name == "FAILED":
         trace(f"Failed to upload file '{upload.name}: {upload.error}")
         raise ValueError(f"Google file upload failed: {upload.error}")
```
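The `file_for_content` changes track the google-genai SDK's move to keyword arguments for the Files API. A sketch of the new calling convention in isolation (the file contents and mime type are illustrative; assumes `GOOGLE_API_KEY` is set in the environment):

```python
from io import BytesIO

from google import genai

client = genai.Client()  # reads GOOGLE_API_KEY from the environment

# upload now takes file= and config= keyword arguments
upload = client.files.upload(
    file=BytesIO(b"%PDF-1.4 ..."),  # any file-like object
    config=dict(mime_type="application/pdf"),
)

# lookups are by the name= keyword rather than a positional argument
same = client.files.get(name=upload.name)
print(same.state.name)
```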
inspect_ai/model/_providers/groq.py

```diff
@@ -28,7 +28,7 @@ from inspect_ai._util.constants import (
     DEFAULT_MAX_RETRIES,
     DEFAULT_MAX_TOKENS,
 )
-from inspect_ai._util.content import Content
+from inspect_ai._util.content import Content, ContentReasoning, ContentText
 from inspect_ai._util.images import file_as_data_uri
 from inspect_ai._util.url import is_http_url
 from inspect_ai.tool import ToolCall, ToolChoice, ToolFunction, ToolInfo
@@ -326,12 +326,17 @@ def chat_tool_calls(message: Any, tools: list[ToolInfo]) -> Optional[List[ToolCa
 def chat_message_assistant(message: Any, tools: list[ToolInfo]) -> ChatMessageAssistant:
     reasoning = getattr(message, "reasoning", None)
     if reasoning is not None:
-
+        content: str | list[Content] = [
+            ContentReasoning(reasoning=str(reasoning)),
+            ContentText(text=message.content or ""),
+        ]
+    else:
+        content = message.content or ""
+
     return ChatMessageAssistant(
-        content=
+        content=content,
         source="generate",
         tool_calls=chat_tool_calls(message, tools),
-        reasoning=reasoning,
     )
 
 
```
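With the separate `reasoning` field gone from `ChatMessageAssistant`, reasoning for Groq (and the other providers above) now travels as a `ContentReasoning` block inside `content`. A hypothetical helper showing how downstream code might read it back out (`split_reasoning` is illustrative, not part of the package; assumes `ContentReasoning` is exported from `inspect_ai.model` in this release):

```python
from inspect_ai.model import ChatMessageAssistant, ContentReasoning, ContentText


def split_reasoning(message: ChatMessageAssistant) -> tuple[str | None, str]:
    """Return (reasoning, text) from an assistant message."""
    if isinstance(message.content, str):
        # plain string content carries no reasoning block
        return None, message.content
    reasoning = next(
        (c.reasoning for c in message.content if isinstance(c, ContentReasoning)),
        None,
    )
    text = "".join(c.text for c in message.content if isinstance(c, ContentText))
    return reasoning, text
```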