inspect-ai 0.3.69__py3-none-any.whl → 0.3.71__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- inspect_ai/_cli/eval.py +27 -9
- inspect_ai/_display/core/display.py +2 -0
- inspect_ai/_display/core/footer.py +13 -3
- inspect_ai/_display/plain/display.py +6 -2
- inspect_ai/_display/rich/display.py +19 -6
- inspect_ai/_display/textual/app.py +9 -3
- inspect_ai/_display/textual/display.py +4 -0
- inspect_ai/_display/textual/widgets/samples.py +4 -10
- inspect_ai/_display/textual/widgets/transcript.py +35 -18
- inspect_ai/_eval/eval.py +14 -2
- inspect_ai/_eval/evalset.py +6 -1
- inspect_ai/_eval/run.py +6 -0
- inspect_ai/_eval/task/run.py +49 -23
- inspect_ai/_eval/task/task.py +26 -3
- inspect_ai/_util/content.py +20 -1
- inspect_ai/_util/interrupt.py +6 -0
- inspect_ai/_util/logger.py +19 -0
- inspect_ai/_util/rich.py +7 -8
- inspect_ai/_util/text.py +13 -0
- inspect_ai/_util/transcript.py +20 -6
- inspect_ai/_util/working.py +50 -0
- inspect_ai/_view/www/App.css +6 -0
- inspect_ai/_view/www/dist/assets/index.css +171 -99
- inspect_ai/_view/www/dist/assets/index.js +5972 -2770
- inspect_ai/_view/www/eslint.config.mjs +24 -1
- inspect_ai/_view/www/log-schema.json +619 -21
- inspect_ai/_view/www/package.json +8 -3
- inspect_ai/_view/www/src/App.tsx +2 -2
- inspect_ai/_view/www/src/appearance/icons.ts +3 -1
- inspect_ai/_view/www/src/components/AnsiDisplay.tsx +4 -3
- inspect_ai/_view/www/src/components/Card.tsx +9 -8
- inspect_ai/_view/www/src/components/DownloadButton.tsx +2 -1
- inspect_ai/_view/www/src/components/EmptyPanel.tsx +2 -2
- inspect_ai/_view/www/src/components/ErrorPanel.tsx +4 -3
- inspect_ai/_view/www/src/components/ExpandablePanel.tsx +13 -5
- inspect_ai/_view/www/src/components/FindBand.tsx +3 -3
- inspect_ai/_view/www/src/components/HumanBaselineView.tsx +3 -3
- inspect_ai/_view/www/src/components/LabeledValue.tsx +5 -4
- inspect_ai/_view/www/src/components/LargeModal.tsx +18 -13
- inspect_ai/_view/www/src/components/{LightboxCarousel.css → LightboxCarousel.module.css} +22 -18
- inspect_ai/_view/www/src/components/LightboxCarousel.tsx +36 -27
- inspect_ai/_view/www/src/components/MessageBand.tsx +2 -1
- inspect_ai/_view/www/src/components/NavPills.tsx +9 -8
- inspect_ai/_view/www/src/components/ProgressBar.tsx +2 -1
- inspect_ai/_view/www/src/components/TabSet.tsx +21 -15
- inspect_ai/_view/www/src/index.tsx +2 -2
- inspect_ai/_view/www/src/metadata/MetaDataGrid.tsx +11 -9
- inspect_ai/_view/www/src/metadata/MetaDataView.tsx +3 -2
- inspect_ai/_view/www/src/metadata/MetadataGrid.module.css +1 -0
- inspect_ai/_view/www/src/metadata/RenderedContent.tsx +16 -1
- inspect_ai/_view/www/src/plan/DatasetDetailView.tsx +3 -2
- inspect_ai/_view/www/src/plan/DetailStep.tsx +2 -1
- inspect_ai/_view/www/src/plan/PlanCard.tsx +2 -5
- inspect_ai/_view/www/src/plan/PlanDetailView.tsx +6 -9
- inspect_ai/_view/www/src/plan/ScorerDetailView.tsx +2 -1
- inspect_ai/_view/www/src/plan/SolverDetailView.tsx +3 -3
- inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +2 -2
- inspect_ai/_view/www/src/samples/SampleDialog.tsx +3 -3
- inspect_ai/_view/www/src/samples/SampleDisplay.module.css +9 -1
- inspect_ai/_view/www/src/samples/SampleDisplay.tsx +30 -3
- inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +4 -0
- inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +25 -4
- inspect_ai/_view/www/src/samples/SamplesTools.tsx +2 -1
- inspect_ai/_view/www/src/samples/chat/ChatMessage.tsx +3 -19
- inspect_ai/_view/www/src/samples/chat/ChatMessageRenderer.tsx +2 -1
- inspect_ai/_view/www/src/samples/chat/ChatMessageRow.tsx +2 -1
- inspect_ai/_view/www/src/samples/chat/ChatView.tsx +2 -1
- inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.tsx +22 -7
- inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +35 -6
- inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +2 -2
- inspect_ai/_view/www/src/samples/chat/messages.ts +15 -2
- inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +13 -4
- inspect_ai/_view/www/src/samples/chat/tools/ToolInput.module.css +2 -2
- inspect_ai/_view/www/src/samples/chat/tools/ToolInput.tsx +18 -19
- inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.module.css +1 -1
- inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.tsx +4 -3
- inspect_ai/_view/www/src/samples/chat/tools/ToolTitle.tsx +2 -2
- inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.tsx +2 -3
- inspect_ai/_view/www/src/samples/error/SampleErrorView.tsx +3 -2
- inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +2 -1
- inspect_ai/_view/www/src/samples/list/SampleHeader.tsx +2 -1
- inspect_ai/_view/www/src/samples/list/SampleList.tsx +57 -45
- inspect_ai/_view/www/src/samples/list/SampleRow.tsx +2 -1
- inspect_ai/_view/www/src/samples/list/SampleSeparator.tsx +2 -1
- inspect_ai/_view/www/src/samples/sample-tools/EpochFilter.tsx +2 -2
- inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.tsx +4 -3
- inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +2 -5
- inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +2 -2
- inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +2 -1
- inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +2 -2
- inspect_ai/_view/www/src/samples/transcript/ApprovalEventView.tsx +2 -1
- inspect_ai/_view/www/src/samples/transcript/ErrorEventView.tsx +2 -1
- inspect_ai/_view/www/src/samples/transcript/InfoEventView.tsx +2 -1
- inspect_ai/_view/www/src/samples/transcript/InputEventView.tsx +2 -1
- inspect_ai/_view/www/src/samples/transcript/LoggerEventView.module.css +4 -0
- inspect_ai/_view/www/src/samples/transcript/LoggerEventView.tsx +12 -2
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +1 -1
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +25 -28
- inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.tsx +2 -1
- inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +9 -4
- inspect_ai/_view/www/src/samples/transcript/SampleTranscript.tsx +2 -2
- inspect_ai/_view/www/src/samples/transcript/SandboxEventView.module.css +32 -0
- inspect_ai/_view/www/src/samples/transcript/SandboxEventView.tsx +153 -0
- inspect_ai/_view/www/src/samples/transcript/ScoreEventView.tsx +2 -2
- inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +12 -5
- inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.tsx +18 -14
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +5 -5
- inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +53 -16
- inspect_ai/_view/www/src/samples/transcript/event/EventNav.tsx +2 -1
- inspect_ai/_view/www/src/samples/transcript/event/EventNavs.tsx +2 -1
- inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +6 -3
- inspect_ai/_view/www/src/samples/transcript/event/EventRow.tsx +3 -2
- inspect_ai/_view/www/src/samples/transcript/event/EventSection.tsx +2 -2
- inspect_ai/_view/www/src/samples/transcript/event/EventTimingPanel.module.css +28 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventTimingPanel.tsx +115 -0
- inspect_ai/_view/www/src/samples/transcript/event/utils.ts +29 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateDiffView.tsx +2 -1
- inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +3 -3
- inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +11 -8
- inspect_ai/_view/www/src/samples/transcript/types.ts +3 -1
- inspect_ai/_view/www/src/types/log.d.ts +312 -137
- inspect_ai/_view/www/src/usage/ModelTokenTable.tsx +6 -10
- inspect_ai/_view/www/src/usage/ModelUsagePanel.module.css +4 -0
- inspect_ai/_view/www/src/usage/ModelUsagePanel.tsx +32 -9
- inspect_ai/_view/www/src/usage/TokenTable.tsx +4 -6
- inspect_ai/_view/www/src/usage/UsageCard.tsx +2 -1
- inspect_ai/_view/www/src/utils/format.ts +8 -5
- inspect_ai/_view/www/src/utils/json.ts +24 -0
- inspect_ai/_view/www/src/workspace/WorkSpace.tsx +6 -5
- inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +18 -8
- inspect_ai/_view/www/src/workspace/error/TaskErrorPanel.tsx +2 -1
- inspect_ai/_view/www/src/workspace/navbar/Navbar.tsx +2 -1
- inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +3 -3
- inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +4 -3
- inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +5 -4
- inspect_ai/_view/www/src/workspace/navbar/StatusPanel.tsx +5 -8
- inspect_ai/_view/www/src/workspace/sidebar/EvalStatus.tsx +5 -4
- inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +2 -1
- inspect_ai/_view/www/src/workspace/sidebar/Sidebar.tsx +2 -1
- inspect_ai/_view/www/src/workspace/sidebar/SidebarLogEntry.tsx +2 -2
- inspect_ai/_view/www/src/workspace/sidebar/SidebarScoreView.tsx +2 -1
- inspect_ai/_view/www/src/workspace/sidebar/SidebarScoresView.tsx +2 -2
- inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +2 -2
- inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +2 -5
- inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +12 -11
- inspect_ai/_view/www/yarn.lock +241 -5
- inspect_ai/log/__init__.py +2 -0
- inspect_ai/log/_condense.py +4 -0
- inspect_ai/log/_log.py +72 -12
- inspect_ai/log/_recorders/eval.py +6 -1
- inspect_ai/log/_samples.py +5 -1
- inspect_ai/log/_transcript.py +89 -2
- inspect_ai/model/__init__.py +2 -0
- inspect_ai/model/_call_tools.py +8 -1
- inspect_ai/model/_chat_message.py +22 -7
- inspect_ai/model/_conversation.py +11 -9
- inspect_ai/model/_generate_config.py +25 -4
- inspect_ai/model/_model.py +164 -72
- inspect_ai/model/_model_call.py +10 -3
- inspect_ai/model/_model_output.py +3 -0
- inspect_ai/model/_openai.py +106 -40
- inspect_ai/model/_providers/anthropic.py +145 -26
- inspect_ai/model/_providers/bedrock.py +7 -0
- inspect_ai/model/_providers/cloudflare.py +20 -7
- inspect_ai/model/_providers/google.py +29 -8
- inspect_ai/model/_providers/groq.py +66 -27
- inspect_ai/model/_providers/hf.py +6 -0
- inspect_ai/model/_providers/mistral.py +78 -51
- inspect_ai/model/_providers/openai.py +66 -4
- inspect_ai/model/_providers/openai_o1.py +10 -0
- inspect_ai/model/_providers/providers.py +2 -2
- inspect_ai/model/_providers/util/tracker.py +92 -0
- inspect_ai/model/_providers/vllm.py +13 -5
- inspect_ai/model/_reasoning.py +15 -2
- inspect_ai/scorer/_model.py +23 -19
- inspect_ai/solver/_basic_agent.py +1 -3
- inspect_ai/solver/_bridge/patch.py +0 -2
- inspect_ai/solver/_human_agent/agent.py +14 -10
- inspect_ai/solver/_human_agent/commands/__init__.py +7 -3
- inspect_ai/solver/_human_agent/commands/submit.py +76 -30
- inspect_ai/solver/_limit.py +4 -4
- inspect_ai/solver/_plan.py +0 -3
- inspect_ai/solver/_task_state.py +7 -0
- inspect_ai/tool/__init__.py +2 -0
- inspect_ai/tool/_tool.py +3 -1
- inspect_ai/tool/_tools/_computer/_resources/tool/_run.py +1 -1
- inspect_ai/tool/_tools/_web_browser/_resources/.pylintrc +8 -0
- inspect_ai/tool/_tools/_web_browser/_resources/.vscode/launch.json +24 -0
- inspect_ai/tool/_tools/_web_browser/_resources/.vscode/settings.json +25 -0
- inspect_ai/tool/_tools/_web_browser/_resources/Dockerfile +5 -6
- inspect_ai/tool/_tools/_web_browser/_resources/README.md +10 -11
- inspect_ai/tool/_tools/_web_browser/_resources/accessibility_tree.py +71 -0
- inspect_ai/tool/_tools/_web_browser/_resources/accessibility_tree_node.py +323 -0
- inspect_ai/tool/_tools/_web_browser/_resources/cdp/__init__.py +5 -0
- inspect_ai/tool/_tools/_web_browser/_resources/cdp/a11y.py +279 -0
- inspect_ai/tool/_tools/_web_browser/_resources/cdp/dom.py +9 -0
- inspect_ai/tool/_tools/_web_browser/_resources/cdp/dom_snapshot.py +293 -0
- inspect_ai/tool/_tools/_web_browser/_resources/cdp/page.py +94 -0
- inspect_ai/tool/_tools/_web_browser/_resources/constants.py +2 -0
- inspect_ai/tool/_tools/_web_browser/_resources/images/usage_diagram.svg +2 -0
- inspect_ai/tool/_tools/_web_browser/_resources/playwright_browser.py +50 -0
- inspect_ai/tool/_tools/_web_browser/_resources/playwright_crawler.py +31 -359
- inspect_ai/tool/_tools/_web_browser/_resources/playwright_page_crawler.py +280 -0
- inspect_ai/tool/_tools/_web_browser/_resources/pyproject.toml +65 -0
- inspect_ai/tool/_tools/_web_browser/_resources/rectangle.py +64 -0
- inspect_ai/tool/_tools/_web_browser/_resources/rpc_client_helpers.py +146 -0
- inspect_ai/tool/_tools/_web_browser/_resources/scale_factor.py +64 -0
- inspect_ai/tool/_tools/_web_browser/_resources/test_accessibility_tree_node.py +180 -0
- inspect_ai/tool/_tools/_web_browser/_resources/test_playwright_crawler.py +15 -9
- inspect_ai/tool/_tools/_web_browser/_resources/test_rectangle.py +15 -0
- inspect_ai/tool/_tools/_web_browser/_resources/test_web_client.py +44 -0
- inspect_ai/tool/_tools/_web_browser/_resources/web_browser_rpc_types.py +39 -0
- inspect_ai/tool/_tools/_web_browser/_resources/web_client.py +198 -48
- inspect_ai/tool/_tools/_web_browser/_resources/web_client_new_session.py +26 -25
- inspect_ai/tool/_tools/_web_browser/_resources/web_server.py +178 -39
- inspect_ai/tool/_tools/_web_browser/_web_browser.py +38 -19
- inspect_ai/tool/_tools/_web_search.py +3 -3
- inspect_ai/util/__init__.py +2 -1
- inspect_ai/util/_concurrency.py +14 -8
- inspect_ai/util/_display.py +12 -0
- inspect_ai/util/_sandbox/context.py +15 -0
- inspect_ai/util/_sandbox/docker/docker.py +7 -5
- inspect_ai/util/_sandbox/environment.py +32 -1
- inspect_ai/util/_sandbox/events.py +183 -0
- inspect_ai/util/_sandbox/local.py +3 -3
- inspect_ai/util/_sandbox/self_check.py +131 -43
- inspect_ai/util/_subtask.py +11 -0
- {inspect_ai-0.3.69.dist-info → inspect_ai-0.3.71.dist-info}/METADATA +3 -3
- {inspect_ai-0.3.69.dist-info → inspect_ai-0.3.71.dist-info}/RECORD +233 -211
- {inspect_ai-0.3.69.dist-info → inspect_ai-0.3.71.dist-info}/WHEEL +1 -1
- inspect_ai/_view/www/src/components/VirtualList.module.css +0 -19
- inspect_ai/_view/www/src/components/VirtualList.tsx +0 -292
- inspect_ai/tool/_tools/_web_browser/_resources/accessibility_node.py +0 -312
- inspect_ai/tool/_tools/_web_browser/_resources/dm_env_servicer.py +0 -275
- inspect_ai/tool/_tools/_web_browser/_resources/images/usage_diagram.png +0 -0
- inspect_ai/tool/_tools/_web_browser/_resources/test_accessibility_node.py +0 -176
- inspect_ai/tool/_tools/_web_browser/_resources/test_dm_env_servicer.py +0 -135
- inspect_ai/tool/_tools/_web_browser/_resources/test_web_environment.py +0 -71
- inspect_ai/tool/_tools/_web_browser/_resources/web_environment.py +0 -184
- {inspect_ai-0.3.69.dist-info → inspect_ai-0.3.71.dist-info}/LICENSE +0 -0
- {inspect_ai-0.3.69.dist-info → inspect_ai-0.3.71.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.69.dist-info → inspect_ai-0.3.71.dist-info}/top_level.txt +0 -0
inspect_ai/model/_providers/anthropic.py

@@ -1,10 +1,13 @@
 import functools
 import os
+import re
 import sys
 from copy import copy
 from logging import getLogger
 from typing import Any, Literal, Tuple, TypedDict, cast
 
+from .util.tracker import HttpxTimeTracker
+
 if sys.version_info >= (3, 11):
     from typing import NotRequired
 else:
@@ -26,8 +29,12 @@ from anthropic.types import (
     ImageBlockParam,
     Message,
     MessageParam,
+    RedactedThinkingBlock,
+    RedactedThinkingBlockParam,
     TextBlock,
     TextBlockParam,
+    ThinkingBlock,
+    ThinkingBlockParam,
     ToolParam,
     ToolResultBlockParam,
     ToolUseBlock,
@@ -42,7 +49,12 @@ from inspect_ai._util.constants import (
     DEFAULT_MAX_RETRIES,
     NO_CONTENT,
 )
-from inspect_ai._util.content import Content, ContentImage, ContentText
+from inspect_ai._util.content import (
+    Content,
+    ContentImage,
+    ContentReasoning,
+    ContentText,
+)
 from inspect_ai._util.error import exception_message
 from inspect_ai._util.images import file_as_data_uri
 from inspect_ai._util.logger import warn_once
@@ -150,6 +162,9 @@ class AnthropicAPI(ModelAPI):
             **model_args,
         )
 
+        # create time tracker
+        self._time_tracker = HttpxTimeTracker(self.client._client)
+
     @override
     async def close(self) -> None:
         await self.client.close()
@@ -167,6 +182,9 @@ class AnthropicAPI(ModelAPI):
         tool_choice: ToolChoice,
         config: GenerateConfig,
     ) -> ModelOutput | tuple[ModelOutput | Exception, ModelCall]:
+        # allocate request_id (so we can see it from ModelCall)
+        request_id = self._time_tracker.start_request()
+
         # setup request and response for ModelCall
         request: dict[str, Any] = {}
         response: dict[str, Any] = {}
@@ -176,6 +194,7 @@ class AnthropicAPI(ModelAPI):
                 request=request,
                 response=response,
                 filter=model_call_filter,
+                time=self._time_tracker.end_request(request_id),
             )
 
         # generate
@@ -195,21 +214,33 @@ class AnthropicAPI(ModelAPI):
             request["system"] = system_param
             request["tools"] = tools_param
             if len(tools) > 0:
-                request["tool_choice"] = message_tool_choice(tool_choice)
+                request["tool_choice"] = message_tool_choice(
+                    tool_choice, self.is_using_thinking(config)
+                )
 
             # additional options
-
+            req, headers, betas = self.completion_config(config)
+            request = request | req
 
-            # computer use
+            # extra headers (for time tracker and computer use)
+            extra_headers = headers | {HttpxTimeTracker.REQUEST_ID_HEADER: request_id}
             if computer_use:
-
+                betas.append("computer-use-2024-10-22")
+            if len(betas) > 0:
+                extra_headers["anthropic-beta"] = ",".join(betas)
+
+            request["extra_headers"] = extra_headers
 
             # extra_body
             if self.extra_body is not None:
                 request["extra_body"] = self.extra_body
 
-            # make request
-
+            # make request (stream if we are using reasoning)
+            if self.is_using_thinking(config):
+                async with self.client.messages.stream(**request) as stream:
+                    message = await stream.get_final_message()
+            else:
+                message = await self.client.messages.create(**request, stream=False)
 
             # set response for ModelCall
             response = message.model_dump()
@@ -234,27 +265,67 @@ class AnthropicAPI(ModelAPI):
         else:
             raise ex
 
-    def
-
-
-
-
-
-
-
+    def completion_config(
+        self, config: GenerateConfig
+    ) -> tuple[dict[str, Any], dict[str, str], list[str]]:
+        max_tokens = cast(int, config.max_tokens)
+        params = dict(model=self.model_name, max_tokens=max_tokens)
+        headers: dict[str, str] = {}
+        betas: list[str] = []
+        # some params not compatible with thinking models
+        if not self.is_using_thinking(config):
+            if config.temperature is not None:
+                params["temperature"] = config.temperature
+            if config.top_p is not None:
+                params["top_p"] = config.top_p
+            if config.top_k is not None:
+                params["top_k"] = config.top_k
+
+        # some thinking-only stuff
+        if self.is_using_thinking(config):
+            params["thinking"] = dict(
+                type="enabled", budget_tokens=config.reasoning_tokens
+            )
+            headers["anthropic-version"] = "2023-06-01"
+            if max_tokens > 8192:
+                betas.append("output-128k-2025-02-19")
+
+        # config that applies to all models
         if config.timeout is not None:
             params["timeout"] = float(config.timeout)
         if config.stop_seqs is not None:
            params["stop_sequences"] = config.stop_seqs
-
+
+        # return config
+        return params, headers, betas
 
     @override
     def max_tokens(self) -> int | None:
         # anthropic requires you to explicitly specify max_tokens (most others
         # set it to the maximum allowable output tokens for the model).
-        # set to 4096 which is the
+        # set to 4096 which is the highest possible for claude 3 (claude 3.5
+        # allows up to 8192)
         return 4096
 
+    @override
+    def max_tokens_for_config(self, config: GenerateConfig) -> int | None:
+        max_tokens = cast(int, self.max_tokens())
+        if self.is_thinking_model() and config.reasoning_tokens is not None:
+            max_tokens = max_tokens + config.reasoning_tokens
+        return max_tokens
+
+    def is_using_thinking(self, config: GenerateConfig) -> bool:
+        return self.is_thinking_model() and config.reasoning_tokens is not None
+
+    def is_thinking_model(self) -> bool:
+        return not self.is_claude_3() and not self.is_claude_3_5()
+
+    def is_claude_3(self) -> bool:
+        return re.search(r"claude-3-[a-zA-Z]", self.model_name) is not None
+
+    def is_claude_3_5(self) -> bool:
+        return "claude-3-5-" in self.model_name
+
     @override
     def connection_key(self) -> str:
         return str(self.api_key)
@@ -284,6 +355,14 @@ class AnthropicAPI(ModelAPI):
     def tool_result_images(self) -> bool:
         return True
 
+    @override
+    def emulate_reasoning_history(self) -> bool:
+        return False
+
+    @override
+    def force_reasoning_history(self) -> Literal["none", "all", "last"] | None:
+        return "all"
+
     # convert some common BadRequestError states into 'refusal' model output
     def handle_bad_request(self, ex: BadRequestError) -> ModelOutput | Exception:
         error = exception_message(ex).lower()
@@ -487,7 +566,7 @@ def combine_messages(a: MessageParam, b: MessageParam) -> MessageParam:
     role = a["role"]
     a_content = a["content"]
     b_content = b["content"]
-    if isinstance(a_content, str) and isinstance(
+    if isinstance(a_content, str) and isinstance(b_content, str):
         return MessageParam(role=role, content=f"{a_content}\n{b_content}")
     elif isinstance(a_content, list) and isinstance(b_content, list):
         return MessageParam(role=role, content=a_content + b_content)
@@ -503,9 +582,15 @@ def combine_messages(a: MessageParam, b: MessageParam) -> MessageParam:
     raise ValueError(f"Unexpected content types for messages: {a}, {b}")
 
 
-def message_tool_choice(tool_choice: ToolChoice) -> message_create_params.ToolChoice:
+def message_tool_choice(
+    tool_choice: ToolChoice, thinking_model: bool
+) -> message_create_params.ToolChoice:
     if isinstance(tool_choice, ToolFunction):
-        return {"type": "tool", "name": tool_choice.name}
+        # forced tool use not compatible with thinking models
+        if thinking_model:
+            return {"type": "any"}
+        else:
+            return {"type": "tool", "name": tool_choice.name}
     elif tool_choice == "any":
         return {"type": "any"}
     elif tool_choice == "none":
@@ -533,9 +618,15 @@ async def message_param(message: ChatMessage) -> MessageParam:
     # "tool" means serving a tool call result back to claude
     elif message.role == "tool":
         if message.error is not None:
-            content: str | list[TextBlockParam | ImageBlockParam] = (
-                message.error.message
-            )
+            content: (
+                str
+                | list[
+                    TextBlockParam
+                    | ImageBlockParam
+                    | ThinkingBlockParam
+                    | RedactedThinkingBlockParam
+                ]
+            ) = message.error.message
             # anthropic requires that content be populated when
             # is_error is true (throws bad_request_error when not)
             # so make sure this precondition is met
@@ -556,7 +647,7 @@ async def message_param(message: ChatMessage) -> MessageParam:
                 ToolResultBlockParam(
                     tool_use_id=str(message.tool_call_id),
                     type="tool_result",
-                    content=content,
+                    content=cast(list[TextBlockParam | ImageBlockParam], content),
                     is_error=message.error is not None,
                 )
             ],
@@ -565,7 +656,13 @@ async def message_param(message: ChatMessage) -> MessageParam:
     # tool_calls means claude is attempting to call our tools
     elif message.role == "assistant" and message.tool_calls:
         # first include content (claude <thinking>)
-        tools_content: list[TextBlockParam | ImageBlockParam | ToolUseBlockParam] = (
+        tools_content: list[
+            TextBlockParam
+            | ThinkingBlockParam
+            | RedactedThinkingBlockParam
+            | ImageBlockParam
+            | ToolUseBlockParam
+        ] = (
             [TextBlockParam(type="text", text=message.content or NO_CONTENT)]
             if isinstance(message.content, str)
             else (
@@ -634,6 +731,16 @@ def model_output_from_message(message: Message, tools: list[ToolInfo]) -> ModelOutput:
                     arguments=content_block.model_dump().get("input", {}),
                 )
             )
+        elif isinstance(content_block, RedactedThinkingBlock):
+            content.append(
+                ContentReasoning(reasoning=content_block.data, redacted=True)
+            )
+        elif isinstance(content_block, ThinkingBlock):
+            content.append(
+                ContentReasoning(
+                    reasoning=content_block.thinking, signature=content_block.signature
+                )
+            )
 
     # resolve choice
     choice = ChatCompletionChoice(
@@ -691,7 +798,7 @@ def split_system_messages(
 
 async def message_param_content(
     content: Content,
-) -> TextBlockParam | ImageBlockParam:
+) -> TextBlockParam | ImageBlockParam | ThinkingBlockParam | RedactedThinkingBlockParam:
     if isinstance(content, ContentText):
         return TextBlockParam(type="text", text=content.text or NO_CONTENT)
     elif isinstance(content, ContentImage):
@@ -709,6 +816,18 @@ async def message_param_content(
             type="image",
             source=dict(type="base64", media_type=cast(Any, media_type), data=image),
         )
+    elif isinstance(content, ContentReasoning):
+        if content.redacted:
+            return RedactedThinkingBlockParam(
+                type="redacted_thinking",
+                data=content.reasoning,
+            )
+        else:
+            if content.signature is None:
+                raise ValueError("Thinking content without signature.")
+            return ThinkingBlockParam(
+                type="thinking", thinking=content.reasoning, signature=content.signature
+            )
    else:
        raise RuntimeError(
            "Anthropic models do not currently support audio or video inputs."
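The new thinking support above hinges on two string checks against the model name, plus a configured `reasoning_tokens` budget (`is_using_thinking` requires both). A standalone sketch of that classification logic, with illustrative sample model IDs:

```python
import re

def is_claude_3(model_name: str) -> bool:
    # a letter after "claude-3-" distinguishes e.g. "claude-3-opus"
    # from "claude-3-5-sonnet"
    return re.search(r"claude-3-[a-zA-Z]", model_name) is not None

def is_claude_3_5(model_name: str) -> bool:
    return "claude-3-5-" in model_name

def is_thinking_model(model_name: str) -> bool:
    # anything newer than claude 3 / claude 3.5 is treated as thinking-capable;
    # thinking is then actually enabled only when reasoning_tokens is set
    return not is_claude_3(model_name) and not is_claude_3_5(model_name)

# illustrative model IDs, not an exhaustive list
for name in (
    "claude-3-haiku-20240307",
    "claude-3-5-sonnet-latest",
    "claude-3-7-sonnet-latest",
):
    print(f"{name}: thinking={is_thinking_model(name)}")
# claude-3-haiku-20240307: thinking=False
# claude-3-5-sonnet-latest: thinking=False
# claude-3-7-sonnet-latest: thinking=True
```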
inspect_ai/model/_providers/bedrock.py

@@ -31,6 +31,7 @@ from .._model_output import ChatCompletionChoice, ModelOutput, ModelUsage
 from .util import (
     model_base_url,
 )
+from .util.tracker import BotoTimeTracker
 
 # Model for Bedrock Converse API (Response)
 # generated from: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/bedrock-runtime/client/converse.html#converse
@@ -256,6 +257,9 @@ class BedrockAPI(ModelAPI):
             # Create a shared session to be used when generating
             self.session = aioboto3.Session()
 
+            # create time tracker
+            self._time_tracker = BotoTimeTracker(self.session)
+
         except ImportError:
             raise pip_dependency_error("Bedrock API", ["aioboto3"])
 
@@ -313,6 +317,7 @@ class BedrockAPI(ModelAPI):
         from botocore.exceptions import ClientError
 
         # The bedrock client
+        request_id = self._time_tracker.start_request()
         async with self.session.client(  # type: ignore[call-overload]
             service_name="bedrock-runtime",
             endpoint_url=self.base_url,
@@ -325,6 +330,7 @@ class BedrockAPI(ModelAPI):
                     else DEFAULT_MAX_RETRIES,
                     mode="adaptive",
                 ),
+                user_agent_extra=self._time_tracker.user_agent_extra(request_id),
             ),
             **self.model_args,
         ) as client:
@@ -364,6 +370,7 @@ class BedrockAPI(ModelAPI):
                     request.model_dump(exclude_none=True)
                 ),
                 response=response,
+                time=self._time_tracker.end_request(request_id),
             )
 
         try:
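The trackers used across these providers come from the new `util/tracker.py` (+92 lines, not shown in this diff). From the call sites, the contract is: `start_request()` allocates an ID, that ID rides along on the outgoing request (a request header for httpx-based clients, `user_agent_extra` for boto, which exposes no per-request header hook here), and `end_request(id)` yields the elapsed time recorded on the `ModelCall`. A minimal sketch of an httpx variant consistent with that usage; the header name and all internals are assumptions, not the actual implementation:

```python
import time
import uuid

import httpx

class HttpxTimeTracker:
    """Sketch only: track request wall-clock time via an ID header."""

    # assumption: the real header name is not shown in this diff
    REQUEST_ID_HEADER = "x-inspect-request-id"

    def __init__(self, client: httpx.AsyncClient) -> None:
        self._start_times: dict[str, float] = {}

        # reset the start time whenever the tagged request actually hits the
        # wire, so retries don't count backoff from earlier attempts
        async def on_request(request: httpx.Request) -> None:
            request_id = request.headers.get(self.REQUEST_ID_HEADER)
            if request_id in self._start_times:
                self._start_times[request_id] = time.monotonic()

        client.event_hooks["request"].append(on_request)

    def start_request(self) -> str:
        request_id = uuid.uuid4().hex
        self._start_times[request_id] = time.monotonic()
        return request_id

    def end_request(self, request_id: str) -> float:
        return time.monotonic() - self._start_times.pop(request_id)
```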
inspect_ai/model/_providers/cloudflare.py

@@ -19,6 +19,7 @@ from .util import (
     is_chat_api_rate_limit,
     model_base_url,
 )
+from .util.tracker import HttpxTimeTracker
 
 # https://developers.cloudflare.com/workers-ai/models/#text-generation
 
@@ -50,6 +51,7 @@ class CloudFlareAPI(ModelAPI):
         if not self.api_key:
             raise environment_prerequisite_error("CloudFlare", CLOUDFLARE_API_TOKEN)
         self.client = httpx.AsyncClient()
+        self._time_tracker = HttpxTimeTracker(self.client)
         base_url = model_base_url(base_url, "CLOUDFLARE_BASE_URL")
         self.base_url = (
             base_url if base_url else "https://api.cloudflare.com/client/v4/accounts"
@@ -76,12 +78,28 @@ class CloudFlareAPI(ModelAPI):
         json["max_tokens"] = config.max_tokens
         json["messages"] = chat_api_input(input, tools, self.chat_api_handler())
 
+        # request_id
+        request_id = self._time_tracker.start_request()
+
+        # setup response
+        response: dict[str, Any] = {}
+
+        def model_call() -> ModelCall:
+            return ModelCall.create(
+                request=json,
+                response=response,
+                time=self._time_tracker.end_request(request_id),
+            )
+
         # make the call
         response = await chat_api_request(
             self.client,
             model_name=self.model_name,
             url=f"{chat_url}/{self.model_name}",
-            headers={"Authorization": f"Bearer {self.api_key}"},
+            headers={
+                "Authorization": f"Bearer {self.api_key}",
+                HttpxTimeTracker.REQUEST_ID_HEADER: request_id,
+            },
             json=json,
             config=config,
         )
@@ -102,13 +120,8 @@ class CloudFlareAPI(ModelAPI):
                 ],
             )
 
-            # record call
-            call = ModelCall.create(
-                request=dict(model_name=self.model_name, **json), response=response
-            )
-
             # return
-            return output, call
+            return output, model_call()
         else:
             error = str(response.get("errors", "Unknown"))
             raise RuntimeError(f"Error calling {self.model_name}: {error}")
inspect_ai/model/_providers/google.py

@@ -38,10 +38,13 @@ from pydantic import JsonValue
 from typing_extensions import override
 
 from inspect_ai._util.constants import BASE_64_DATA_REMOVED, NO_CONTENT
-from inspect_ai._util.content import Content as InspectContent
+from inspect_ai._util.content import (
+    Content as InspectContent,
+)
 from inspect_ai._util.content import (
     ContentAudio,
     ContentImage,
+    ContentReasoning,
     ContentText,
     ContentVideo,
 )
@@ -229,6 +232,8 @@ class GoogleGenAIAPI(ModelAPI):
                 response=response,
             )
 
+        # TODO: would need to monkey patch AuthorizedSession.request
+
         try:
             response = await self.client.aio.models.generate_content(
                 model=self.model_name,
@@ -248,7 +253,10 @@ class GoogleGenAIAPI(ModelAPI):
 
     @override
     def is_rate_limit(self, ex: BaseException) -> bool:
-
+        # see https://cloud.google.com/storage/docs/retry-strategy
+        return isinstance(ex, APIError) and (
+            ex.code in (408, 429, 429) or ex.code >= 500
+        )
 
     @override
     def connection_key(self) -> str:
@@ -403,6 +411,8 @@ async def content_part(client: Client, content: InspectContent | str) -> Part:
         return Part.from_text(text=content or NO_CONTENT)
     elif isinstance(content, ContentText):
         return Part.from_text(text=content.text or NO_CONTENT)
+    elif isinstance(content, ContentReasoning):
+        return Part.from_text(text=content.reasoning or NO_CONTENT)
     else:
         return await chat_content_to_part(client, content)
 
@@ -415,7 +425,8 @@ async def chat_content_to_part(
         content_bytes, mime_type = await file_as_data(content.image)
         return Part.from_bytes(mime_type=mime_type, data=content_bytes)
     else:
-
+        file = await file_for_content(client, content)
+        return Part.from_uri(file_uri=file.uri, mime_type=file.mime_type)
 
 
 async def extract_system_message_as_parts(
@@ -550,11 +561,19 @@ def completion_choice_from_candidate(candidate: Candidate) -> ChatCompletionChoice:
     # stop reason
     stop_reason = finish_reason_to_stop_reason(candidate.finish_reason)
 
+    # choice content may include reasoning
+    if reasoning:
+        choice_content: str | list[Content] = [
+            ContentReasoning(reasoning=reasoning),
+            ContentText(text=content),
+        ]
+    else:
+        choice_content = content
+
     # build choice
     choice = ChatCompletionChoice(
         message=ChatMessageAssistant(
-            content=content,
-            reasoning=reasoning,
+            content=choice_content,
             tool_calls=tool_calls if len(tool_calls) > 0 else None,
             source="generate",
         ),
@@ -740,7 +759,7 @@ async def file_for_content(
     uploaded_file = files_db.get(content_sha256)
     if uploaded_file:
         try:
-            upload: File = client.files.get(uploaded_file)
+            upload: File = client.files.get(name=uploaded_file)
             if upload.state.name == "ACTIVE":
                 trace(f"Using uploaded file: {uploaded_file}")
                 return upload
@@ -752,10 +771,12 @@ async def file_for_content(
         trace(f"Error attempting to access uploaded file: {ex}")
         files_db.delete(content_sha256)
     # do the upload (and record it)
-    upload = client.files.upload(
+    upload = client.files.upload(
+        file=BytesIO(content_bytes), config=dict(mime_type=mime_type)
+    )
     while upload.state.name == "PROCESSING":
         await asyncio.sleep(3)
-        upload = client.files.get(upload.name)
+        upload = client.files.get(name=upload.name)
     if upload.state.name == "FAILED":
         trace(f"Failed to upload file '{upload.name}: {upload.error}")
         raise ValueError(f"Google file upload failed: {upload.error}")
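Google (above) and Groq (below) adopt the same new convention for reasoning output: rather than the `reasoning=` argument previously passed to `ChatMessageAssistant` (removed in both providers), reasoning travels as a `ContentReasoning` block prepended to the content list. The shared shape, as a sketch:

```python
from inspect_ai._util.content import Content, ContentReasoning, ContentText

def assistant_content(text: str, reasoning: str | None) -> str | list[Content]:
    # mirrors completion_choice_from_candidate (google) and
    # chat_message_assistant (groq) above
    if reasoning:
        return [ContentReasoning(reasoning=reasoning), ContentText(text=text)]
    return text
```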
inspect_ai/model/_providers/groq.py

@@ -1,5 +1,6 @@
 import json
 import os
+from copy import copy
 from typing import Any, Dict, Iterable, List, Optional
 
 import httpx
@@ -19,10 +20,15 @@ from groq.types.chat import (
     ChatCompletionToolMessageParam,
     ChatCompletionUserMessageParam,
 )
+from pydantic import JsonValue
 from typing_extensions import override
 
-from inspect_ai._util.constants import
-
+from inspect_ai._util.constants import (
+    BASE_64_DATA_REMOVED,
+    DEFAULT_MAX_RETRIES,
+    DEFAULT_MAX_TOKENS,
+)
+from inspect_ai._util.content import Content, ContentReasoning, ContentText
 from inspect_ai._util.images import file_as_data_uri
 from inspect_ai._util.url import is_http_url
 from inspect_ai.tool import ToolCall, ToolChoice, ToolFunction, ToolInfo
@@ -48,6 +54,7 @@ from .util import (
     environment_prerequisite_error,
     model_base_url,
 )
+from .util.tracker import HttpxTimeTracker
 
 GROQ_API_KEY = "GROQ_API_KEY"
 
@@ -87,6 +94,9 @@ class GroqAPI(ModelAPI):
             http_client=httpx.AsyncClient(limits=httpx.Limits(max_connections=None)),
         )
 
+        # create time tracker
+        self._time_tracker = HttpxTimeTracker(self.client._client)
+
     @override
     async def close(self) -> None:
         await self.client.close()
@@ -98,6 +108,21 @@ class GroqAPI(ModelAPI):
         tool_choice: ToolChoice,
         config: GenerateConfig,
     ) -> tuple[ModelOutput, ModelCall]:
+        # allocate request_id (so we can see it from ModelCall)
+        request_id = self._time_tracker.start_request()
+
+        # setup request and response for ModelCall
+        request: dict[str, Any] = {}
+        response: dict[str, Any] = {}
+
+        def model_call() -> ModelCall:
+            return ModelCall.create(
+                request=request,
+                response=response,
+                filter=model_call_filter,
+                time=self._time_tracker.end_request(request_id),
+            )
+
         messages = await as_groq_chat_messages(input)
 
         params = self.completion_params(config)
@@ -109,51 +134,52 @@ class GroqAPI(ModelAPI):
         if config.parallel_tool_calls is not None:
             params["parallel_tool_calls"] = config.parallel_tool_calls
 
-
+        request = dict(
             messages=messages,
             model=self.model_name,
+            extra_headers={HttpxTimeTracker.REQUEST_ID_HEADER: request_id},
             **params,
         )
+
+        completion: ChatCompletion = await self.client.chat.completions.create(
+            **request,
+        )
+
+        response = completion.model_dump()
+
         # extract metadata
         metadata: dict[str, Any] = {
-            "id": response.id,
-            "system_fingerprint": response.system_fingerprint,
-            "created": response.created,
+            "id": completion.id,
+            "system_fingerprint": completion.system_fingerprint,
+            "created": completion.created,
         }
-        if response.usage:
+        if completion.usage:
             metadata = metadata | {
-                "queue_time": response.usage.queue_time,
-                "prompt_time": response.usage.prompt_time,
-                "completion_time": response.usage.completion_time,
-                "total_time": response.usage.total_time,
+                "queue_time": completion.usage.queue_time,
+                "prompt_time": completion.usage.prompt_time,
+                "completion_time": completion.usage.completion_time,
+                "total_time": completion.usage.total_time,
             }
 
         # extract output
-        choices = self._chat_choices_from_response(response, tools)
+        choices = self._chat_choices_from_response(completion, tools)
         output = ModelOutput(
-            model=response.model,
+            model=completion.model,
             choices=choices,
             usage=(
                 ModelUsage(
-                    input_tokens=response.usage.prompt_tokens,
-                    output_tokens=response.usage.completion_tokens,
-                    total_tokens=response.usage.total_tokens,
+                    input_tokens=completion.usage.prompt_tokens,
+                    output_tokens=completion.usage.completion_tokens,
+                    total_tokens=completion.usage.total_tokens,
                 )
-                if response.usage
+                if completion.usage
                 else None
             ),
             metadata=metadata,
         )
 
-        # record call
-        call = ModelCall.create(
-            request=dict(messages=messages, model=self.model_name, **params),
-            response=response.model_dump(),
-        )
-
         # return
-        return output, call
+        return output, model_call()
 
     def completion_params(self, config: GenerateConfig) -> Dict[str, Any]:
         params: dict[str, Any] = {}
@@ -300,10 +326,23 @@ def chat_tool_calls(message: Any, tools: list[ToolInfo]) -> Optional[List[ToolCall]]:
 def chat_message_assistant(message: Any, tools: list[ToolInfo]) -> ChatMessageAssistant:
     reasoning = getattr(message, "reasoning", None)
     if reasoning is not None:
-
+        content: str | list[Content] = [
+            ContentReasoning(reasoning=str(reasoning)),
+            ContentText(text=message.content or ""),
+        ]
+    else:
+        content = message.content or ""
+
     return ChatMessageAssistant(
-        content=message.content or "",
+        content=content,
         source="generate",
         tool_calls=chat_tool_calls(message, tools),
-        reasoning=reasoning,
     )
+
+
+def model_call_filter(key: JsonValue | None, value: JsonValue) -> JsonValue:
+    # remove base64 encoded images
+    if key == "image_url" and isinstance(value, dict):
+        value = copy(value)
+        value.update(url=BASE_64_DATA_REMOVED)
+    return value
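A quick illustration of the new `model_call_filter`; the payload fragment is hypothetical, and `ModelCall.create` presumably applies the filter while walking the recorded request JSON:

```python
# hypothetical message part as it would appear in a recorded request
part = {"type": "image_url", "image_url": {"url": "data:image/png;base64,AAAA..."}}

redacted = model_call_filter("image_url", part["image_url"])
# redacted == {"url": BASE_64_DATA_REMOVED}; the original dict is untouched
# because the filter copies before updating
```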