inspect-ai 0.3.69__py3-none-any.whl → 0.3.71__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (242)
  1. inspect_ai/_cli/eval.py +27 -9
  2. inspect_ai/_display/core/display.py +2 -0
  3. inspect_ai/_display/core/footer.py +13 -3
  4. inspect_ai/_display/plain/display.py +6 -2
  5. inspect_ai/_display/rich/display.py +19 -6
  6. inspect_ai/_display/textual/app.py +9 -3
  7. inspect_ai/_display/textual/display.py +4 -0
  8. inspect_ai/_display/textual/widgets/samples.py +4 -10
  9. inspect_ai/_display/textual/widgets/transcript.py +35 -18
  10. inspect_ai/_eval/eval.py +14 -2
  11. inspect_ai/_eval/evalset.py +6 -1
  12. inspect_ai/_eval/run.py +6 -0
  13. inspect_ai/_eval/task/run.py +49 -23
  14. inspect_ai/_eval/task/task.py +26 -3
  15. inspect_ai/_util/content.py +20 -1
  16. inspect_ai/_util/interrupt.py +6 -0
  17. inspect_ai/_util/logger.py +19 -0
  18. inspect_ai/_util/rich.py +7 -8
  19. inspect_ai/_util/text.py +13 -0
  20. inspect_ai/_util/transcript.py +20 -6
  21. inspect_ai/_util/working.py +50 -0
  22. inspect_ai/_view/www/App.css +6 -0
  23. inspect_ai/_view/www/dist/assets/index.css +171 -99
  24. inspect_ai/_view/www/dist/assets/index.js +5972 -2770
  25. inspect_ai/_view/www/eslint.config.mjs +24 -1
  26. inspect_ai/_view/www/log-schema.json +619 -21
  27. inspect_ai/_view/www/package.json +8 -3
  28. inspect_ai/_view/www/src/App.tsx +2 -2
  29. inspect_ai/_view/www/src/appearance/icons.ts +3 -1
  30. inspect_ai/_view/www/src/components/AnsiDisplay.tsx +4 -3
  31. inspect_ai/_view/www/src/components/Card.tsx +9 -8
  32. inspect_ai/_view/www/src/components/DownloadButton.tsx +2 -1
  33. inspect_ai/_view/www/src/components/EmptyPanel.tsx +2 -2
  34. inspect_ai/_view/www/src/components/ErrorPanel.tsx +4 -3
  35. inspect_ai/_view/www/src/components/ExpandablePanel.tsx +13 -5
  36. inspect_ai/_view/www/src/components/FindBand.tsx +3 -3
  37. inspect_ai/_view/www/src/components/HumanBaselineView.tsx +3 -3
  38. inspect_ai/_view/www/src/components/LabeledValue.tsx +5 -4
  39. inspect_ai/_view/www/src/components/LargeModal.tsx +18 -13
  40. inspect_ai/_view/www/src/components/{LightboxCarousel.css → LightboxCarousel.module.css} +22 -18
  41. inspect_ai/_view/www/src/components/LightboxCarousel.tsx +36 -27
  42. inspect_ai/_view/www/src/components/MessageBand.tsx +2 -1
  43. inspect_ai/_view/www/src/components/NavPills.tsx +9 -8
  44. inspect_ai/_view/www/src/components/ProgressBar.tsx +2 -1
  45. inspect_ai/_view/www/src/components/TabSet.tsx +21 -15
  46. inspect_ai/_view/www/src/index.tsx +2 -2
  47. inspect_ai/_view/www/src/metadata/MetaDataGrid.tsx +11 -9
  48. inspect_ai/_view/www/src/metadata/MetaDataView.tsx +3 -2
  49. inspect_ai/_view/www/src/metadata/MetadataGrid.module.css +1 -0
  50. inspect_ai/_view/www/src/metadata/RenderedContent.tsx +16 -1
  51. inspect_ai/_view/www/src/plan/DatasetDetailView.tsx +3 -2
  52. inspect_ai/_view/www/src/plan/DetailStep.tsx +2 -1
  53. inspect_ai/_view/www/src/plan/PlanCard.tsx +2 -5
  54. inspect_ai/_view/www/src/plan/PlanDetailView.tsx +6 -9
  55. inspect_ai/_view/www/src/plan/ScorerDetailView.tsx +2 -1
  56. inspect_ai/_view/www/src/plan/SolverDetailView.tsx +3 -3
  57. inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +2 -2
  58. inspect_ai/_view/www/src/samples/SampleDialog.tsx +3 -3
  59. inspect_ai/_view/www/src/samples/SampleDisplay.module.css +9 -1
  60. inspect_ai/_view/www/src/samples/SampleDisplay.tsx +30 -3
  61. inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +4 -0
  62. inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +25 -4
  63. inspect_ai/_view/www/src/samples/SamplesTools.tsx +2 -1
  64. inspect_ai/_view/www/src/samples/chat/ChatMessage.tsx +3 -19
  65. inspect_ai/_view/www/src/samples/chat/ChatMessageRenderer.tsx +2 -1
  66. inspect_ai/_view/www/src/samples/chat/ChatMessageRow.tsx +2 -1
  67. inspect_ai/_view/www/src/samples/chat/ChatView.tsx +2 -1
  68. inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.tsx +22 -7
  69. inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +35 -6
  70. inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +2 -2
  71. inspect_ai/_view/www/src/samples/chat/messages.ts +15 -2
  72. inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +13 -4
  73. inspect_ai/_view/www/src/samples/chat/tools/ToolInput.module.css +2 -2
  74. inspect_ai/_view/www/src/samples/chat/tools/ToolInput.tsx +18 -19
  75. inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.module.css +1 -1
  76. inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.tsx +4 -3
  77. inspect_ai/_view/www/src/samples/chat/tools/ToolTitle.tsx +2 -2
  78. inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.tsx +2 -3
  79. inspect_ai/_view/www/src/samples/error/SampleErrorView.tsx +3 -2
  80. inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +2 -1
  81. inspect_ai/_view/www/src/samples/list/SampleHeader.tsx +2 -1
  82. inspect_ai/_view/www/src/samples/list/SampleList.tsx +57 -45
  83. inspect_ai/_view/www/src/samples/list/SampleRow.tsx +2 -1
  84. inspect_ai/_view/www/src/samples/list/SampleSeparator.tsx +2 -1
  85. inspect_ai/_view/www/src/samples/sample-tools/EpochFilter.tsx +2 -2
  86. inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.tsx +4 -3
  87. inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +2 -5
  88. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +2 -2
  89. inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +2 -1
  90. inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +2 -2
  91. inspect_ai/_view/www/src/samples/transcript/ApprovalEventView.tsx +2 -1
  92. inspect_ai/_view/www/src/samples/transcript/ErrorEventView.tsx +2 -1
  93. inspect_ai/_view/www/src/samples/transcript/InfoEventView.tsx +2 -1
  94. inspect_ai/_view/www/src/samples/transcript/InputEventView.tsx +2 -1
  95. inspect_ai/_view/www/src/samples/transcript/LoggerEventView.module.css +4 -0
  96. inspect_ai/_view/www/src/samples/transcript/LoggerEventView.tsx +12 -2
  97. inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +1 -1
  98. inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +25 -28
  99. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.tsx +2 -1
  100. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +9 -4
  101. inspect_ai/_view/www/src/samples/transcript/SampleTranscript.tsx +2 -2
  102. inspect_ai/_view/www/src/samples/transcript/SandboxEventView.module.css +32 -0
  103. inspect_ai/_view/www/src/samples/transcript/SandboxEventView.tsx +153 -0
  104. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.tsx +2 -2
  105. inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +12 -5
  106. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.tsx +18 -14
  107. inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +5 -5
  108. inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +53 -16
  109. inspect_ai/_view/www/src/samples/transcript/event/EventNav.tsx +2 -1
  110. inspect_ai/_view/www/src/samples/transcript/event/EventNavs.tsx +2 -1
  111. inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +6 -3
  112. inspect_ai/_view/www/src/samples/transcript/event/EventRow.tsx +3 -2
  113. inspect_ai/_view/www/src/samples/transcript/event/EventSection.tsx +2 -2
  114. inspect_ai/_view/www/src/samples/transcript/event/EventTimingPanel.module.css +28 -0
  115. inspect_ai/_view/www/src/samples/transcript/event/EventTimingPanel.tsx +115 -0
  116. inspect_ai/_view/www/src/samples/transcript/event/utils.ts +29 -0
  117. inspect_ai/_view/www/src/samples/transcript/state/StateDiffView.tsx +2 -1
  118. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +3 -3
  119. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +11 -8
  120. inspect_ai/_view/www/src/samples/transcript/types.ts +3 -1
  121. inspect_ai/_view/www/src/types/log.d.ts +312 -137
  122. inspect_ai/_view/www/src/usage/ModelTokenTable.tsx +6 -10
  123. inspect_ai/_view/www/src/usage/ModelUsagePanel.module.css +4 -0
  124. inspect_ai/_view/www/src/usage/ModelUsagePanel.tsx +32 -9
  125. inspect_ai/_view/www/src/usage/TokenTable.tsx +4 -6
  126. inspect_ai/_view/www/src/usage/UsageCard.tsx +2 -1
  127. inspect_ai/_view/www/src/utils/format.ts +8 -5
  128. inspect_ai/_view/www/src/utils/json.ts +24 -0
  129. inspect_ai/_view/www/src/workspace/WorkSpace.tsx +6 -5
  130. inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +18 -8
  131. inspect_ai/_view/www/src/workspace/error/TaskErrorPanel.tsx +2 -1
  132. inspect_ai/_view/www/src/workspace/navbar/Navbar.tsx +2 -1
  133. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +3 -3
  134. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +4 -3
  135. inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +5 -4
  136. inspect_ai/_view/www/src/workspace/navbar/StatusPanel.tsx +5 -8
  137. inspect_ai/_view/www/src/workspace/sidebar/EvalStatus.tsx +5 -4
  138. inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +2 -1
  139. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.tsx +2 -1
  140. inspect_ai/_view/www/src/workspace/sidebar/SidebarLogEntry.tsx +2 -2
  141. inspect_ai/_view/www/src/workspace/sidebar/SidebarScoreView.tsx +2 -1
  142. inspect_ai/_view/www/src/workspace/sidebar/SidebarScoresView.tsx +2 -2
  143. inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +2 -2
  144. inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +2 -5
  145. inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +12 -11
  146. inspect_ai/_view/www/yarn.lock +241 -5
  147. inspect_ai/log/__init__.py +2 -0
  148. inspect_ai/log/_condense.py +4 -0
  149. inspect_ai/log/_log.py +72 -12
  150. inspect_ai/log/_recorders/eval.py +6 -1
  151. inspect_ai/log/_samples.py +5 -1
  152. inspect_ai/log/_transcript.py +89 -2
  153. inspect_ai/model/__init__.py +2 -0
  154. inspect_ai/model/_call_tools.py +8 -1
  155. inspect_ai/model/_chat_message.py +22 -7
  156. inspect_ai/model/_conversation.py +11 -9
  157. inspect_ai/model/_generate_config.py +25 -4
  158. inspect_ai/model/_model.py +164 -72
  159. inspect_ai/model/_model_call.py +10 -3
  160. inspect_ai/model/_model_output.py +3 -0
  161. inspect_ai/model/_openai.py +106 -40
  162. inspect_ai/model/_providers/anthropic.py +145 -26
  163. inspect_ai/model/_providers/bedrock.py +7 -0
  164. inspect_ai/model/_providers/cloudflare.py +20 -7
  165. inspect_ai/model/_providers/google.py +29 -8
  166. inspect_ai/model/_providers/groq.py +66 -27
  167. inspect_ai/model/_providers/hf.py +6 -0
  168. inspect_ai/model/_providers/mistral.py +78 -51
  169. inspect_ai/model/_providers/openai.py +66 -4
  170. inspect_ai/model/_providers/openai_o1.py +10 -0
  171. inspect_ai/model/_providers/providers.py +2 -2
  172. inspect_ai/model/_providers/util/tracker.py +92 -0
  173. inspect_ai/model/_providers/vllm.py +13 -5
  174. inspect_ai/model/_reasoning.py +15 -2
  175. inspect_ai/scorer/_model.py +23 -19
  176. inspect_ai/solver/_basic_agent.py +1 -3
  177. inspect_ai/solver/_bridge/patch.py +0 -2
  178. inspect_ai/solver/_human_agent/agent.py +14 -10
  179. inspect_ai/solver/_human_agent/commands/__init__.py +7 -3
  180. inspect_ai/solver/_human_agent/commands/submit.py +76 -30
  181. inspect_ai/solver/_limit.py +4 -4
  182. inspect_ai/solver/_plan.py +0 -3
  183. inspect_ai/solver/_task_state.py +7 -0
  184. inspect_ai/tool/__init__.py +2 -0
  185. inspect_ai/tool/_tool.py +3 -1
  186. inspect_ai/tool/_tools/_computer/_resources/tool/_run.py +1 -1
  187. inspect_ai/tool/_tools/_web_browser/_resources/.pylintrc +8 -0
  188. inspect_ai/tool/_tools/_web_browser/_resources/.vscode/launch.json +24 -0
  189. inspect_ai/tool/_tools/_web_browser/_resources/.vscode/settings.json +25 -0
  190. inspect_ai/tool/_tools/_web_browser/_resources/Dockerfile +5 -6
  191. inspect_ai/tool/_tools/_web_browser/_resources/README.md +10 -11
  192. inspect_ai/tool/_tools/_web_browser/_resources/accessibility_tree.py +71 -0
  193. inspect_ai/tool/_tools/_web_browser/_resources/accessibility_tree_node.py +323 -0
  194. inspect_ai/tool/_tools/_web_browser/_resources/cdp/__init__.py +5 -0
  195. inspect_ai/tool/_tools/_web_browser/_resources/cdp/a11y.py +279 -0
  196. inspect_ai/tool/_tools/_web_browser/_resources/cdp/dom.py +9 -0
  197. inspect_ai/tool/_tools/_web_browser/_resources/cdp/dom_snapshot.py +293 -0
  198. inspect_ai/tool/_tools/_web_browser/_resources/cdp/page.py +94 -0
  199. inspect_ai/tool/_tools/_web_browser/_resources/constants.py +2 -0
  200. inspect_ai/tool/_tools/_web_browser/_resources/images/usage_diagram.svg +2 -0
  201. inspect_ai/tool/_tools/_web_browser/_resources/playwright_browser.py +50 -0
  202. inspect_ai/tool/_tools/_web_browser/_resources/playwright_crawler.py +31 -359
  203. inspect_ai/tool/_tools/_web_browser/_resources/playwright_page_crawler.py +280 -0
  204. inspect_ai/tool/_tools/_web_browser/_resources/pyproject.toml +65 -0
  205. inspect_ai/tool/_tools/_web_browser/_resources/rectangle.py +64 -0
  206. inspect_ai/tool/_tools/_web_browser/_resources/rpc_client_helpers.py +146 -0
  207. inspect_ai/tool/_tools/_web_browser/_resources/scale_factor.py +64 -0
  208. inspect_ai/tool/_tools/_web_browser/_resources/test_accessibility_tree_node.py +180 -0
  209. inspect_ai/tool/_tools/_web_browser/_resources/test_playwright_crawler.py +15 -9
  210. inspect_ai/tool/_tools/_web_browser/_resources/test_rectangle.py +15 -0
  211. inspect_ai/tool/_tools/_web_browser/_resources/test_web_client.py +44 -0
  212. inspect_ai/tool/_tools/_web_browser/_resources/web_browser_rpc_types.py +39 -0
  213. inspect_ai/tool/_tools/_web_browser/_resources/web_client.py +198 -48
  214. inspect_ai/tool/_tools/_web_browser/_resources/web_client_new_session.py +26 -25
  215. inspect_ai/tool/_tools/_web_browser/_resources/web_server.py +178 -39
  216. inspect_ai/tool/_tools/_web_browser/_web_browser.py +38 -19
  217. inspect_ai/tool/_tools/_web_search.py +3 -3
  218. inspect_ai/util/__init__.py +2 -1
  219. inspect_ai/util/_concurrency.py +14 -8
  220. inspect_ai/util/_display.py +12 -0
  221. inspect_ai/util/_sandbox/context.py +15 -0
  222. inspect_ai/util/_sandbox/docker/docker.py +7 -5
  223. inspect_ai/util/_sandbox/environment.py +32 -1
  224. inspect_ai/util/_sandbox/events.py +183 -0
  225. inspect_ai/util/_sandbox/local.py +3 -3
  226. inspect_ai/util/_sandbox/self_check.py +131 -43
  227. inspect_ai/util/_subtask.py +11 -0
  228. {inspect_ai-0.3.69.dist-info → inspect_ai-0.3.71.dist-info}/METADATA +3 -3
  229. {inspect_ai-0.3.69.dist-info → inspect_ai-0.3.71.dist-info}/RECORD +233 -211
  230. {inspect_ai-0.3.69.dist-info → inspect_ai-0.3.71.dist-info}/WHEEL +1 -1
  231. inspect_ai/_view/www/src/components/VirtualList.module.css +0 -19
  232. inspect_ai/_view/www/src/components/VirtualList.tsx +0 -292
  233. inspect_ai/tool/_tools/_web_browser/_resources/accessibility_node.py +0 -312
  234. inspect_ai/tool/_tools/_web_browser/_resources/dm_env_servicer.py +0 -275
  235. inspect_ai/tool/_tools/_web_browser/_resources/images/usage_diagram.png +0 -0
  236. inspect_ai/tool/_tools/_web_browser/_resources/test_accessibility_node.py +0 -176
  237. inspect_ai/tool/_tools/_web_browser/_resources/test_dm_env_servicer.py +0 -135
  238. inspect_ai/tool/_tools/_web_browser/_resources/test_web_environment.py +0 -71
  239. inspect_ai/tool/_tools/_web_browser/_resources/web_environment.py +0 -184
  240. {inspect_ai-0.3.69.dist-info → inspect_ai-0.3.71.dist-info}/LICENSE +0 -0
  241. {inspect_ai-0.3.69.dist-info → inspect_ai-0.3.71.dist-info}/entry_points.txt +0 -0
  242. {inspect_ai-0.3.69.dist-info → inspect_ai-0.3.71.dist-info}/top_level.txt +0 -0
@@ -1,10 +1,13 @@
1
1
  import functools
2
2
  import os
3
+ import re
3
4
  import sys
4
5
  from copy import copy
5
6
  from logging import getLogger
6
7
  from typing import Any, Literal, Tuple, TypedDict, cast
7
8
 
9
+ from .util.tracker import HttpxTimeTracker
10
+
8
11
  if sys.version_info >= (3, 11):
9
12
  from typing import NotRequired
10
13
  else:
@@ -26,8 +29,12 @@ from anthropic.types import (
26
29
  ImageBlockParam,
27
30
  Message,
28
31
  MessageParam,
32
+ RedactedThinkingBlock,
33
+ RedactedThinkingBlockParam,
29
34
  TextBlock,
30
35
  TextBlockParam,
36
+ ThinkingBlock,
37
+ ThinkingBlockParam,
31
38
  ToolParam,
32
39
  ToolResultBlockParam,
33
40
  ToolUseBlock,
@@ -42,7 +49,12 @@ from inspect_ai._util.constants import (
42
49
  DEFAULT_MAX_RETRIES,
43
50
  NO_CONTENT,
44
51
  )
45
- from inspect_ai._util.content import Content, ContentImage, ContentText
52
+ from inspect_ai._util.content import (
53
+ Content,
54
+ ContentImage,
55
+ ContentReasoning,
56
+ ContentText,
57
+ )
46
58
  from inspect_ai._util.error import exception_message
47
59
  from inspect_ai._util.images import file_as_data_uri
48
60
  from inspect_ai._util.logger import warn_once
@@ -150,6 +162,9 @@ class AnthropicAPI(ModelAPI):
150
162
  **model_args,
151
163
  )
152
164
 
165
+ # create time tracker
166
+ self._time_tracker = HttpxTimeTracker(self.client._client)
167
+
153
168
  @override
154
169
  async def close(self) -> None:
155
170
  await self.client.close()
@@ -167,6 +182,9 @@ class AnthropicAPI(ModelAPI):
167
182
  tool_choice: ToolChoice,
168
183
  config: GenerateConfig,
169
184
  ) -> ModelOutput | tuple[ModelOutput | Exception, ModelCall]:
185
+ # allocate request_id (so we can see it from ModelCall)
186
+ request_id = self._time_tracker.start_request()
187
+
170
188
  # setup request and response for ModelCall
171
189
  request: dict[str, Any] = {}
172
190
  response: dict[str, Any] = {}
@@ -176,6 +194,7 @@ class AnthropicAPI(ModelAPI):
176
194
  request=request,
177
195
  response=response,
178
196
  filter=model_call_filter,
197
+ time=self._time_tracker.end_request(request_id),
179
198
  )
180
199
 
181
200
  # generate
@@ -195,21 +214,33 @@ class AnthropicAPI(ModelAPI):
195
214
  request["system"] = system_param
196
215
  request["tools"] = tools_param
197
216
  if len(tools) > 0:
198
- request["tool_choice"] = message_tool_choice(tool_choice)
217
+ request["tool_choice"] = message_tool_choice(
218
+ tool_choice, self.is_using_thinking(config)
219
+ )
199
220
 
200
221
  # additional options
201
- request = request | self.completion_params(config)
222
+ req, headers, betas = self.completion_config(config)
223
+ request = request | req
202
224
 
203
- # computer use beta
225
+ # extra headers (for time tracker and computer use)
226
+ extra_headers = headers | {HttpxTimeTracker.REQUEST_ID_HEADER: request_id}
204
227
  if computer_use:
205
- request["extra_headers"] = {"anthropic-beta": "computer-use-2024-10-22"}
228
+ betas.append("computer-use-2024-10-22")
229
+ if len(betas) > 0:
230
+ extra_headers["anthropic-beta"] = ",".join(betas)
231
+
232
+ request["extra_headers"] = extra_headers
206
233
 
207
234
  # extra_body
208
235
  if self.extra_body is not None:
209
236
  request["extra_body"] = self.extra_body
210
237
 
211
- # make request
212
- message = await self.client.messages.create(**request, stream=False)
238
+ # make request (stream if we are using reasoning)
239
+ if self.is_using_thinking(config):
240
+ async with self.client.messages.stream(**request) as stream:
241
+ message = await stream.get_final_message()
242
+ else:
243
+ message = await self.client.messages.create(**request, stream=False)
213
244
 
214
245
  # set response for ModelCall
215
246
  response = message.model_dump()
@@ -234,27 +265,67 @@ class AnthropicAPI(ModelAPI):
234
265
  else:
235
266
  raise ex
236
267
 
237
- def completion_params(self, config: GenerateConfig) -> dict[str, Any]:
238
- params = dict(model=self.model_name, max_tokens=cast(int, config.max_tokens))
239
- if config.temperature is not None:
240
- params["temperature"] = config.temperature
241
- if config.top_p is not None:
242
- params["top_p"] = config.top_p
243
- if config.top_k is not None:
244
- params["top_k"] = config.top_k
268
+ def completion_config(
269
+ self, config: GenerateConfig
270
+ ) -> tuple[dict[str, Any], dict[str, str], list[str]]:
271
+ max_tokens = cast(int, config.max_tokens)
272
+ params = dict(model=self.model_name, max_tokens=max_tokens)
273
+ headers: dict[str, str] = {}
274
+ betas: list[str] = []
275
+ # some params not compatible with thinking models
276
+ if not self.is_using_thinking(config):
277
+ if config.temperature is not None:
278
+ params["temperature"] = config.temperature
279
+ if config.top_p is not None:
280
+ params["top_p"] = config.top_p
281
+ if config.top_k is not None:
282
+ params["top_k"] = config.top_k
283
+
284
+ # some thinking-only stuff
285
+ if self.is_using_thinking(config):
286
+ params["thinking"] = dict(
287
+ type="enabled", budget_tokens=config.reasoning_tokens
288
+ )
289
+ headers["anthropic-version"] = "2023-06-01"
290
+ if max_tokens > 8192:
291
+ betas.append("output-128k-2025-02-19")
292
+
293
+ # config that applies to all models
245
294
  if config.timeout is not None:
246
295
  params["timeout"] = float(config.timeout)
247
296
  if config.stop_seqs is not None:
248
297
  params["stop_sequences"] = config.stop_seqs
249
- return params
298
+
299
+ # return config
300
+ return params, headers, betas
250
301
 
251
302
  @override
252
303
  def max_tokens(self) -> int | None:
253
304
  # anthropic requires you to explicitly specify max_tokens (most others
254
305
  # set it to the maximum allowable output tokens for the model).
255
- # set to 4096 which is the lowest documented max_tokens for claude models
306
+ # set to 4096 which is the highest possible for claude 3 (claude 3.5
307
+ # allows up to 8192)
256
308
  return 4096
257
309
 
310
+ @override
311
+ def max_tokens_for_config(self, config: GenerateConfig) -> int | None:
312
+ max_tokens = cast(int, self.max_tokens())
313
+ if self.is_thinking_model() and config.reasoning_tokens is not None:
314
+ max_tokens = max_tokens + config.reasoning_tokens
315
+ return max_tokens
316
+
317
+ def is_using_thinking(self, config: GenerateConfig) -> bool:
318
+ return self.is_thinking_model() and config.reasoning_tokens is not None
319
+
320
+ def is_thinking_model(self) -> bool:
321
+ return not self.is_claude_3() and not self.is_claude_3_5()
322
+
323
+ def is_claude_3(self) -> bool:
324
+ return re.search(r"claude-3-[a-zA-Z]", self.model_name) is not None
325
+
326
+ def is_claude_3_5(self) -> bool:
327
+ return "claude-3-5-" in self.model_name
328
+
258
329
  @override
259
330
  def connection_key(self) -> str:
260
331
  return str(self.api_key)
@@ -284,6 +355,14 @@ class AnthropicAPI(ModelAPI):
284
355
  def tool_result_images(self) -> bool:
285
356
  return True
286
357
 
358
+ @override
359
+ def emulate_reasoning_history(self) -> bool:
360
+ return False
361
+
362
+ @override
363
+ def force_reasoning_history(self) -> Literal["none", "all", "last"] | None:
364
+ return "all"
365
+
287
366
  # convert some common BadRequestError states into 'refusal' model output
288
367
  def handle_bad_request(self, ex: BadRequestError) -> ModelOutput | Exception:
289
368
  error = exception_message(ex).lower()
@@ -487,7 +566,7 @@ def combine_messages(a: MessageParam, b: MessageParam) -> MessageParam:
487
566
  role = a["role"]
488
567
  a_content = a["content"]
489
568
  b_content = b["content"]
490
- if isinstance(a_content, str) and isinstance(a_content, str):
569
+ if isinstance(a_content, str) and isinstance(b_content, str):
491
570
  return MessageParam(role=role, content=f"{a_content}\n{b_content}")
492
571
  elif isinstance(a_content, list) and isinstance(b_content, list):
493
572
  return MessageParam(role=role, content=a_content + b_content)
@@ -503,9 +582,15 @@ def combine_messages(a: MessageParam, b: MessageParam) -> MessageParam:
503
582
  raise ValueError(f"Unexpected content types for messages: {a}, {b}")
504
583
 
505
584
 
506
- def message_tool_choice(tool_choice: ToolChoice) -> message_create_params.ToolChoice:
585
+ def message_tool_choice(
586
+ tool_choice: ToolChoice, thinking_model: bool
587
+ ) -> message_create_params.ToolChoice:
507
588
  if isinstance(tool_choice, ToolFunction):
508
- return {"type": "tool", "name": tool_choice.name}
589
+ # forced tool use not compatible with thinking models
590
+ if thinking_model:
591
+ return {"type": "any"}
592
+ else:
593
+ return {"type": "tool", "name": tool_choice.name}
509
594
  elif tool_choice == "any":
510
595
  return {"type": "any"}
511
596
  elif tool_choice == "none":
@@ -533,9 +618,15 @@ async def message_param(message: ChatMessage) -> MessageParam:
533
618
  # "tool" means serving a tool call result back to claude
534
619
  elif message.role == "tool":
535
620
  if message.error is not None:
536
- content: str | list[TextBlockParam | ImageBlockParam] = (
537
- message.error.message
538
- )
621
+ content: (
622
+ str
623
+ | list[
624
+ TextBlockParam
625
+ | ImageBlockParam
626
+ | ThinkingBlockParam
627
+ | RedactedThinkingBlockParam
628
+ ]
629
+ ) = message.error.message
539
630
  # anthropic requires that content be populated when
540
631
  # is_error is true (throws bad_request_error when not)
541
632
  # so make sure this precondition is met
@@ -556,7 +647,7 @@ async def message_param(message: ChatMessage) -> MessageParam:
556
647
  ToolResultBlockParam(
557
648
  tool_use_id=str(message.tool_call_id),
558
649
  type="tool_result",
559
- content=content,
650
+ content=cast(list[TextBlockParam | ImageBlockParam], content),
560
651
  is_error=message.error is not None,
561
652
  )
562
653
  ],
@@ -565,7 +656,13 @@ async def message_param(message: ChatMessage) -> MessageParam:
565
656
  # tool_calls means claude is attempting to call our tools
566
657
  elif message.role == "assistant" and message.tool_calls:
567
658
  # first include content (claude <thinking>)
568
- tools_content: list[TextBlockParam | ImageBlockParam | ToolUseBlockParam] = (
659
+ tools_content: list[
660
+ TextBlockParam
661
+ | ThinkingBlockParam
662
+ | RedactedThinkingBlockParam
663
+ | ImageBlockParam
664
+ | ToolUseBlockParam
665
+ ] = (
569
666
  [TextBlockParam(type="text", text=message.content or NO_CONTENT)]
570
667
  if isinstance(message.content, str)
571
668
  else (
@@ -634,6 +731,16 @@ def model_output_from_message(message: Message, tools: list[ToolInfo]) -> ModelO
634
731
  arguments=content_block.model_dump().get("input", {}),
635
732
  )
636
733
  )
734
+ elif isinstance(content_block, RedactedThinkingBlock):
735
+ content.append(
736
+ ContentReasoning(reasoning=content_block.data, redacted=True)
737
+ )
738
+ elif isinstance(content_block, ThinkingBlock):
739
+ content.append(
740
+ ContentReasoning(
741
+ reasoning=content_block.thinking, signature=content_block.signature
742
+ )
743
+ )
637
744
 
638
745
  # resolve choice
639
746
  choice = ChatCompletionChoice(
@@ -691,7 +798,7 @@ def split_system_messages(
691
798
 
692
799
  async def message_param_content(
693
800
  content: Content,
694
- ) -> TextBlockParam | ImageBlockParam:
801
+ ) -> TextBlockParam | ImageBlockParam | ThinkingBlockParam | RedactedThinkingBlockParam:
695
802
  if isinstance(content, ContentText):
696
803
  return TextBlockParam(type="text", text=content.text or NO_CONTENT)
697
804
  elif isinstance(content, ContentImage):
@@ -709,6 +816,18 @@ async def message_param_content(
709
816
  type="image",
710
817
  source=dict(type="base64", media_type=cast(Any, media_type), data=image),
711
818
  )
819
+ elif isinstance(content, ContentReasoning):
820
+ if content.redacted:
821
+ return RedactedThinkingBlockParam(
822
+ type="redacted_thinking",
823
+ data=content.reasoning,
824
+ )
825
+ else:
826
+ if content.signature is None:
827
+ raise ValueError("Thinking content without signature.")
828
+ return ThinkingBlockParam(
829
+ type="thinking", thinking=content.reasoning, signature=content.signature
830
+ )
712
831
  else:
713
832
  raise RuntimeError(
714
833
  "Anthropic models do not currently support audio or video inputs."
@@ -31,6 +31,7 @@ from .._model_output import ChatCompletionChoice, ModelOutput, ModelUsage
31
31
  from .util import (
32
32
  model_base_url,
33
33
  )
34
+ from .util.tracker import BotoTimeTracker
34
35
 
35
36
  # Model for Bedrock Converse API (Response)
36
37
  # generated from: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/bedrock-runtime/client/converse.html#converse
@@ -256,6 +257,9 @@ class BedrockAPI(ModelAPI):
256
257
  # Create a shared session to be used when generating
257
258
  self.session = aioboto3.Session()
258
259
 
260
+ # create time tracker
261
+ self._time_tracker = BotoTimeTracker(self.session)
262
+
259
263
  except ImportError:
260
264
  raise pip_dependency_error("Bedrock API", ["aioboto3"])
261
265
 
@@ -313,6 +317,7 @@ class BedrockAPI(ModelAPI):
313
317
  from botocore.exceptions import ClientError
314
318
 
315
319
  # The bedrock client
320
+ request_id = self._time_tracker.start_request()
316
321
  async with self.session.client( # type: ignore[call-overload]
317
322
  service_name="bedrock-runtime",
318
323
  endpoint_url=self.base_url,
@@ -325,6 +330,7 @@ class BedrockAPI(ModelAPI):
325
330
  else DEFAULT_MAX_RETRIES,
326
331
  mode="adaptive",
327
332
  ),
333
+ user_agent_extra=self._time_tracker.user_agent_extra(request_id),
328
334
  ),
329
335
  **self.model_args,
330
336
  ) as client:
@@ -364,6 +370,7 @@ class BedrockAPI(ModelAPI):
364
370
  request.model_dump(exclude_none=True)
365
371
  ),
366
372
  response=response,
373
+ time=self._time_tracker.end_request(request_id),
367
374
  )
368
375
 
369
376
  try:
@@ -19,6 +19,7 @@ from .util import (
19
19
  is_chat_api_rate_limit,
20
20
  model_base_url,
21
21
  )
22
+ from .util.tracker import HttpxTimeTracker
22
23
 
23
24
  # https://developers.cloudflare.com/workers-ai/models/#text-generation
24
25
 
@@ -50,6 +51,7 @@ class CloudFlareAPI(ModelAPI):
50
51
  if not self.api_key:
51
52
  raise environment_prerequisite_error("CloudFlare", CLOUDFLARE_API_TOKEN)
52
53
  self.client = httpx.AsyncClient()
54
+ self._time_tracker = HttpxTimeTracker(self.client)
53
55
  base_url = model_base_url(base_url, "CLOUDFLARE_BASE_URL")
54
56
  self.base_url = (
55
57
  base_url if base_url else "https://api.cloudflare.com/client/v4/accounts"
@@ -76,12 +78,28 @@ class CloudFlareAPI(ModelAPI):
76
78
  json["max_tokens"] = config.max_tokens
77
79
  json["messages"] = chat_api_input(input, tools, self.chat_api_handler())
78
80
 
81
+ # request_id
82
+ request_id = self._time_tracker.start_request()
83
+
84
+ # setup response
85
+ response: dict[str, Any] = {}
86
+
87
+ def model_call() -> ModelCall:
88
+ return ModelCall.create(
89
+ request=json,
90
+ response=response,
91
+ time=self._time_tracker.end_request(request_id),
92
+ )
93
+
79
94
  # make the call
80
95
  response = await chat_api_request(
81
96
  self.client,
82
97
  model_name=self.model_name,
83
98
  url=f"{chat_url}/{self.model_name}",
84
- headers={"Authorization": f"Bearer {self.api_key}"},
99
+ headers={
100
+ "Authorization": f"Bearer {self.api_key}",
101
+ HttpxTimeTracker.REQUEST_ID_HEADER: request_id,
102
+ },
85
103
  json=json,
86
104
  config=config,
87
105
  )
@@ -102,13 +120,8 @@ class CloudFlareAPI(ModelAPI):
102
120
  ],
103
121
  )
104
122
 
105
- # record call
106
- call = ModelCall.create(
107
- request=dict(model_name=self.model_name, **json), response=response
108
- )
109
-
110
123
  # return
111
- return output, call
124
+ return output, model_call()
112
125
  else:
113
126
  error = str(response.get("errors", "Unknown"))
114
127
  raise RuntimeError(f"Error calling {self.model_name}: {error}")
@@ -38,10 +38,13 @@ from pydantic import JsonValue
38
38
  from typing_extensions import override
39
39
 
40
40
  from inspect_ai._util.constants import BASE_64_DATA_REMOVED, NO_CONTENT
41
- from inspect_ai._util.content import Content as InspectContent
41
+ from inspect_ai._util.content import (
42
+ Content as InspectContent,
43
+ )
42
44
  from inspect_ai._util.content import (
43
45
  ContentAudio,
44
46
  ContentImage,
47
+ ContentReasoning,
45
48
  ContentText,
46
49
  ContentVideo,
47
50
  )
@@ -229,6 +232,8 @@ class GoogleGenAIAPI(ModelAPI):
229
232
  response=response,
230
233
  )
231
234
 
235
+ # TODO: would need to monkey patch AuthorizedSession.request
236
+
232
237
  try:
233
238
  response = await self.client.aio.models.generate_content(
234
239
  model=self.model_name,
@@ -248,7 +253,10 @@ class GoogleGenAIAPI(ModelAPI):
248
253
 
249
254
  @override
250
255
  def is_rate_limit(self, ex: BaseException) -> bool:
251
- return isinstance(ex, APIError) and ex.code in (429, 500, 503, 504)
256
+ # see https://cloud.google.com/storage/docs/retry-strategy
257
+ return isinstance(ex, APIError) and (
258
+ ex.code in (408, 429, 429) or ex.code >= 500
259
+ )
252
260
 
253
261
  @override
254
262
  def connection_key(self) -> str:
@@ -403,6 +411,8 @@ async def content_part(client: Client, content: InspectContent | str) -> Part:
403
411
  return Part.from_text(text=content or NO_CONTENT)
404
412
  elif isinstance(content, ContentText):
405
413
  return Part.from_text(text=content.text or NO_CONTENT)
414
+ elif isinstance(content, ContentReasoning):
415
+ return Part.from_text(text=content.reasoning or NO_CONTENT)
406
416
  else:
407
417
  return await chat_content_to_part(client, content)
408
418
 
@@ -415,7 +425,8 @@ async def chat_content_to_part(
415
425
  content_bytes, mime_type = await file_as_data(content.image)
416
426
  return Part.from_bytes(mime_type=mime_type, data=content_bytes)
417
427
  else:
418
- return await file_for_content(client, content)
428
+ file = await file_for_content(client, content)
429
+ return Part.from_uri(file_uri=file.uri, mime_type=file.mime_type)
419
430
 
420
431
 
421
432
  async def extract_system_message_as_parts(
@@ -550,11 +561,19 @@ def completion_choice_from_candidate(candidate: Candidate) -> ChatCompletionChoi
550
561
  # stop reason
551
562
  stop_reason = finish_reason_to_stop_reason(candidate.finish_reason)
552
563
 
564
+ # choice content may include reasoning
565
+ if reasoning:
566
+ choice_content: str | list[Content] = [
567
+ ContentReasoning(reasoning=reasoning),
568
+ ContentText(text=content),
569
+ ]
570
+ else:
571
+ choice_content = content
572
+
553
573
  # build choice
554
574
  choice = ChatCompletionChoice(
555
575
  message=ChatMessageAssistant(
556
- content=content,
557
- reasoning=reasoning,
576
+ content=choice_content,
558
577
  tool_calls=tool_calls if len(tool_calls) > 0 else None,
559
578
  source="generate",
560
579
  ),
@@ -740,7 +759,7 @@ async def file_for_content(
740
759
  uploaded_file = files_db.get(content_sha256)
741
760
  if uploaded_file:
742
761
  try:
743
- upload: File = client.files.get(uploaded_file)
762
+ upload: File = client.files.get(name=uploaded_file)
744
763
  if upload.state.name == "ACTIVE":
745
764
  trace(f"Using uploaded file: {uploaded_file}")
746
765
  return upload
@@ -752,10 +771,12 @@ async def file_for_content(
752
771
  trace(f"Error attempting to access uploaded file: {ex}")
753
772
  files_db.delete(content_sha256)
754
773
  # do the upload (and record it)
755
- upload = client.files.upload(BytesIO(content_bytes), mime_type=mime_type)
774
+ upload = client.files.upload(
775
+ file=BytesIO(content_bytes), config=dict(mime_type=mime_type)
776
+ )
756
777
  while upload.state.name == "PROCESSING":
757
778
  await asyncio.sleep(3)
758
- upload = client.files.get(upload.name)
779
+ upload = client.files.get(name=upload.name)
759
780
  if upload.state.name == "FAILED":
760
781
  trace(f"Failed to upload file '{upload.name}: {upload.error}")
761
782
  raise ValueError(f"Google file upload failed: {upload.error}")
@@ -1,5 +1,6 @@
1
1
  import json
2
2
  import os
3
+ from copy import copy
3
4
  from typing import Any, Dict, Iterable, List, Optional
4
5
 
5
6
  import httpx
@@ -19,10 +20,15 @@ from groq.types.chat import (
19
20
  ChatCompletionToolMessageParam,
20
21
  ChatCompletionUserMessageParam,
21
22
  )
23
+ from pydantic import JsonValue
22
24
  from typing_extensions import override
23
25
 
24
- from inspect_ai._util.constants import DEFAULT_MAX_RETRIES, DEFAULT_MAX_TOKENS
25
- from inspect_ai._util.content import Content
26
+ from inspect_ai._util.constants import (
27
+ BASE_64_DATA_REMOVED,
28
+ DEFAULT_MAX_RETRIES,
29
+ DEFAULT_MAX_TOKENS,
30
+ )
31
+ from inspect_ai._util.content import Content, ContentReasoning, ContentText
26
32
  from inspect_ai._util.images import file_as_data_uri
27
33
  from inspect_ai._util.url import is_http_url
28
34
  from inspect_ai.tool import ToolCall, ToolChoice, ToolFunction, ToolInfo
@@ -48,6 +54,7 @@ from .util import (
48
54
  environment_prerequisite_error,
49
55
  model_base_url,
50
56
  )
57
+ from .util.tracker import HttpxTimeTracker
51
58
 
52
59
  GROQ_API_KEY = "GROQ_API_KEY"
53
60
 
@@ -87,6 +94,9 @@ class GroqAPI(ModelAPI):
87
94
  http_client=httpx.AsyncClient(limits=httpx.Limits(max_connections=None)),
88
95
  )
89
96
 
97
+ # create time tracker
98
+ self._time_tracker = HttpxTimeTracker(self.client._client)
99
+
90
100
  @override
91
101
  async def close(self) -> None:
92
102
  await self.client.close()
@@ -98,6 +108,21 @@ class GroqAPI(ModelAPI):
98
108
  tool_choice: ToolChoice,
99
109
  config: GenerateConfig,
100
110
  ) -> tuple[ModelOutput, ModelCall]:
111
+ # allocate request_id (so we can see it from ModelCall)
112
+ request_id = self._time_tracker.start_request()
113
+
114
+ # setup request and response for ModelCall
115
+ request: dict[str, Any] = {}
116
+ response: dict[str, Any] = {}
117
+
118
+ def model_call() -> ModelCall:
119
+ return ModelCall.create(
120
+ request=request,
121
+ response=response,
122
+ filter=model_call_filter,
123
+ time=self._time_tracker.end_request(request_id),
124
+ )
125
+
101
126
  messages = await as_groq_chat_messages(input)
102
127
 
103
128
  params = self.completion_params(config)
@@ -109,51 +134,52 @@ class GroqAPI(ModelAPI):
109
134
  if config.parallel_tool_calls is not None:
110
135
  params["parallel_tool_calls"] = config.parallel_tool_calls
111
136
 
112
- response: ChatCompletion = await self.client.chat.completions.create(
137
+ request = dict(
113
138
  messages=messages,
114
139
  model=self.model_name,
140
+ extra_headers={HttpxTimeTracker.REQUEST_ID_HEADER: request_id},
115
141
  **params,
116
142
  )
117
143
 
144
+ completion: ChatCompletion = await self.client.chat.completions.create(
145
+ **request,
146
+ )
147
+
148
+ response = completion.model_dump()
149
+
118
150
  # extract metadata
119
151
  metadata: dict[str, Any] = {
120
- "id": response.id,
121
- "system_fingerprint": response.system_fingerprint,
122
- "created": response.created,
152
+ "id": completion.id,
153
+ "system_fingerprint": completion.system_fingerprint,
154
+ "created": completion.created,
123
155
  }
124
- if response.usage:
156
+ if completion.usage:
125
157
  metadata = metadata | {
126
- "queue_time": response.usage.queue_time,
127
- "prompt_time": response.usage.prompt_time,
128
- "completion_time": response.usage.completion_time,
129
- "total_time": response.usage.total_time,
158
+ "queue_time": completion.usage.queue_time,
159
+ "prompt_time": completion.usage.prompt_time,
160
+ "completion_time": completion.usage.completion_time,
161
+ "total_time": completion.usage.total_time,
130
162
  }
131
163
 
132
164
  # extract output
133
- choices = self._chat_choices_from_response(response, tools)
165
+ choices = self._chat_choices_from_response(completion, tools)
134
166
  output = ModelOutput(
135
- model=response.model,
167
+ model=completion.model,
136
168
  choices=choices,
137
169
  usage=(
138
170
  ModelUsage(
139
- input_tokens=response.usage.prompt_tokens,
140
- output_tokens=response.usage.completion_tokens,
141
- total_tokens=response.usage.total_tokens,
171
+ input_tokens=completion.usage.prompt_tokens,
172
+ output_tokens=completion.usage.completion_tokens,
173
+ total_tokens=completion.usage.total_tokens,
142
174
  )
143
- if response.usage
175
+ if completion.usage
144
176
  else None
145
177
  ),
146
178
  metadata=metadata,
147
179
  )
148
180
 
149
- # record call
150
- call = ModelCall.create(
151
- request=dict(messages=messages, model=self.model_name, **params),
152
- response=response.model_dump(),
153
- )
154
-
155
181
  # return
156
- return output, call
182
+ return output, model_call()
157
183
 
158
184
  def completion_params(self, config: GenerateConfig) -> Dict[str, Any]:
159
185
  params: dict[str, Any] = {}
@@ -300,10 +326,23 @@ def chat_tool_calls(message: Any, tools: list[ToolInfo]) -> Optional[List[ToolCa
300
326
  def chat_message_assistant(message: Any, tools: list[ToolInfo]) -> ChatMessageAssistant:
301
327
  reasoning = getattr(message, "reasoning", None)
302
328
  if reasoning is not None:
303
- reasoning = str(reasoning)
329
+ content: str | list[Content] = [
330
+ ContentReasoning(reasoning=str(reasoning)),
331
+ ContentText(text=message.content or ""),
332
+ ]
333
+ else:
334
+ content = message.content or ""
335
+
304
336
  return ChatMessageAssistant(
305
- content=message.content or "",
337
+ content=content,
306
338
  source="generate",
307
339
  tool_calls=chat_tool_calls(message, tools),
308
- reasoning=reasoning,
309
340
  )
341
+
342
+
343
+ def model_call_filter(key: JsonValue | None, value: JsonValue) -> JsonValue:
344
+ # remove base64 encoded images
345
+ if key == "image_url" and isinstance(value, dict):
346
+ value = copy(value)
347
+ value.update(url=BASE_64_DATA_REMOVED)
348
+ return value