inspect-ai 0.3.89__py3-none-any.whl → 0.3.91__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (370)
  1. inspect_ai/_cli/common.py +13 -0
  2. inspect_ai/_cli/eval.py +40 -0
  3. inspect_ai/_display/textual/widgets/samples.py +49 -4
  4. inspect_ai/_display/textual/widgets/vscode.py +4 -2
  5. inspect_ai/_eval/eval.py +41 -28
  6. inspect_ai/_eval/evalset.py +4 -0
  7. inspect_ai/_eval/loader.py +4 -5
  8. inspect_ai/_eval/registry.py +1 -1
  9. inspect_ai/_eval/run.py +6 -3
  10. inspect_ai/_eval/task/log.py +6 -0
  11. inspect_ai/_eval/task/run.py +108 -53
  12. inspect_ai/_eval/task/sandbox.py +19 -5
  13. inspect_ai/_util/_async.py +1 -1
  14. inspect_ai/_util/constants.py +1 -0
  15. inspect_ai/_util/environ.py +32 -0
  16. inspect_ai/_util/file.py +8 -1
  17. inspect_ai/_util/httpx.py +105 -22
  18. inspect_ai/_util/registry.py +83 -9
  19. inspect_ai/_util/text.py +81 -17
  20. inspect_ai/_util/transcript.py +9 -6
  21. inspect_ai/_util/vscode.py +7 -2
  22. inspect_ai/_view/schema.py +1 -1
  23. inspect_ai/_view/www/babel.config.js +11 -0
  24. inspect_ai/_view/www/dist/assets/index.css +3583 -3508
  25. inspect_ai/_view/www/dist/assets/index.js +59212 -52521
  26. inspect_ai/_view/www/eslint.config.mjs +10 -1
  27. inspect_ai/_view/www/jest.config.mjs +21 -0
  28. inspect_ai/_view/www/log-schema.json +111 -2
  29. inspect_ai/_view/www/package.json +19 -5
  30. inspect_ai/_view/www/src/{types → @types}/log.d.ts +95 -32
  31. inspect_ai/_view/www/{App.css → src/app/App.css} +22 -14
  32. inspect_ai/_view/www/src/app/App.tsx +168 -0
  33. inspect_ai/_view/www/src/{AppErrorBoundary.tsx → app/AppErrorBoundary.tsx} +1 -1
  34. inspect_ai/_view/www/src/{appearance → app/appearance}/icons.ts +1 -0
  35. inspect_ai/_view/www/src/{metadata → app/content}/RenderedContent.tsx +5 -5
  36. inspect_ai/_view/www/src/{workspace/WorkSpaceView.tsx → app/log-view/LogView.tsx} +59 -40
  37. inspect_ai/_view/www/src/app/log-view/LogViewContainer.tsx +159 -0
  38. inspect_ai/_view/www/src/app/log-view/LogViewLayout.tsx +109 -0
  39. inspect_ai/_view/www/src/{workspace → app/log-view}/error/TaskErrorPanel.tsx +3 -3
  40. inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ModelRolesView.tsx +1 -1
  41. inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/Navbar.tsx +4 -4
  42. inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/PrimaryBar.tsx +8 -8
  43. inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ResultsPanel.tsx +6 -6
  44. inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/RunningStatusPanel.tsx +1 -1
  45. inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ScoreGrid.tsx +1 -1
  46. inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/SecondaryBar.tsx +8 -8
  47. inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/InfoTab.tsx +35 -6
  48. inspect_ai/_view/www/src/app/log-view/tabs/JsonTab.tsx +136 -0
  49. inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/SamplesTab.tsx +82 -73
  50. inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/grouping.ts +3 -3
  51. inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/types.ts +1 -1
  52. inspect_ai/_view/www/src/{plan → app/plan}/DatasetDetailView.tsx +2 -2
  53. inspect_ai/_view/www/src/{plan → app/plan}/DetailStep.tsx +1 -1
  54. inspect_ai/_view/www/src/{plan → app/plan}/ModelCard.tsx +4 -4
  55. inspect_ai/_view/www/src/{plan → app/plan}/PlanCard.tsx +2 -2
  56. inspect_ai/_view/www/src/{plan → app/plan}/PlanDetailView.tsx +5 -5
  57. inspect_ai/_view/www/src/{plan → app/plan}/SolverDetailView.tsx +1 -1
  58. inspect_ai/_view/www/src/app/routing/AppRouter.tsx +58 -0
  59. inspect_ai/_view/www/src/app/routing/navigationHooks.ts +182 -0
  60. inspect_ai/_view/www/src/app/routing/url.ts +43 -0
  61. inspect_ai/_view/www/src/{samples → app/samples}/InlineSampleDisplay.tsx +11 -27
  62. inspect_ai/_view/www/src/{samples → app/samples}/SampleDialog.tsx +36 -40
  63. inspect_ai/_view/www/src/{samples → app/samples}/SampleDisplay.module.css +4 -0
  64. inspect_ai/_view/www/src/{samples → app/samples}/SampleDisplay.tsx +116 -49
  65. inspect_ai/_view/www/src/{samples → app/samples}/SampleSummaryView.module.css +1 -1
  66. inspect_ai/_view/www/src/{samples → app/samples}/SampleSummaryView.tsx +29 -26
  67. inspect_ai/_view/www/src/{samples → app/samples}/SamplesTools.tsx +3 -3
  68. inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessage.module.css +5 -2
  69. inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessage.tsx +13 -5
  70. inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessageRenderer.tsx +3 -3
  71. inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessageRow.tsx +6 -1
  72. inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatView.tsx +4 -2
  73. inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatViewVirtualList.tsx +5 -3
  74. inspect_ai/_view/www/src/app/samples/chat/MessageContent.module.css +12 -0
  75. inspect_ai/_view/www/src/{samples → app/samples}/chat/MessageContent.tsx +11 -10
  76. inspect_ai/_view/www/src/app/samples/chat/MessageContents.module.css +7 -0
  77. inspect_ai/_view/www/src/{samples → app/samples}/chat/MessageContents.tsx +14 -8
  78. inspect_ai/_view/www/src/{samples → app/samples}/chat/messages.ts +2 -2
  79. inspect_ai/_view/www/src/app/samples/chat/tools/ToolCallView.module.css +7 -0
  80. inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolCallView.tsx +26 -27
  81. inspect_ai/_view/www/src/app/samples/chat/tools/ToolInput.module.css +19 -0
  82. inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolInput.tsx +3 -3
  83. inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolOutput.module.css +1 -0
  84. inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolOutput.tsx +1 -1
  85. inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolTitle.module.css +4 -0
  86. inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolTitle.tsx +2 -2
  87. inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/tool.ts +1 -1
  88. inspect_ai/_view/www/src/app/samples/chat/types.ts +1 -0
  89. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/samplesDescriptor.tsx +38 -15
  90. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/BooleanScoreDescriptor.tsx +1 -1
  91. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/CategoricalScoreDescriptor.tsx +2 -2
  92. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/NumericScoreDescriptor.tsx +3 -3
  93. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/ObjectScoreDescriptor.tsx +4 -4
  94. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/OtherScoreDescriptor.tsx +2 -2
  95. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/PassFailScoreDescriptor.tsx +2 -2
  96. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/ScoreDescriptor.tsx +1 -1
  97. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/types.ts +4 -3
  98. inspect_ai/_view/www/src/{samples → app/samples}/error/SampleErrorView.module.css +2 -1
  99. inspect_ai/_view/www/src/{samples → app/samples}/list/SampleHeader.tsx +3 -0
  100. inspect_ai/_view/www/src/{samples → app/samples}/list/SampleList.tsx +47 -33
  101. inspect_ai/_view/www/src/{samples → app/samples}/list/SampleRow.module.css +16 -0
  102. inspect_ai/_view/www/src/{samples → app/samples}/list/SampleRow.tsx +47 -20
  103. inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/SelectScorer.tsx +1 -1
  104. inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/SortFilter.tsx +4 -4
  105. inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/filters.ts +8 -6
  106. inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/SampleFilter.tsx +4 -3
  107. inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/completions.ts +1 -1
  108. inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/language.ts +1 -0
  109. inspect_ai/_view/www/src/{samples → app/samples}/sampleDataAdapter.ts +3 -3
  110. inspect_ai/_view/www/src/{samples → app/samples}/sampleLimit.ts +1 -1
  111. inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScores.tsx +1 -1
  112. inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScoresGrid.tsx +12 -11
  113. inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScoresView.tsx +6 -6
  114. inspect_ai/_view/www/src/{samples → app/samples}/transcript/ApprovalEventView.tsx +1 -1
  115. inspect_ai/_view/www/src/{samples → app/samples}/transcript/ErrorEventView.tsx +3 -3
  116. inspect_ai/_view/www/src/{samples → app/samples}/transcript/InfoEventView.tsx +4 -4
  117. inspect_ai/_view/www/src/{samples → app/samples}/transcript/InputEventView.tsx +3 -3
  118. inspect_ai/_view/www/src/{samples → app/samples}/transcript/LoggerEventView.tsx +3 -3
  119. inspect_ai/_view/www/src/{samples → app/samples}/transcript/ModelEventView.module.css +13 -7
  120. inspect_ai/_view/www/src/{samples → app/samples}/transcript/ModelEventView.tsx +49 -21
  121. inspect_ai/_view/www/src/{samples → app/samples}/transcript/SampleInitEventView.tsx +11 -9
  122. inspect_ai/_view/www/src/{samples → app/samples}/transcript/SampleLimitEventView.tsx +1 -1
  123. inspect_ai/_view/www/src/{samples → app/samples}/transcript/SandboxEventView.tsx +8 -6
  124. inspect_ai/_view/www/src/{samples → app/samples}/transcript/ScoreEventView.tsx +4 -4
  125. inspect_ai/_view/www/src/{samples → app/samples}/transcript/StepEventView.tsx +22 -8
  126. inspect_ai/_view/www/src/{samples → app/samples}/transcript/SubtaskEventView.tsx +2 -2
  127. inspect_ai/_view/www/src/{samples → app/samples}/transcript/ToolEventView.tsx +2 -2
  128. inspect_ai/_view/www/src/{samples → app/samples}/transcript/TranscriptView.module.css +8 -9
  129. inspect_ai/_view/www/src/{samples → app/samples}/transcript/TranscriptView.tsx +32 -114
  130. inspect_ai/_view/www/src/{samples → app/samples}/transcript/TranscriptVirtualListComponent.module.css +6 -5
  131. inspect_ai/_view/www/src/{samples → app/samples}/transcript/TranscriptVirtualListComponent.tsx +14 -2
  132. inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventPanel.tsx +2 -2
  133. inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventTimingPanel.tsx +1 -1
  134. inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/utils.ts +1 -1
  135. inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateEventRenderers.tsx +23 -21
  136. inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateEventRenders.module.css +7 -0
  137. inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateEventView.tsx +2 -2
  138. inspect_ai/_view/www/src/app/samples/transcript/transform/fixups.ts +142 -0
  139. inspect_ai/_view/www/src/app/samples/transcript/transform/treeify.ts +39 -0
  140. inspect_ai/_view/www/src/{samples → app/samples}/transcript/types.ts +1 -1
  141. inspect_ai/_view/www/src/{workspace → app}/sidebar/EvalStatus.tsx +1 -1
  142. inspect_ai/_view/www/src/app/sidebar/LogDirectoryTitleView.module.css +16 -0
  143. inspect_ai/_view/www/src/app/sidebar/LogDirectoryTitleView.tsx +70 -0
  144. inspect_ai/_view/www/src/{workspace → app}/sidebar/Sidebar.module.css +8 -0
  145. inspect_ai/_view/www/src/{workspace → app}/sidebar/Sidebar.tsx +35 -17
  146. inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarLogEntry.tsx +1 -1
  147. inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarScoreView.tsx +2 -2
  148. inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarScoresView.tsx +2 -2
  149. inspect_ai/_view/www/src/{types.ts → app/types.ts} +18 -11
  150. inspect_ai/_view/www/src/{usage → app/usage}/ModelTokenTable.tsx +1 -1
  151. inspect_ai/_view/www/src/{usage → app/usage}/ModelUsagePanel.tsx +2 -2
  152. inspect_ai/_view/www/src/{usage → app/usage}/TokenTable.tsx +1 -1
  153. inspect_ai/_view/www/src/{usage → app/usage}/UsageCard.tsx +6 -6
  154. inspect_ai/_view/www/src/{api → client/api}/api-browser.ts +2 -2
  155. inspect_ai/_view/www/src/{api → client/api}/api-http.ts +3 -3
  156. inspect_ai/_view/www/src/{api → client/api}/api-vscode.ts +2 -2
  157. inspect_ai/_view/www/src/{api → client/api}/client-api.ts +6 -5
  158. inspect_ai/_view/www/src/{api → client/api}/index.ts +2 -2
  159. inspect_ai/_view/www/src/{api → client/api}/types.ts +4 -1
  160. inspect_ai/_view/www/src/{logfile → client/remote}/remoteLogFile.ts +3 -3
  161. inspect_ai/_view/www/src/{storage → client/storage}/index.ts +11 -5
  162. inspect_ai/_view/www/src/components/Card.tsx +1 -1
  163. inspect_ai/_view/www/src/components/CopyButton.tsx +1 -1
  164. inspect_ai/_view/www/src/components/DownloadButton.tsx +1 -1
  165. inspect_ai/_view/www/src/components/ErrorPanel.tsx +1 -1
  166. inspect_ai/_view/www/src/components/{ExpandablePanel.css → ExpandablePanel.module.css} +14 -11
  167. inspect_ai/_view/www/src/components/ExpandablePanel.tsx +16 -10
  168. inspect_ai/_view/www/src/components/FindBand.tsx +1 -1
  169. inspect_ai/_view/www/src/components/JsonPanel.css +2 -2
  170. inspect_ai/_view/www/src/components/LargeModal.tsx +12 -1
  171. inspect_ai/_view/www/src/components/LightboxCarousel.tsx +1 -1
  172. inspect_ai/_view/www/src/components/MarkdownDiv.tsx +3 -1
  173. inspect_ai/_view/www/src/components/MessageBand.tsx +1 -1
  174. inspect_ai/_view/www/src/components/NoContentsPanel.tsx +1 -1
  175. inspect_ai/_view/www/src/constants.ts +10 -9
  176. inspect_ai/_view/www/src/index.tsx +27 -11
  177. inspect_ai/_view/www/src/state/appSlice.ts +44 -5
  178. inspect_ai/_view/www/src/state/hooks.ts +30 -7
  179. inspect_ai/_view/www/src/state/logSlice.ts +7 -5
  180. inspect_ai/_view/www/src/state/logsPolling.ts +1 -1
  181. inspect_ai/_view/www/src/state/logsSlice.ts +18 -13
  182. inspect_ai/_view/www/src/state/samplePolling.ts +12 -12
  183. inspect_ai/_view/www/src/state/sampleSlice.ts +3 -5
  184. inspect_ai/_view/www/src/state/sampleUtils.ts +1 -1
  185. inspect_ai/_view/www/src/{scoring/utils.ts → state/scoring.ts} +2 -2
  186. inspect_ai/_view/www/src/state/store.ts +9 -7
  187. inspect_ai/_view/www/src/state/utils.ts +1 -1
  188. inspect_ai/_view/www/src/tests/README.md +49 -0
  189. inspect_ai/_view/www/src/tests/__mocks__/fileMock.js +1 -0
  190. inspect_ai/_view/www/src/tests/__mocks__/styleMock.js +1 -0
  191. inspect_ai/_view/www/src/tests/setupTests.mjs +1 -0
  192. inspect_ai/_view/www/src/tests/utils/base64.test.ts +23 -0
  193. inspect_ai/_view/www/src/tests/utils/format.test.ts +127 -0
  194. inspect_ai/_view/www/src/tests/utils/path.test.ts +54 -0
  195. inspect_ai/_view/www/src/utils/format.ts +8 -2
  196. inspect_ai/_view/www/src/utils/path.ts +14 -2
  197. inspect_ai/_view/www/src/utils/polling.ts +1 -2
  198. inspect_ai/_view/www/src/utils/uri.ts +32 -0
  199. inspect_ai/_view/www/yarn.lock +3310 -382
  200. inspect_ai/agent/_handoff.py +6 -3
  201. inspect_ai/agent/_human/agent.py +5 -3
  202. inspect_ai/agent/_human/install.py +16 -7
  203. inspect_ai/agent/_human/panel.py +14 -1
  204. inspect_ai/agent/_human/service.py +5 -1
  205. inspect_ai/agent/_react.py +161 -128
  206. inspect_ai/agent/_types.py +15 -4
  207. inspect_ai/approval/_policy.py +2 -2
  208. inspect_ai/log/_file.py +30 -11
  209. inspect_ai/log/_log.py +7 -1
  210. inspect_ai/log/_recorders/eval.py +3 -0
  211. inspect_ai/log/_recorders/types.py +1 -0
  212. inspect_ai/log/_samples.py +4 -0
  213. inspect_ai/model/_call_tools.py +33 -17
  214. inspect_ai/model/_generate_config.py +10 -2
  215. inspect_ai/model/_model.py +41 -21
  216. inspect_ai/model/_model_output.py +2 -1
  217. inspect_ai/model/_openai.py +10 -8
  218. inspect_ai/model/_openai_responses.py +83 -42
  219. inspect_ai/model/_providers/anthropic.py +14 -12
  220. inspect_ai/model/_providers/google.py +191 -95
  221. inspect_ai/model/_providers/hf.py +1 -1
  222. inspect_ai/model/_providers/mistral.py +2 -3
  223. inspect_ai/model/_providers/openai.py +54 -17
  224. inspect_ai/model/_providers/openai_o1.py +1 -1
  225. inspect_ai/model/_providers/openai_responses.py +28 -16
  226. inspect_ai/model/_providers/openrouter.py +14 -0
  227. inspect_ai/model/_providers/providers.py +2 -2
  228. inspect_ai/model/_providers/util/chatapi.py +17 -7
  229. inspect_ai/model/_providers/vllm.py +1 -1
  230. inspect_ai/scorer/_metric.py +17 -1
  231. inspect_ai/scorer/_model.py +51 -6
  232. inspect_ai/scorer/_scorer.py +1 -1
  233. inspect_ai/solver/_human_agent.py +3 -0
  234. inspect_ai/solver/_plan.py +1 -1
  235. inspect_ai/solver/_solver.py +1 -1
  236. inspect_ai/solver/_use_tools.py +14 -8
  237. inspect_ai/tool/__init__.py +16 -1
  238. inspect_ai/tool/_json_rpc_helpers.py +285 -0
  239. inspect_ai/tool/_mcp/__init__.py +13 -0
  240. inspect_ai/tool/_mcp/_context.py +14 -0
  241. inspect_ai/tool/_mcp/_mcp.py +293 -0
  242. inspect_ai/tool/_mcp/_sandbox.py +104 -0
  243. inspect_ai/tool/_mcp/_types.py +31 -0
  244. inspect_ai/tool/_mcp/connection.py +60 -0
  245. inspect_ai/tool/_mcp/sampling.py +118 -0
  246. inspect_ai/tool/_mcp/server.py +112 -0
  247. inspect_ai/tool/_mcp/tools.py +34 -0
  248. inspect_ai/tool/_tool.py +13 -0
  249. inspect_ai/tool/_tool_def.py +24 -7
  250. inspect_ai/tool/_tool_support_helpers.py +129 -153
  251. inspect_ai/tool/_tools/_bash_session.py +11 -11
  252. inspect_ai/tool/_tools/_text_editor.py +6 -6
  253. inspect_ai/tool/_tools/_web_browser/_web_browser.py +8 -8
  254. inspect_ai/util/_anyio.py +31 -20
  255. inspect_ai/util/_json.py +20 -2
  256. inspect_ai/util/_sandbox/context.py +18 -7
  257. inspect_ai/util/_sandbox/docker/compose.py +1 -1
  258. inspect_ai/util/_sandbox/docker/docker.py +92 -21
  259. inspect_ai/util/_sandbox/environment.py +33 -2
  260. inspect_ai/util/_sandbox/events.py +2 -2
  261. inspect_ai/util/_sandbox/service.py +13 -3
  262. {inspect_ai-0.3.89.dist-info → inspect_ai-0.3.91.dist-info}/METADATA +6 -2
  263. inspect_ai-0.3.91.dist-info/RECORD +732 -0
  264. {inspect_ai-0.3.89.dist-info → inspect_ai-0.3.91.dist-info}/WHEEL +1 -1
  265. inspect_ai/_view/www/src/App.tsx +0 -316
  266. inspect_ai/_view/www/src/samples/chat/MessageContent.module.css +0 -4
  267. inspect_ai/_view/www/src/samples/chat/MessageContents.module.css +0 -3
  268. inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.module.css +0 -3
  269. inspect_ai/_view/www/src/samples/chat/tools/ToolInput.module.css +0 -14
  270. inspect_ai/_view/www/src/workspace/WorkSpace.tsx +0 -292
  271. inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.module.css +0 -5
  272. inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +0 -57
  273. inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +0 -43
  274. inspect_ai-0.3.89.dist-info/RECORD +0 -705
  275. /inspect_ai/_view/www/src/{types → @types}/asciicinema-player.d.ts +0 -0
  276. /inspect_ai/_view/www/src/{types → @types}/jsondiffpatch.d.ts +0 -0
  277. /inspect_ai/_view/www/src/{types → @types}/markdown-it-katex.d.ts +0 -0
  278. /inspect_ai/_view/www/src/{types → @types}/prism.d.ts +0 -0
  279. /inspect_ai/_view/www/src/{appearance → app/appearance}/colors.ts +0 -0
  280. /inspect_ai/_view/www/src/{appearance → app/appearance}/fonts.ts +0 -0
  281. /inspect_ai/_view/www/src/{appearance → app/appearance}/styles.ts +0 -0
  282. /inspect_ai/_view/www/src/{metadata → app/content}/MetaDataGrid.tsx +0 -0
  283. /inspect_ai/_view/www/src/{metadata → app/content}/MetaDataView.module.css +0 -0
  284. /inspect_ai/_view/www/src/{metadata → app/content}/MetaDataView.tsx +0 -0
  285. /inspect_ai/_view/www/src/{metadata → app/content}/MetadataGrid.module.css +0 -0
  286. /inspect_ai/_view/www/src/{metadata → app/content}/RenderedContent.module.css +0 -0
  287. /inspect_ai/_view/www/src/{metadata → app/content}/types.ts +0 -0
  288. /inspect_ai/_view/www/src/{workspace/WorkSpaceView.module.css → app/log-view/LogView.module.css} +0 -0
  289. /inspect_ai/_view/www/src/{workspace → app/log-view}/error/TaskErrorPanel.module.css +0 -0
  290. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ModelRolesView.module.css +0 -0
  291. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/Navbar.module.css +0 -0
  292. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/PrimaryBar.module.css +0 -0
  293. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ResultsPanel.module.css +0 -0
  294. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/RunningStatusPanel.module.css +0 -0
  295. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ScoreGrid.module.css +0 -0
  296. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/SecondaryBar.module.css +0 -0
  297. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/StatusPanel.module.css +0 -0
  298. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/StatusPanel.tsx +0 -0
  299. /inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/InfoTab.module.css +0 -0
  300. /inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/JsonTab.module.css +0 -0
  301. /inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/RunningNoSamples.module.css +0 -0
  302. /inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/RunningNoSamples.tsx +0 -0
  303. /inspect_ai/_view/www/src/{workspace → app/log-view}/types.ts +0 -0
  304. /inspect_ai/_view/www/src/{workspace → app/log-view}/utils.ts +0 -0
  305. /inspect_ai/_view/www/src/{plan → app/plan}/DatasetDetailView.module.css +0 -0
  306. /inspect_ai/_view/www/src/{plan → app/plan}/DetailStep.module.css +0 -0
  307. /inspect_ai/_view/www/src/{plan → app/plan}/ModelCard.module.css +0 -0
  308. /inspect_ai/_view/www/src/{plan → app/plan}/PlanDetailView.module.css +0 -0
  309. /inspect_ai/_view/www/src/{plan → app/plan}/ScorerDetailView.module.css +0 -0
  310. /inspect_ai/_view/www/src/{plan → app/plan}/ScorerDetailView.tsx +0 -0
  311. /inspect_ai/_view/www/src/{plan → app/plan}/SolverDetailView.module.css +0 -0
  312. /inspect_ai/_view/www/src/{samples → app/samples}/InlineSampleDisplay.module.css +0 -0
  313. /inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessageRow.module.css +0 -0
  314. /inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatViewVirtualList.module.css +0 -0
  315. /inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/BooleanScoreDescriptor.module.css +0 -0
  316. /inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/ObjectScoreDescriptor.module.css +0 -0
  317. /inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/PassFailScoreDescriptor.module.css +0 -0
  318. /inspect_ai/_view/www/src/{samples → app/samples}/error/FlatSampleErrorView.module.css +0 -0
  319. /inspect_ai/_view/www/src/{samples → app/samples}/error/FlatSampleErrorView.tsx +0 -0
  320. /inspect_ai/_view/www/src/{samples → app/samples}/error/SampleErrorView.tsx +0 -0
  321. /inspect_ai/_view/www/src/{samples → app/samples}/error/error.ts +0 -0
  322. /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleFooter.module.css +0 -0
  323. /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleFooter.tsx +0 -0
  324. /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleHeader.module.css +0 -0
  325. /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleList.module.css +0 -0
  326. /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleSeparator.module.css +0 -0
  327. /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleSeparator.tsx +0 -0
  328. /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/EpochFilter.module.css +0 -0
  329. /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/EpochFilter.tsx +0 -0
  330. /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/SelectScorer.module.css +0 -0
  331. /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/SortFilter.module.css +0 -0
  332. /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/SampleFilter.module.css +0 -0
  333. /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/tokenize.ts +0 -0
  334. /inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScores.module.css +0 -0
  335. /inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScoresGrid.module.css +0 -0
  336. /inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScoresView.module.css +0 -0
  337. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/InfoEventView.module.css +0 -0
  338. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/LoggerEventView.module.css +0 -0
  339. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/SampleInitEventView.module.css +0 -0
  340. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/SandboxEventView.module.css +0 -0
  341. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/ScoreEventView.module.css +0 -0
  342. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/SubtaskEventView.module.css +0 -0
  343. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/ToolEventView.module.css +0 -0
  344. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventNav.module.css +0 -0
  345. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventNav.tsx +0 -0
  346. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventNavs.module.css +0 -0
  347. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventNavs.tsx +0 -0
  348. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventPanel.module.css +0 -0
  349. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventProgressPanel.module.css +0 -0
  350. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventProgressPanel.tsx +0 -0
  351. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventRow.module.css +0 -0
  352. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventRow.tsx +0 -0
  353. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventSection.module.css +0 -0
  354. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventSection.tsx +0 -0
  355. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventTimingPanel.module.css +0 -0
  356. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateDiffView.tsx +0 -0
  357. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateEventView.module.css +0 -0
  358. /inspect_ai/_view/www/src/{workspace → app}/sidebar/EvalStatus.module.css +0 -0
  359. /inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarLogEntry.module.css +0 -0
  360. /inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarScoreView.module.css +0 -0
  361. /inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarScoresView.module.css +0 -0
  362. /inspect_ai/_view/www/src/{usage → app/usage}/ModelUsagePanel.module.css +0 -0
  363. /inspect_ai/_view/www/src/{usage → app/usage}/TokenTable.module.css +0 -0
  364. /inspect_ai/_view/www/src/{usage → app/usage}/UsageCard.module.css +0 -0
  365. /inspect_ai/_view/www/src/{api → client/api}/api-shared.ts +0 -0
  366. /inspect_ai/_view/www/src/{api → client/api}/jsonrpc.ts +0 -0
  367. /inspect_ai/_view/www/src/{logfile → client/remote}/remoteZipFile.ts +0 -0
  368. {inspect_ai-0.3.89.dist-info → inspect_ai-0.3.91.dist-info}/entry_points.txt +0 -0
  369. {inspect_ai-0.3.89.dist-info → inspect_ai-0.3.91.dist-info}/licenses/LICENSE +0 -0
  370. {inspect_ai-0.3.89.dist-info → inspect_ai-0.3.91.dist-info}/top_level.txt +0 -0
@@ -5,25 +5,27 @@ import os
5
5
  from copy import copy
6
6
  from io import BytesIO
7
7
  from logging import getLogger
8
- from typing import Any
8
+ from typing import Any, cast
9
9
 
10
10
  # SDK Docs: https://googleapis.github.io/python-genai/
11
11
  import anyio
12
- from google.genai import Client # type: ignore
13
- from google.genai.errors import APIError, ClientError # type: ignore
14
- from google.genai.types import ( # type: ignore
12
+ from google.genai import Client
13
+ from google.genai.errors import APIError, ClientError
14
+ from google.genai.types import (
15
15
  Candidate,
16
16
  Content,
17
+ ContentListUnion,
18
+ ContentListUnionDict,
17
19
  File,
18
20
  FinishReason,
19
21
  FunctionCallingConfig,
22
+ FunctionCallingConfigMode,
20
23
  FunctionDeclaration,
21
24
  FunctionResponse,
22
25
  GenerateContentConfig,
23
26
  GenerateContentResponse,
24
27
  GenerateContentResponsePromptFeedback,
25
28
  GenerateContentResponseUsageMetadata,
26
- GenerationConfig,
27
29
  HarmBlockThreshold,
28
30
  HarmCategory,
29
31
  HttpOptions,
@@ -31,8 +33,10 @@ from google.genai.types import ( # type: ignore
31
33
  SafetySetting,
32
34
  SafetySettingDict,
33
35
  Schema,
36
+ ThinkingConfig,
34
37
  Tool,
35
38
  ToolConfig,
39
+ ToolListUnion,
36
40
  Type,
37
41
  )
38
42
  from pydantic import JsonValue
@@ -89,13 +93,28 @@ GOOGLE_API_KEY = "GOOGLE_API_KEY"
89
93
  VERTEX_API_KEY = "VERTEX_API_KEY"
90
94
 
91
95
  SAFETY_SETTINGS = "safety_settings"
92
- DEFAULT_SAFETY_SETTINGS = {
93
- HarmCategory.HARM_CATEGORY_CIVIC_INTEGRITY: HarmBlockThreshold.BLOCK_NONE,
94
- HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
95
- HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
96
- HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
97
- HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
98
- }
96
+ DEFAULT_SAFETY_SETTINGS: list[SafetySettingDict] = [
97
+ {
98
+ "category": HarmCategory.HARM_CATEGORY_CIVIC_INTEGRITY,
99
+ "threshold": HarmBlockThreshold.BLOCK_NONE,
100
+ },
101
+ {
102
+ "category": HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
103
+ "threshold": HarmBlockThreshold.BLOCK_NONE,
104
+ },
105
+ {
106
+ "category": HarmCategory.HARM_CATEGORY_HARASSMENT,
107
+ "threshold": HarmBlockThreshold.BLOCK_NONE,
108
+ },
109
+ {
110
+ "category": HarmCategory.HARM_CATEGORY_HATE_SPEECH,
111
+ "threshold": HarmBlockThreshold.BLOCK_NONE,
112
+ },
113
+ {
114
+ "category": HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
115
+ "threshold": HarmBlockThreshold.BLOCK_NONE,
116
+ },
117
+ ]
99
118
 
100
119
 
101
120
  class GoogleGenAIAPI(ModelAPI):
@@ -105,6 +124,7 @@ class GoogleGenAIAPI(ModelAPI):
105
124
  base_url: str | None,
106
125
  api_key: str | None,
107
126
  config: GenerateConfig = GenerateConfig(),
127
+ api_version: str | None = None,
108
128
  **model_args: Any,
109
129
  ) -> None:
110
130
  super().__init__(
@@ -115,12 +135,30 @@ class GoogleGenAIAPI(ModelAPI):
115
135
  config=config,
116
136
  )
117
137
 
138
+ # record api version
139
+ self.api_version = api_version
140
+
118
141
  # pick out user-provided safety settings and merge against default
119
- self.safety_settings = DEFAULT_SAFETY_SETTINGS.copy()
142
+ self.safety_settings: list[SafetySettingDict] = DEFAULT_SAFETY_SETTINGS.copy()
120
143
  if SAFETY_SETTINGS in model_args:
121
- self.safety_settings.update(
122
- parse_safety_settings(model_args.get(SAFETY_SETTINGS))
144
+
145
+ def update_safety_setting(
146
+ category: HarmCategory, threshold: HarmBlockThreshold
147
+ ) -> None:
148
+ for setting in self.safety_settings:
149
+ if setting["category"] == category:
150
+ setting["threshold"] = threshold
151
+ break
152
+
153
+ user_safety_settings = parse_safety_settings(
154
+ model_args.get(SAFETY_SETTINGS)
123
155
  )
156
+ for safety_setting in user_safety_settings:
157
+ if safety_setting["category"] and safety_setting["threshold"]:
158
+ update_safety_setting(
159
+ safety_setting["category"], safety_setting["threshold"]
160
+ )
161
+
124
162
  del model_args[SAFETY_SETTINGS]
125
163
 
126
164
  # extract any service prefix from model name
@@ -196,7 +234,10 @@ class GoogleGenAIAPI(ModelAPI):
196
234
  client = Client(
197
235
  vertexai=self.is_vertex(),
198
236
  api_key=self.api_key,
199
- http_options={"base_url": self.base_url},
237
+ http_options={
238
+ "base_url": self.base_url,
239
+ "api_version": self.api_version,
240
+ },
200
241
  **self.model_args,
201
242
  )
202
243
 
@@ -221,7 +262,8 @@ class GoogleGenAIAPI(ModelAPI):
221
262
  safety_settings=safety_settings_to_list(self.safety_settings),
222
263
  tools=gemini_tools,
223
264
  tool_config=gemini_tool_config,
224
- system_instruction=await extract_system_message_as_parts(client, input),
265
+ system_instruction=await extract_system_message_as_parts(client, input), # type: ignore[arg-type]
266
+ thinking_config=self.chat_thinking_config(config),
225
267
  )
226
268
  if config.response_schema is not None:
227
269
  parameters.response_mime_type = "application/json"
@@ -233,7 +275,7 @@ class GoogleGenAIAPI(ModelAPI):
233
275
 
234
276
  def model_call() -> ModelCall:
235
277
  return build_model_call(
236
- contents=gemini_contents,
278
+ contents=gemini_contents, # type: ignore[arg-type]
237
279
  safety_settings=self.safety_settings,
238
280
  generation_config=parameters,
239
281
  tools=gemini_tools,
@@ -245,7 +287,7 @@ class GoogleGenAIAPI(ModelAPI):
245
287
  try:
246
288
  response = await client.aio.models.generate_content(
247
289
  model=self.service_model_name(),
248
- contents=gemini_contents,
290
+ contents=gemini_contents, # type: ignore[arg-type]
249
291
  config=parameters,
250
292
  )
251
293
  except ClientError as ex:
@@ -264,6 +306,15 @@ class GoogleGenAIAPI(ModelAPI):
264
306
  """Model name without any service prefix."""
265
307
  return self.model_name.replace(f"{self.service}/", "", 1)
266
308
 
309
+ def is_gemini(self) -> bool:
310
+ return "gemini-" in self.service_model_name()
311
+
312
+ def is_gemini_1_5(self) -> bool:
313
+ return "gemini-1.5" in self.service_model_name()
314
+
315
+ def is_gemini_2_0(self) -> bool:
316
+ return "gemini-2.0" in self.service_model_name()
317
+
267
318
  @override
268
319
  def should_retry(self, ex: Exception) -> bool:
269
320
  if isinstance(ex, APIError) and ex.code is not None:
@@ -293,22 +344,35 @@ class GoogleGenAIAPI(ModelAPI):
293
344
  else:
294
345
  raise ex
295
346
 
347
+ def chat_thinking_config(self, config: GenerateConfig) -> ThinkingConfig | None:
348
+ # thinking_config is only supported for gemini 2.5 above
349
+ has_thinking_config = (
350
+ self.is_gemini() and not self.is_gemini_1_5() and not self.is_gemini_2_0()
351
+ )
352
+ if has_thinking_config:
353
+ return ThinkingConfig(
354
+ include_thoughts=True, thinking_budget=config.reasoning_tokens
355
+ )
356
+ else:
357
+ return None
358
+
296
359
 
297
- def safety_settings_to_list(safety_settings: SafetySettingDict) -> list[SafetySetting]:
298
- return [
299
- SafetySetting(
300
- category=category,
301
- threshold=threshold,
360
+ def safety_settings_to_list(
361
+ safety_settings: list[SafetySettingDict],
362
+ ) -> list[SafetySetting]:
363
+ settings: list[SafetySetting] = []
364
+ for setting in safety_settings:
365
+ settings.append(
366
+ SafetySetting(category=setting["category"], threshold=setting["threshold"])
302
367
  )
303
- for category, threshold in safety_settings.items()
304
- ]
368
+ return settings
305
369
 
306
370
 
307
371
  def build_model_call(
308
- contents: list[Content],
309
- generation_config: GenerationConfig,
310
- safety_settings: SafetySettingDict,
311
- tools: list[Tool] | None,
372
+ contents: ContentListUnion | ContentListUnionDict,
373
+ generation_config: GenerateContentConfig,
374
+ safety_settings: list[SafetySettingDict],
375
+ tools: ToolListUnion | None,
312
376
  tool_config: ToolConfig | None,
313
377
  response: GenerateContentResponse | None,
314
378
  time: float | None,
@@ -364,7 +428,7 @@ def consecutive_tool_message_reducer(
364
428
  and messages[-1].role == "function"
365
429
  ):
366
430
  messages[-1] = Content(
367
- role="function", parts=messages[-1].parts + message.parts
431
+ role="function", parts=(messages[-1].parts or []) + (message.parts or [])
368
432
  )
369
433
  else:
370
434
  messages.append(message)
@@ -443,14 +507,16 @@ async def chat_content_to_part(
443
507
  return Part.from_bytes(mime_type=mime_type, data=content_bytes)
444
508
  else:
445
509
  file = await file_for_content(client, content)
510
+ if file.uri is None:
511
+ raise RuntimeError(f"Failed to get URI for file: {file.display_name}")
446
512
  return Part.from_uri(file_uri=file.uri, mime_type=file.mime_type)
447
513
 
448
514
 
449
515
  async def extract_system_message_as_parts(
450
516
  client: Client,
451
517
  messages: list[ChatMessage],
452
- ) -> list[Part] | None:
453
- system_parts: list[Part] = []
518
+ ) -> list[File | Part | str] | None:
519
+ system_parts: list[File | Part | str] = []
454
520
  for message in messages:
455
521
  if message.role == "system":
456
522
  content = message.content
@@ -466,7 +532,7 @@ async def extract_system_message_as_parts(
466
532
  return system_parts or None
467
533
 
468
534
 
469
- def chat_tools(tools: list[ToolInfo]) -> list[Tool]:
535
+ def chat_tools(tools: list[ToolInfo]) -> ToolListUnion:
470
536
  declarations = [
471
537
  FunctionDeclaration(
472
538
  name=tool.name,
@@ -502,6 +568,27 @@ def schema_from_param(
502
568
  type=Type.BOOLEAN, description=param.description, nullable=nullable
503
569
  )
504
570
  elif param.type == "string":
571
+ if param.format == "date-time":
572
+ return Schema(
573
+ type=Type.STRING,
574
+ description=param.description,
575
+ format="^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$",
576
+ nullable=nullable,
577
+ )
578
+ elif param.format == "date":
579
+ return Schema(
580
+ type=Type.STRING,
581
+ description=param.description,
582
+ format="^[0-9]{4}-[0-9]{2}-[0-9]{2}$",
583
+ nullable=nullable,
584
+ )
585
+ elif param.format == "time":
586
+ return Schema(
587
+ type=Type.STRING,
588
+ description=param.description,
589
+ format="^[0-9]{2}:[0-9]{2}:[0-9]{2}$",
590
+ nullable=nullable,
591
+ )
505
592
  return Schema(
506
593
  type=Type.STRING, description=param.description, nullable=nullable
507
594
  )
@@ -538,12 +625,15 @@ def chat_tool_config(tool_choice: ToolChoice) -> ToolConfig:
538
625
  if isinstance(tool_choice, ToolFunction):
539
626
  return ToolConfig(
540
627
  function_calling_config=FunctionCallingConfig(
541
- mode="ANY", allowed_function_names=[tool_choice.name]
628
+ mode=FunctionCallingConfigMode.ANY,
629
+ allowed_function_names=[tool_choice.name],
542
630
  )
543
631
  )
544
632
  else:
545
633
  return ToolConfig(
546
- function_calling_config=FunctionCallingConfig(mode=tool_choice.upper())
634
+ function_calling_config=FunctionCallingConfig(
635
+ mode=cast(FunctionCallingConfigMode, tool_choice.upper())
636
+ )
547
637
  )
548
638
 
549
639
 
@@ -552,51 +642,57 @@ def completion_choice_from_candidate(
552
642
  ) -> ChatCompletionChoice:
553
643
  # content can be None when the finish_reason is SAFETY
554
644
  if candidate.content is None:
555
- content = ""
645
+ content: (
646
+ str
647
+ | list[
648
+ ContentText
649
+ | ContentReasoning
650
+ | ContentImage
651
+ | ContentAudio
652
+ | ContentVideo
653
+ ]
654
+ ) = ""
556
655
  # content.parts can be None when the finish_reason is MALFORMED_FUNCTION_CALL
557
656
  elif candidate.content.parts is None:
558
657
  content = ""
559
658
  else:
560
- content = " ".join(
561
- [
562
- part.text
563
- for part in candidate.content.parts
564
- if part.text is not None and candidate.content is not None
565
- ]
566
- )
567
-
568
- # split reasoning
569
- reasoning, content = split_reasoning(content)
659
+ content = []
660
+ for part in candidate.content.parts:
661
+ if part.text is not None:
662
+ if part.thought is True:
663
+ content.append(ContentReasoning(reasoning=part.text))
664
+ else:
665
+ content.append(ContentText(text=part.text))
570
666
 
571
667
  # now tool calls
572
668
  tool_calls: list[ToolCall] = []
573
669
  if candidate.content is not None and candidate.content.parts is not None:
574
670
  for part in candidate.content.parts:
575
671
  if part.function_call:
576
- tool_calls.append(
577
- ToolCall(
578
- id=part.function_call.name,
579
- function=part.function_call.name,
580
- arguments=part.function_call.args,
672
+ if (
673
+ part.function_call is not None
674
+ and part.function_call.name is not None
675
+ and part.function_call.args is not None
676
+ ):
677
+ tool_calls.append(
678
+ ToolCall(
679
+ id=part.function_call.name,
680
+ function=part.function_call.name,
681
+ arguments=part.function_call.args,
682
+ )
581
683
  )
582
- )
684
+ else:
685
+ raise ValueError(f"Incomplete function call: {part.function_call}")
583
686
 
584
687
  # stop reason
585
- stop_reason = finish_reason_to_stop_reason(candidate.finish_reason)
586
-
587
- # choice content may include reasoning
588
- if reasoning:
589
- choice_content: str | list[Content] = [
590
- ContentReasoning(reasoning=reasoning),
591
- ContentText(text=content),
592
- ]
593
- else:
594
- choice_content = content
688
+ stop_reason = finish_reason_to_stop_reason(
689
+ candidate.finish_reason or FinishReason.STOP
690
+ )
595
691
 
596
692
  # build choice
597
693
  choice = ChatCompletionChoice(
598
694
  message=ChatMessageAssistant(
599
- content=choice_content,
695
+ content=content,
600
696
  tool_calls=tool_calls if len(tool_calls) > 0 else None,
601
697
  model=model,
602
698
  source="generate",
@@ -607,21 +703,27 @@ def completion_choice_from_candidate(
607
703
  # add logprobs if provided
608
704
  if candidate.logprobs_result:
609
705
  logprobs: list[Logprob] = []
610
- for chosen, top in zip(
611
- candidate.logprobs_result.chosen_candidates,
612
- candidate.logprobs_result.top_candidates,
706
+ if (
707
+ candidate.logprobs_result.chosen_candidates
708
+ and candidate.logprobs_result.top_candidates
613
709
  ):
614
- logprobs.append(
615
- Logprob(
616
- token=chosen.token,
617
- logprob=chosen.log_probability,
618
- top_logprobs=[
619
- TopLogprob(token=c.token, logprob=c.log_probability)
620
- for c in top.candidates
621
- ],
622
- )
623
- )
624
- choice.logprobs = Logprobs(content=logprobs)
710
+ for chosen, top in zip(
711
+ candidate.logprobs_result.chosen_candidates,
712
+ candidate.logprobs_result.top_candidates,
713
+ ):
714
+ if chosen.token and chosen.log_probability:
715
+ logprobs.append(
716
+ Logprob(
717
+ token=chosen.token,
718
+ logprob=chosen.log_probability,
719
+ top_logprobs=[
720
+ TopLogprob(token=c.token, logprob=c.log_probability)
721
+ for c in (top.candidates or [])
722
+ if c.token and c.log_probability
723
+ ],
724
+ )
725
+ )
726
+ choice.logprobs = Logprobs(content=logprobs)
625
727
 
626
728
  return choice
627
729
 
@@ -632,7 +734,7 @@ def completion_choices_from_candidates(
632
734
  ) -> list[ChatCompletionChoice]:
633
735
  candidates = response.candidates
634
736
  if candidates:
635
- candidates_list = sorted(candidates, key=lambda c: c.index)
737
+ candidates_list = sorted(candidates, key=lambda c: c.index or 0)
636
738
  return [
637
739
  completion_choice_from_candidate(model, candidate)
638
740
  for candidate in candidates_list
@@ -661,15 +763,6 @@ def completion_choices_from_candidates(
661
763
  ]
662
764
 
663
765
 
664
- def split_reasoning(content: str) -> tuple[str | None, str]:
665
- separator = "\nFinal Answer: "
666
- if separator in content:
667
- parts = content.split(separator, 1) # dplit only on first occurrence
668
- return parts[0].strip(), separator.lstrip() + parts[1].strip()
669
- else:
670
- return None, content.strip()
671
-
672
-
673
766
  def prompt_feedback_to_content(
674
767
  feedback: GenerateContentResponsePromptFeedback,
675
768
  ) -> str:
@@ -687,7 +780,7 @@ def prompt_feedback_to_content(
687
780
 
688
781
 
689
782
  def usage_metadata_to_model_usage(
690
- metadata: GenerateContentResponseUsageMetadata,
783
+ metadata: GenerateContentResponseUsageMetadata | None,
691
784
  ) -> ModelUsage | None:
692
785
  if metadata is None:
693
786
  return None
@@ -695,6 +788,7 @@ def usage_metadata_to_model_usage(
695
788
  input_tokens=metadata.prompt_token_count or 0,
696
789
  output_tokens=metadata.candidates_token_count or 0,
697
790
  total_tokens=metadata.total_token_count or 0,
791
+ reasoning_tokens=metadata.thoughts_token_count or 0,
698
792
  )
699
793
 
700
794
 
@@ -720,14 +814,14 @@ def finish_reason_to_stop_reason(finish_reason: FinishReason) -> StopReason:
720
814
 
721
815
  def parse_safety_settings(
722
816
  safety_settings: Any,
723
- ) -> dict[HarmCategory, HarmBlockThreshold]:
817
+ ) -> list[SafetySettingDict]:
724
818
  # ensure we have a dict
725
819
  if isinstance(safety_settings, str):
726
820
  safety_settings = json.loads(safety_settings)
727
821
  if not isinstance(safety_settings, dict):
728
822
  raise ValueError(f"{SAFETY_SETTINGS} must be dictionary.")
729
823
 
730
- parsed_settings: dict[HarmCategory, HarmBlockThreshold] = {}
824
+ parsed_settings: list[SafetySettingDict] = []
731
825
  for key, value in safety_settings.items():
732
826
  if not isinstance(key, str):
733
827
  raise ValueError(f"Unexpected type for harm category: {key}")
@@ -735,7 +829,7 @@ def parse_safety_settings(
735
829
  raise ValueError(f"Unexpected type for harm block threshold: {value}")
736
830
  key = str_to_harm_category(key)
737
831
  value = str_to_harm_block_threshold(value)
738
- parsed_settings[key] = value
832
+ parsed_settings.append({"category": key, "threshold": value})
739
833
  return parsed_settings
740
834
 
741
835
 
@@ -795,6 +889,7 @@ async def file_for_content(
795
889
  if uploaded_file:
796
890
  try:
797
891
  upload: File = client.files.get(name=uploaded_file)
892
+ assert upload.state
798
893
  if upload.state.name == "ACTIVE":
799
894
  trace(f"Using uploaded file: {uploaded_file}")
800
895
  return upload
@@ -809,14 +904,15 @@ async def file_for_content(
809
904
  upload = client.files.upload(
810
905
  file=BytesIO(content_bytes), config=dict(mime_type=mime_type)
811
906
  )
812
- while upload.state.name == "PROCESSING":
907
+ while upload.state.name == "PROCESSING": # type: ignore[union-attr]
813
908
  await anyio.sleep(3)
909
+ assert upload.name
814
910
  upload = client.files.get(name=upload.name)
815
- if upload.state.name == "FAILED":
911
+ if upload.state.name == "FAILED": # type: ignore[union-attr]
816
912
  trace(f"Failed to upload file '{upload.name}: {upload.error}")
817
913
  raise ValueError(f"Google file upload failed: {upload.error}")
818
914
  # trace and record it
819
915
  trace(f"Uploaded file: {upload.name}")
820
- files_db.put(content_sha256, upload.name)
916
+ files_db.put(content_sha256, str(upload.name))
821
917
  # return the file
822
918
  return upload
@@ -347,7 +347,7 @@ def chat_completion_assistant_message(
347
347
 
348
348
  def set_random_seeds(seed: int | None = None) -> None:
349
349
  if seed is None:
350
- seed = np.random.default_rng().integers(2**32 - 1)
350
+ seed = np.random.default_rng().integers(2**32 - 1) # type: ignore
351
351
  # python hash seed
352
352
  os.environ["PYTHONHASHSEED"] = str(seed)
353
353
  # transformers seed
@@ -3,8 +3,6 @@ import json
3
3
  import os
4
4
  from typing import Any, Literal
5
5
 
6
- from httpcore import ReadTimeout
7
- from httpx import ReadTimeout as AsyncReadTimeout
8
6
  from mistralai import (
9
7
  ContentChunk,
10
8
  DocumentURLChunk,
@@ -51,6 +49,7 @@ from inspect_ai._util.http import is_retryable_http_status
51
49
  from inspect_ai._util.images import file_as_data_uri
52
50
  from inspect_ai.tool import ToolCall, ToolChoice, ToolFunction, ToolInfo
53
51
 
52
+ from ..._util.httpx import httpx_should_retry
54
53
  from .._call_tools import parse_tool_call
55
54
  from .._chat_message import (
56
55
  ChatMessage,
@@ -235,7 +234,7 @@ class MistralAPI(ModelAPI):
235
234
  def should_retry(self, ex: Exception) -> bool:
236
235
  if isinstance(ex, SDKError):
237
236
  return is_retryable_http_status(ex.status_code)
238
- elif isinstance(ex, ReadTimeout | AsyncReadTimeout):
237
+ elif httpx_should_retry(ex):
239
238
  return True
240
239
  else:
241
240
  return False