inspect-ai 0.3.89__py3-none-any.whl → 0.3.91__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (370) hide show
  1. inspect_ai/_cli/common.py +13 -0
  2. inspect_ai/_cli/eval.py +40 -0
  3. inspect_ai/_display/textual/widgets/samples.py +49 -4
  4. inspect_ai/_display/textual/widgets/vscode.py +4 -2
  5. inspect_ai/_eval/eval.py +41 -28
  6. inspect_ai/_eval/evalset.py +4 -0
  7. inspect_ai/_eval/loader.py +4 -5
  8. inspect_ai/_eval/registry.py +1 -1
  9. inspect_ai/_eval/run.py +6 -3
  10. inspect_ai/_eval/task/log.py +6 -0
  11. inspect_ai/_eval/task/run.py +108 -53
  12. inspect_ai/_eval/task/sandbox.py +19 -5
  13. inspect_ai/_util/_async.py +1 -1
  14. inspect_ai/_util/constants.py +1 -0
  15. inspect_ai/_util/environ.py +32 -0
  16. inspect_ai/_util/file.py +8 -1
  17. inspect_ai/_util/httpx.py +105 -22
  18. inspect_ai/_util/registry.py +83 -9
  19. inspect_ai/_util/text.py +81 -17
  20. inspect_ai/_util/transcript.py +9 -6
  21. inspect_ai/_util/vscode.py +7 -2
  22. inspect_ai/_view/schema.py +1 -1
  23. inspect_ai/_view/www/babel.config.js +11 -0
  24. inspect_ai/_view/www/dist/assets/index.css +3583 -3508
  25. inspect_ai/_view/www/dist/assets/index.js +59212 -52521
  26. inspect_ai/_view/www/eslint.config.mjs +10 -1
  27. inspect_ai/_view/www/jest.config.mjs +21 -0
  28. inspect_ai/_view/www/log-schema.json +111 -2
  29. inspect_ai/_view/www/package.json +19 -5
  30. inspect_ai/_view/www/src/{types → @types}/log.d.ts +95 -32
  31. inspect_ai/_view/www/{App.css → src/app/App.css} +22 -14
  32. inspect_ai/_view/www/src/app/App.tsx +168 -0
  33. inspect_ai/_view/www/src/{AppErrorBoundary.tsx → app/AppErrorBoundary.tsx} +1 -1
  34. inspect_ai/_view/www/src/{appearance → app/appearance}/icons.ts +1 -0
  35. inspect_ai/_view/www/src/{metadata → app/content}/RenderedContent.tsx +5 -5
  36. inspect_ai/_view/www/src/{workspace/WorkSpaceView.tsx → app/log-view/LogView.tsx} +59 -40
  37. inspect_ai/_view/www/src/app/log-view/LogViewContainer.tsx +159 -0
  38. inspect_ai/_view/www/src/app/log-view/LogViewLayout.tsx +109 -0
  39. inspect_ai/_view/www/src/{workspace → app/log-view}/error/TaskErrorPanel.tsx +3 -3
  40. inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ModelRolesView.tsx +1 -1
  41. inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/Navbar.tsx +4 -4
  42. inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/PrimaryBar.tsx +8 -8
  43. inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ResultsPanel.tsx +6 -6
  44. inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/RunningStatusPanel.tsx +1 -1
  45. inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ScoreGrid.tsx +1 -1
  46. inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/SecondaryBar.tsx +8 -8
  47. inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/InfoTab.tsx +35 -6
  48. inspect_ai/_view/www/src/app/log-view/tabs/JsonTab.tsx +136 -0
  49. inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/SamplesTab.tsx +82 -73
  50. inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/grouping.ts +3 -3
  51. inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/types.ts +1 -1
  52. inspect_ai/_view/www/src/{plan → app/plan}/DatasetDetailView.tsx +2 -2
  53. inspect_ai/_view/www/src/{plan → app/plan}/DetailStep.tsx +1 -1
  54. inspect_ai/_view/www/src/{plan → app/plan}/ModelCard.tsx +4 -4
  55. inspect_ai/_view/www/src/{plan → app/plan}/PlanCard.tsx +2 -2
  56. inspect_ai/_view/www/src/{plan → app/plan}/PlanDetailView.tsx +5 -5
  57. inspect_ai/_view/www/src/{plan → app/plan}/SolverDetailView.tsx +1 -1
  58. inspect_ai/_view/www/src/app/routing/AppRouter.tsx +58 -0
  59. inspect_ai/_view/www/src/app/routing/navigationHooks.ts +182 -0
  60. inspect_ai/_view/www/src/app/routing/url.ts +43 -0
  61. inspect_ai/_view/www/src/{samples → app/samples}/InlineSampleDisplay.tsx +11 -27
  62. inspect_ai/_view/www/src/{samples → app/samples}/SampleDialog.tsx +36 -40
  63. inspect_ai/_view/www/src/{samples → app/samples}/SampleDisplay.module.css +4 -0
  64. inspect_ai/_view/www/src/{samples → app/samples}/SampleDisplay.tsx +116 -49
  65. inspect_ai/_view/www/src/{samples → app/samples}/SampleSummaryView.module.css +1 -1
  66. inspect_ai/_view/www/src/{samples → app/samples}/SampleSummaryView.tsx +29 -26
  67. inspect_ai/_view/www/src/{samples → app/samples}/SamplesTools.tsx +3 -3
  68. inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessage.module.css +5 -2
  69. inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessage.tsx +13 -5
  70. inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessageRenderer.tsx +3 -3
  71. inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessageRow.tsx +6 -1
  72. inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatView.tsx +4 -2
  73. inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatViewVirtualList.tsx +5 -3
  74. inspect_ai/_view/www/src/app/samples/chat/MessageContent.module.css +12 -0
  75. inspect_ai/_view/www/src/{samples → app/samples}/chat/MessageContent.tsx +11 -10
  76. inspect_ai/_view/www/src/app/samples/chat/MessageContents.module.css +7 -0
  77. inspect_ai/_view/www/src/{samples → app/samples}/chat/MessageContents.tsx +14 -8
  78. inspect_ai/_view/www/src/{samples → app/samples}/chat/messages.ts +2 -2
  79. inspect_ai/_view/www/src/app/samples/chat/tools/ToolCallView.module.css +7 -0
  80. inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolCallView.tsx +26 -27
  81. inspect_ai/_view/www/src/app/samples/chat/tools/ToolInput.module.css +19 -0
  82. inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolInput.tsx +3 -3
  83. inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolOutput.module.css +1 -0
  84. inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolOutput.tsx +1 -1
  85. inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolTitle.module.css +4 -0
  86. inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolTitle.tsx +2 -2
  87. inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/tool.ts +1 -1
  88. inspect_ai/_view/www/src/app/samples/chat/types.ts +1 -0
  89. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/samplesDescriptor.tsx +38 -15
  90. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/BooleanScoreDescriptor.tsx +1 -1
  91. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/CategoricalScoreDescriptor.tsx +2 -2
  92. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/NumericScoreDescriptor.tsx +3 -3
  93. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/ObjectScoreDescriptor.tsx +4 -4
  94. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/OtherScoreDescriptor.tsx +2 -2
  95. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/PassFailScoreDescriptor.tsx +2 -2
  96. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/ScoreDescriptor.tsx +1 -1
  97. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/types.ts +4 -3
  98. inspect_ai/_view/www/src/{samples → app/samples}/error/SampleErrorView.module.css +2 -1
  99. inspect_ai/_view/www/src/{samples → app/samples}/list/SampleHeader.tsx +3 -0
  100. inspect_ai/_view/www/src/{samples → app/samples}/list/SampleList.tsx +47 -33
  101. inspect_ai/_view/www/src/{samples → app/samples}/list/SampleRow.module.css +16 -0
  102. inspect_ai/_view/www/src/{samples → app/samples}/list/SampleRow.tsx +47 -20
  103. inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/SelectScorer.tsx +1 -1
  104. inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/SortFilter.tsx +4 -4
  105. inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/filters.ts +8 -6
  106. inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/SampleFilter.tsx +4 -3
  107. inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/completions.ts +1 -1
  108. inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/language.ts +1 -0
  109. inspect_ai/_view/www/src/{samples → app/samples}/sampleDataAdapter.ts +3 -3
  110. inspect_ai/_view/www/src/{samples → app/samples}/sampleLimit.ts +1 -1
  111. inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScores.tsx +1 -1
  112. inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScoresGrid.tsx +12 -11
  113. inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScoresView.tsx +6 -6
  114. inspect_ai/_view/www/src/{samples → app/samples}/transcript/ApprovalEventView.tsx +1 -1
  115. inspect_ai/_view/www/src/{samples → app/samples}/transcript/ErrorEventView.tsx +3 -3
  116. inspect_ai/_view/www/src/{samples → app/samples}/transcript/InfoEventView.tsx +4 -4
  117. inspect_ai/_view/www/src/{samples → app/samples}/transcript/InputEventView.tsx +3 -3
  118. inspect_ai/_view/www/src/{samples → app/samples}/transcript/LoggerEventView.tsx +3 -3
  119. inspect_ai/_view/www/src/{samples → app/samples}/transcript/ModelEventView.module.css +13 -7
  120. inspect_ai/_view/www/src/{samples → app/samples}/transcript/ModelEventView.tsx +49 -21
  121. inspect_ai/_view/www/src/{samples → app/samples}/transcript/SampleInitEventView.tsx +11 -9
  122. inspect_ai/_view/www/src/{samples → app/samples}/transcript/SampleLimitEventView.tsx +1 -1
  123. inspect_ai/_view/www/src/{samples → app/samples}/transcript/SandboxEventView.tsx +8 -6
  124. inspect_ai/_view/www/src/{samples → app/samples}/transcript/ScoreEventView.tsx +4 -4
  125. inspect_ai/_view/www/src/{samples → app/samples}/transcript/StepEventView.tsx +22 -8
  126. inspect_ai/_view/www/src/{samples → app/samples}/transcript/SubtaskEventView.tsx +2 -2
  127. inspect_ai/_view/www/src/{samples → app/samples}/transcript/ToolEventView.tsx +2 -2
  128. inspect_ai/_view/www/src/{samples → app/samples}/transcript/TranscriptView.module.css +8 -9
  129. inspect_ai/_view/www/src/{samples → app/samples}/transcript/TranscriptView.tsx +32 -114
  130. inspect_ai/_view/www/src/{samples → app/samples}/transcript/TranscriptVirtualListComponent.module.css +6 -5
  131. inspect_ai/_view/www/src/{samples → app/samples}/transcript/TranscriptVirtualListComponent.tsx +14 -2
  132. inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventPanel.tsx +2 -2
  133. inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventTimingPanel.tsx +1 -1
  134. inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/utils.ts +1 -1
  135. inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateEventRenderers.tsx +23 -21
  136. inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateEventRenders.module.css +7 -0
  137. inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateEventView.tsx +2 -2
  138. inspect_ai/_view/www/src/app/samples/transcript/transform/fixups.ts +142 -0
  139. inspect_ai/_view/www/src/app/samples/transcript/transform/treeify.ts +39 -0
  140. inspect_ai/_view/www/src/{samples → app/samples}/transcript/types.ts +1 -1
  141. inspect_ai/_view/www/src/{workspace → app}/sidebar/EvalStatus.tsx +1 -1
  142. inspect_ai/_view/www/src/app/sidebar/LogDirectoryTitleView.module.css +16 -0
  143. inspect_ai/_view/www/src/app/sidebar/LogDirectoryTitleView.tsx +70 -0
  144. inspect_ai/_view/www/src/{workspace → app}/sidebar/Sidebar.module.css +8 -0
  145. inspect_ai/_view/www/src/{workspace → app}/sidebar/Sidebar.tsx +35 -17
  146. inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarLogEntry.tsx +1 -1
  147. inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarScoreView.tsx +2 -2
  148. inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarScoresView.tsx +2 -2
  149. inspect_ai/_view/www/src/{types.ts → app/types.ts} +18 -11
  150. inspect_ai/_view/www/src/{usage → app/usage}/ModelTokenTable.tsx +1 -1
  151. inspect_ai/_view/www/src/{usage → app/usage}/ModelUsagePanel.tsx +2 -2
  152. inspect_ai/_view/www/src/{usage → app/usage}/TokenTable.tsx +1 -1
  153. inspect_ai/_view/www/src/{usage → app/usage}/UsageCard.tsx +6 -6
  154. inspect_ai/_view/www/src/{api → client/api}/api-browser.ts +2 -2
  155. inspect_ai/_view/www/src/{api → client/api}/api-http.ts +3 -3
  156. inspect_ai/_view/www/src/{api → client/api}/api-vscode.ts +2 -2
  157. inspect_ai/_view/www/src/{api → client/api}/client-api.ts +6 -5
  158. inspect_ai/_view/www/src/{api → client/api}/index.ts +2 -2
  159. inspect_ai/_view/www/src/{api → client/api}/types.ts +4 -1
  160. inspect_ai/_view/www/src/{logfile → client/remote}/remoteLogFile.ts +3 -3
  161. inspect_ai/_view/www/src/{storage → client/storage}/index.ts +11 -5
  162. inspect_ai/_view/www/src/components/Card.tsx +1 -1
  163. inspect_ai/_view/www/src/components/CopyButton.tsx +1 -1
  164. inspect_ai/_view/www/src/components/DownloadButton.tsx +1 -1
  165. inspect_ai/_view/www/src/components/ErrorPanel.tsx +1 -1
  166. inspect_ai/_view/www/src/components/{ExpandablePanel.css → ExpandablePanel.module.css} +14 -11
  167. inspect_ai/_view/www/src/components/ExpandablePanel.tsx +16 -10
  168. inspect_ai/_view/www/src/components/FindBand.tsx +1 -1
  169. inspect_ai/_view/www/src/components/JsonPanel.css +2 -2
  170. inspect_ai/_view/www/src/components/LargeModal.tsx +12 -1
  171. inspect_ai/_view/www/src/components/LightboxCarousel.tsx +1 -1
  172. inspect_ai/_view/www/src/components/MarkdownDiv.tsx +3 -1
  173. inspect_ai/_view/www/src/components/MessageBand.tsx +1 -1
  174. inspect_ai/_view/www/src/components/NoContentsPanel.tsx +1 -1
  175. inspect_ai/_view/www/src/constants.ts +10 -9
  176. inspect_ai/_view/www/src/index.tsx +27 -11
  177. inspect_ai/_view/www/src/state/appSlice.ts +44 -5
  178. inspect_ai/_view/www/src/state/hooks.ts +30 -7
  179. inspect_ai/_view/www/src/state/logSlice.ts +7 -5
  180. inspect_ai/_view/www/src/state/logsPolling.ts +1 -1
  181. inspect_ai/_view/www/src/state/logsSlice.ts +18 -13
  182. inspect_ai/_view/www/src/state/samplePolling.ts +12 -12
  183. inspect_ai/_view/www/src/state/sampleSlice.ts +3 -5
  184. inspect_ai/_view/www/src/state/sampleUtils.ts +1 -1
  185. inspect_ai/_view/www/src/{scoring/utils.ts → state/scoring.ts} +2 -2
  186. inspect_ai/_view/www/src/state/store.ts +9 -7
  187. inspect_ai/_view/www/src/state/utils.ts +1 -1
  188. inspect_ai/_view/www/src/tests/README.md +49 -0
  189. inspect_ai/_view/www/src/tests/__mocks__/fileMock.js +1 -0
  190. inspect_ai/_view/www/src/tests/__mocks__/styleMock.js +1 -0
  191. inspect_ai/_view/www/src/tests/setupTests.mjs +1 -0
  192. inspect_ai/_view/www/src/tests/utils/base64.test.ts +23 -0
  193. inspect_ai/_view/www/src/tests/utils/format.test.ts +127 -0
  194. inspect_ai/_view/www/src/tests/utils/path.test.ts +54 -0
  195. inspect_ai/_view/www/src/utils/format.ts +8 -2
  196. inspect_ai/_view/www/src/utils/path.ts +14 -2
  197. inspect_ai/_view/www/src/utils/polling.ts +1 -2
  198. inspect_ai/_view/www/src/utils/uri.ts +32 -0
  199. inspect_ai/_view/www/yarn.lock +3310 -382
  200. inspect_ai/agent/_handoff.py +6 -3
  201. inspect_ai/agent/_human/agent.py +5 -3
  202. inspect_ai/agent/_human/install.py +16 -7
  203. inspect_ai/agent/_human/panel.py +14 -1
  204. inspect_ai/agent/_human/service.py +5 -1
  205. inspect_ai/agent/_react.py +161 -128
  206. inspect_ai/agent/_types.py +15 -4
  207. inspect_ai/approval/_policy.py +2 -2
  208. inspect_ai/log/_file.py +30 -11
  209. inspect_ai/log/_log.py +7 -1
  210. inspect_ai/log/_recorders/eval.py +3 -0
  211. inspect_ai/log/_recorders/types.py +1 -0
  212. inspect_ai/log/_samples.py +4 -0
  213. inspect_ai/model/_call_tools.py +33 -17
  214. inspect_ai/model/_generate_config.py +10 -2
  215. inspect_ai/model/_model.py +41 -21
  216. inspect_ai/model/_model_output.py +2 -1
  217. inspect_ai/model/_openai.py +10 -8
  218. inspect_ai/model/_openai_responses.py +83 -42
  219. inspect_ai/model/_providers/anthropic.py +14 -12
  220. inspect_ai/model/_providers/google.py +191 -95
  221. inspect_ai/model/_providers/hf.py +1 -1
  222. inspect_ai/model/_providers/mistral.py +2 -3
  223. inspect_ai/model/_providers/openai.py +54 -17
  224. inspect_ai/model/_providers/openai_o1.py +1 -1
  225. inspect_ai/model/_providers/openai_responses.py +28 -16
  226. inspect_ai/model/_providers/openrouter.py +14 -0
  227. inspect_ai/model/_providers/providers.py +2 -2
  228. inspect_ai/model/_providers/util/chatapi.py +17 -7
  229. inspect_ai/model/_providers/vllm.py +1 -1
  230. inspect_ai/scorer/_metric.py +17 -1
  231. inspect_ai/scorer/_model.py +51 -6
  232. inspect_ai/scorer/_scorer.py +1 -1
  233. inspect_ai/solver/_human_agent.py +3 -0
  234. inspect_ai/solver/_plan.py +1 -1
  235. inspect_ai/solver/_solver.py +1 -1
  236. inspect_ai/solver/_use_tools.py +14 -8
  237. inspect_ai/tool/__init__.py +16 -1
  238. inspect_ai/tool/_json_rpc_helpers.py +285 -0
  239. inspect_ai/tool/_mcp/__init__.py +13 -0
  240. inspect_ai/tool/_mcp/_context.py +14 -0
  241. inspect_ai/tool/_mcp/_mcp.py +293 -0
  242. inspect_ai/tool/_mcp/_sandbox.py +104 -0
  243. inspect_ai/tool/_mcp/_types.py +31 -0
  244. inspect_ai/tool/_mcp/connection.py +60 -0
  245. inspect_ai/tool/_mcp/sampling.py +118 -0
  246. inspect_ai/tool/_mcp/server.py +112 -0
  247. inspect_ai/tool/_mcp/tools.py +34 -0
  248. inspect_ai/tool/_tool.py +13 -0
  249. inspect_ai/tool/_tool_def.py +24 -7
  250. inspect_ai/tool/_tool_support_helpers.py +129 -153
  251. inspect_ai/tool/_tools/_bash_session.py +11 -11
  252. inspect_ai/tool/_tools/_text_editor.py +6 -6
  253. inspect_ai/tool/_tools/_web_browser/_web_browser.py +8 -8
  254. inspect_ai/util/_anyio.py +31 -20
  255. inspect_ai/util/_json.py +20 -2
  256. inspect_ai/util/_sandbox/context.py +18 -7
  257. inspect_ai/util/_sandbox/docker/compose.py +1 -1
  258. inspect_ai/util/_sandbox/docker/docker.py +92 -21
  259. inspect_ai/util/_sandbox/environment.py +33 -2
  260. inspect_ai/util/_sandbox/events.py +2 -2
  261. inspect_ai/util/_sandbox/service.py +13 -3
  262. {inspect_ai-0.3.89.dist-info → inspect_ai-0.3.91.dist-info}/METADATA +6 -2
  263. inspect_ai-0.3.91.dist-info/RECORD +732 -0
  264. {inspect_ai-0.3.89.dist-info → inspect_ai-0.3.91.dist-info}/WHEEL +1 -1
  265. inspect_ai/_view/www/src/App.tsx +0 -316
  266. inspect_ai/_view/www/src/samples/chat/MessageContent.module.css +0 -4
  267. inspect_ai/_view/www/src/samples/chat/MessageContents.module.css +0 -3
  268. inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.module.css +0 -3
  269. inspect_ai/_view/www/src/samples/chat/tools/ToolInput.module.css +0 -14
  270. inspect_ai/_view/www/src/workspace/WorkSpace.tsx +0 -292
  271. inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.module.css +0 -5
  272. inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +0 -57
  273. inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +0 -43
  274. inspect_ai-0.3.89.dist-info/RECORD +0 -705
  275. /inspect_ai/_view/www/src/{types → @types}/asciicinema-player.d.ts +0 -0
  276. /inspect_ai/_view/www/src/{types → @types}/jsondiffpatch.d.ts +0 -0
  277. /inspect_ai/_view/www/src/{types → @types}/markdown-it-katex.d.ts +0 -0
  278. /inspect_ai/_view/www/src/{types → @types}/prism.d.ts +0 -0
  279. /inspect_ai/_view/www/src/{appearance → app/appearance}/colors.ts +0 -0
  280. /inspect_ai/_view/www/src/{appearance → app/appearance}/fonts.ts +0 -0
  281. /inspect_ai/_view/www/src/{appearance → app/appearance}/styles.ts +0 -0
  282. /inspect_ai/_view/www/src/{metadata → app/content}/MetaDataGrid.tsx +0 -0
  283. /inspect_ai/_view/www/src/{metadata → app/content}/MetaDataView.module.css +0 -0
  284. /inspect_ai/_view/www/src/{metadata → app/content}/MetaDataView.tsx +0 -0
  285. /inspect_ai/_view/www/src/{metadata → app/content}/MetadataGrid.module.css +0 -0
  286. /inspect_ai/_view/www/src/{metadata → app/content}/RenderedContent.module.css +0 -0
  287. /inspect_ai/_view/www/src/{metadata → app/content}/types.ts +0 -0
  288. /inspect_ai/_view/www/src/{workspace/WorkSpaceView.module.css → app/log-view/LogView.module.css} +0 -0
  289. /inspect_ai/_view/www/src/{workspace → app/log-view}/error/TaskErrorPanel.module.css +0 -0
  290. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ModelRolesView.module.css +0 -0
  291. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/Navbar.module.css +0 -0
  292. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/PrimaryBar.module.css +0 -0
  293. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ResultsPanel.module.css +0 -0
  294. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/RunningStatusPanel.module.css +0 -0
  295. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ScoreGrid.module.css +0 -0
  296. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/SecondaryBar.module.css +0 -0
  297. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/StatusPanel.module.css +0 -0
  298. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/StatusPanel.tsx +0 -0
  299. /inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/InfoTab.module.css +0 -0
  300. /inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/JsonTab.module.css +0 -0
  301. /inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/RunningNoSamples.module.css +0 -0
  302. /inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/RunningNoSamples.tsx +0 -0
  303. /inspect_ai/_view/www/src/{workspace → app/log-view}/types.ts +0 -0
  304. /inspect_ai/_view/www/src/{workspace → app/log-view}/utils.ts +0 -0
  305. /inspect_ai/_view/www/src/{plan → app/plan}/DatasetDetailView.module.css +0 -0
  306. /inspect_ai/_view/www/src/{plan → app/plan}/DetailStep.module.css +0 -0
  307. /inspect_ai/_view/www/src/{plan → app/plan}/ModelCard.module.css +0 -0
  308. /inspect_ai/_view/www/src/{plan → app/plan}/PlanDetailView.module.css +0 -0
  309. /inspect_ai/_view/www/src/{plan → app/plan}/ScorerDetailView.module.css +0 -0
  310. /inspect_ai/_view/www/src/{plan → app/plan}/ScorerDetailView.tsx +0 -0
  311. /inspect_ai/_view/www/src/{plan → app/plan}/SolverDetailView.module.css +0 -0
  312. /inspect_ai/_view/www/src/{samples → app/samples}/InlineSampleDisplay.module.css +0 -0
  313. /inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessageRow.module.css +0 -0
  314. /inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatViewVirtualList.module.css +0 -0
  315. /inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/BooleanScoreDescriptor.module.css +0 -0
  316. /inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/ObjectScoreDescriptor.module.css +0 -0
  317. /inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/PassFailScoreDescriptor.module.css +0 -0
  318. /inspect_ai/_view/www/src/{samples → app/samples}/error/FlatSampleErrorView.module.css +0 -0
  319. /inspect_ai/_view/www/src/{samples → app/samples}/error/FlatSampleErrorView.tsx +0 -0
  320. /inspect_ai/_view/www/src/{samples → app/samples}/error/SampleErrorView.tsx +0 -0
  321. /inspect_ai/_view/www/src/{samples → app/samples}/error/error.ts +0 -0
  322. /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleFooter.module.css +0 -0
  323. /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleFooter.tsx +0 -0
  324. /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleHeader.module.css +0 -0
  325. /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleList.module.css +0 -0
  326. /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleSeparator.module.css +0 -0
  327. /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleSeparator.tsx +0 -0
  328. /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/EpochFilter.module.css +0 -0
  329. /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/EpochFilter.tsx +0 -0
  330. /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/SelectScorer.module.css +0 -0
  331. /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/SortFilter.module.css +0 -0
  332. /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/SampleFilter.module.css +0 -0
  333. /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/tokenize.ts +0 -0
  334. /inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScores.module.css +0 -0
  335. /inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScoresGrid.module.css +0 -0
  336. /inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScoresView.module.css +0 -0
  337. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/InfoEventView.module.css +0 -0
  338. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/LoggerEventView.module.css +0 -0
  339. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/SampleInitEventView.module.css +0 -0
  340. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/SandboxEventView.module.css +0 -0
  341. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/ScoreEventView.module.css +0 -0
  342. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/SubtaskEventView.module.css +0 -0
  343. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/ToolEventView.module.css +0 -0
  344. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventNav.module.css +0 -0
  345. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventNav.tsx +0 -0
  346. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventNavs.module.css +0 -0
  347. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventNavs.tsx +0 -0
  348. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventPanel.module.css +0 -0
  349. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventProgressPanel.module.css +0 -0
  350. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventProgressPanel.tsx +0 -0
  351. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventRow.module.css +0 -0
  352. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventRow.tsx +0 -0
  353. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventSection.module.css +0 -0
  354. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventSection.tsx +0 -0
  355. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventTimingPanel.module.css +0 -0
  356. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateDiffView.tsx +0 -0
  357. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateEventView.module.css +0 -0
  358. /inspect_ai/_view/www/src/{workspace → app}/sidebar/EvalStatus.module.css +0 -0
  359. /inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarLogEntry.module.css +0 -0
  360. /inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarScoreView.module.css +0 -0
  361. /inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarScoresView.module.css +0 -0
  362. /inspect_ai/_view/www/src/{usage → app/usage}/ModelUsagePanel.module.css +0 -0
  363. /inspect_ai/_view/www/src/{usage → app/usage}/TokenTable.module.css +0 -0
  364. /inspect_ai/_view/www/src/{usage → app/usage}/UsageCard.module.css +0 -0
  365. /inspect_ai/_view/www/src/{api → client/api}/api-shared.ts +0 -0
  366. /inspect_ai/_view/www/src/{api → client/api}/jsonrpc.ts +0 -0
  367. /inspect_ai/_view/www/src/{logfile → client/remote}/remoteZipFile.ts +0 -0
  368. {inspect_ai-0.3.89.dist-info → inspect_ai-0.3.91.dist-info}/entry_points.txt +0 -0
  369. {inspect_ai-0.3.89.dist-info → inspect_ai-0.3.91.dist-info}/licenses/LICENSE +0 -0
  370. {inspect_ai-0.3.89.dist-info → inspect_ai-0.3.91.dist-info}/top_level.txt +0 -0
@@ -56,17 +56,17 @@ from inspect_ai.tool._tool_info import ToolInfo
56
56
 
57
57
 
58
58
  async def openai_responses_inputs(
59
- messages: list[ChatMessage], model: str
59
+ messages: list[ChatMessage], model: str, store: bool
60
60
  ) -> list[ResponseInputItemParam]:
61
61
  return [
62
62
  item
63
63
  for message in messages
64
- for item in await _openai_input_item_from_chat_message(message, model)
64
+ for item in await _openai_input_item_from_chat_message(message, model, store)
65
65
  ]
66
66
 
67
67
 
68
68
  async def _openai_input_item_from_chat_message(
69
- message: ChatMessage, model: str
69
+ message: ChatMessage, model: str, store: bool
70
70
  ) -> list[ResponseInputItemParam]:
71
71
  if message.role == "system":
72
72
  content = await _openai_responses_content_list_param(message.content)
@@ -84,7 +84,7 @@ async def _openai_input_item_from_chat_message(
84
84
  )
85
85
  ]
86
86
  elif message.role == "assistant":
87
- return _openai_input_items_from_chat_message_assistant(message)
87
+ return _openai_input_items_from_chat_message_assistant(message, store)
88
88
  elif message.role == "tool":
89
89
  if message.internal:
90
90
  internal = _model_tool_call_for_internal(message.internal)
@@ -208,7 +208,7 @@ def openai_responses_chat_choices(
208
208
 
209
209
  class _AssistantInternal(TypedDict):
210
210
  output_message_id: str | None
211
- reasoning_id: str | None
211
+ tool_message_ids: dict[str, str]
212
212
 
213
213
 
214
214
  def _chat_message_assistant_from_openai_response(
@@ -237,7 +237,7 @@ def _chat_message_assistant_from_openai_response(
237
237
  # collect output and tool calls
238
238
  message_content: list[Content] = []
239
239
  tool_calls: list[ToolCall] = []
240
- internal = _AssistantInternal(output_message_id=None, reasoning_id=None)
240
+ internal = _AssistantInternal(output_message_id=None, tool_message_ids={})
241
241
  for output in response.output:
242
242
  match output:
243
243
  case ResponseOutputMessage(content=content, id=id):
@@ -252,24 +252,28 @@ def _chat_message_assistant_from_openai_response(
252
252
  ]
253
253
  )
254
254
  case ResponseReasoningItem(summary=summary, id=id):
255
- assert internal["reasoning_id"] is None, "Multiple reasoning items"
256
- internal["reasoning_id"] = id
257
255
  message_content.append(
258
- ContentReasoning(reasoning="\n".join([s.text for s in summary]))
256
+ ContentReasoning(
257
+ reasoning="\n".join([s.text for s in summary]), signature=id
258
+ )
259
259
  )
260
260
  case _:
261
261
  stop_reason = "tool_calls"
262
262
  match output:
263
263
  case ResponseFunctionToolCall():
264
+ if output.id is not None:
265
+ internal["tool_message_ids"][output.call_id] = output.id
264
266
  tool_calls.append(
265
267
  parse_tool_call(
266
268
  output.call_id,
267
- output.name,
269
+ _from_responses_tool_alias(output.name),
268
270
  output.arguments,
269
271
  tools,
270
272
  )
271
273
  )
272
274
  case ResponseComputerToolCall():
275
+ if output.id is not None:
276
+ internal["tool_message_ids"][output.call_id] = output.id
273
277
  tool_calls.append(
274
278
  tool_call_from_openai_computer_tool_call(output)
275
279
  )
@@ -290,7 +294,7 @@ def _chat_message_assistant_from_openai_response(
290
294
 
291
295
 
292
296
  def _openai_input_items_from_chat_message_assistant(
293
- message: ChatMessageAssistant,
297
+ message: ChatMessageAssistant, store: bool
294
298
  ) -> list[ResponseInputItemParam]:
295
299
  """
296
300
  Transform a `ChatMessageAssistant` into OpenAI `ResponseInputItem`'s for playback to the model.
@@ -300,12 +304,17 @@ def _openai_input_items_from_chat_message_assistant(
300
304
  field of the `ChatMessageAssistant` to help it provide the proper id's the
301
305
  items in the returned list.
302
306
  """
303
- # As currently coded, this code only supports a single OutputMessage and
304
- # a single ReasoningItem for each Response/ChatMessageAssistant.
305
- reasoning_item: ResponseReasoningItemParam | None = None
306
- output_message: ResponseOutputMessageParam | None = None
307
+ (output_message_id, tool_message_ids) = _ids_from_assistant_internal(message)
307
308
 
308
- (output_message_id, reasoning_id) = _ids_from_assistant_internal(message)
309
+ # if we are not storing messages on the server then blank these out
310
+ if not store:
311
+ output_message_id = None
312
+ tool_message_ids = {}
313
+
314
+ # items to return -- ensure we use a single output message (and just chain
315
+ # additional content on to it)
316
+ items: list[ResponseInputItemParam] = []
317
+ output_message: ResponseOutputMessageParam | None = None
309
318
 
310
319
  for content in (
311
320
  list[ContentText | ContentReasoning]([ContentText(text=message.content)])
@@ -316,13 +325,21 @@ def _openai_input_items_from_chat_message_assistant(
316
325
  ):
317
326
  match content:
318
327
  case ContentReasoning(reasoning=reasoning):
319
- assert reasoning_item is None, "Multiple reasoning items"
320
- assert reasoning_id is not None, "Must find reasoning id"
321
- reasoning_item = ResponseReasoningItemParam(
322
- type="reasoning",
323
- id=reasoning_id,
324
- summary=[Summary(type="summary_text", text=reasoning)],
328
+ assert content.signature is not None, (
329
+ "reasoning_id must be saved in signature"
325
330
  )
331
+ # if items are not stored on the server then there is no
332
+ # sense appending the reasoning item as its just a pointer
333
+ if store:
334
+ items.append(
335
+ ResponseReasoningItemParam(
336
+ type="reasoning",
337
+ id=content.signature,
338
+ summary=[Summary(type="summary_text", text=reasoning)]
339
+ if reasoning
340
+ else [],
341
+ )
342
+ )
326
343
  case ContentText(text=text, refusal=refusal):
327
344
  new_content = (
328
345
  ResponseOutputRefusalParam(type="refusal", refusal=text)
@@ -332,22 +349,23 @@ def _openai_input_items_from_chat_message_assistant(
332
349
  )
333
350
  )
334
351
  if output_message is None:
335
- assert output_message_id is not None, "Missing output message id"
336
352
  output_message = ResponseOutputMessageParam(
337
353
  type="message",
338
354
  role="assistant",
339
- id=output_message_id,
355
+ # this actually can be `None`, and it will in fact be `None` when the
356
+ # assistant message is synthesized by the scaffold as opposed to being
357
+ # replayed from the model (or when store=False)
358
+ id=output_message_id, # type: ignore[typeddict-item]
340
359
  content=[new_content],
341
360
  status="completed",
342
361
  )
362
+ items.append(output_message)
343
363
  else:
344
364
  output_message["content"] = chain(
345
365
  output_message["content"], [new_content]
346
366
  )
347
367
 
348
- return [
349
- item for item in (reasoning_item, output_message) if item
350
- ] + _tool_call_items_from_assistant_message(message)
368
+ return items + _tool_call_items_from_assistant_message(message, tool_message_ids)
351
369
 
352
370
 
353
371
  def _model_tool_call_for_internal(
@@ -380,7 +398,7 @@ def _maybe_native_tool_param(
380
398
 
381
399
 
382
400
  def _tool_call_items_from_assistant_message(
383
- message: ChatMessageAssistant,
401
+ message: ChatMessageAssistant, tool_message_ids: dict[str, str]
384
402
  ) -> list[ResponseInputItemParam]:
385
403
  tool_calls: list[ResponseInputItemParam] = []
386
404
  for call in message.tool_calls or []:
@@ -392,26 +410,36 @@ def _tool_call_items_from_assistant_message(
392
410
  )
393
411
  )
394
412
  else:
395
- tool_calls.append(
396
- ResponseFunctionToolCallParam(
397
- type="function_call",
398
- call_id=call.id,
399
- name=call.function,
400
- arguments=call.function,
401
- )
413
+ # create param
414
+ tool_call_param: ResponseFunctionToolCallParam = dict(
415
+ type="function_call",
416
+ call_id=call.id,
417
+ name=_responses_tool_alias(call.function),
418
+ arguments=call.function,
402
419
  )
403
420
 
421
+ # add id if available
422
+ tool_message_id = tool_message_ids.get(call.id, None)
423
+ if tool_message_id is not None:
424
+ tool_call_param["id"] = tool_message_id
425
+
426
+ # append the param
427
+ tool_calls.append(tool_call_param)
428
+
404
429
  return tool_calls
405
430
 
406
431
 
407
432
  def _ids_from_assistant_internal(
408
433
  message: ChatMessageAssistant,
409
- ) -> tuple[str | None, str | None]:
410
- assert isinstance(message.internal, dict), (
411
- "OpenAI ChatMessageAssistant internal must be an _AssistantInternal"
412
- )
413
- internal = cast(_AssistantInternal, message.internal)
414
- return (internal["output_message_id"], internal["reasoning_id"])
434
+ ) -> tuple[str | None, dict[str, str]]:
435
+ if message.internal is not None:
436
+ assert isinstance(message.internal, dict), (
437
+ "OpenAI ChatMessageAssistant internal must be an _AssistantInternal"
438
+ )
439
+ internal = cast(_AssistantInternal, message.internal)
440
+ return (internal["output_message_id"], internal["tool_message_ids"])
441
+ else:
442
+ return None, {}
415
443
 
416
444
 
417
445
  _ResponseToolCallParam = (
@@ -430,8 +458,21 @@ def _tool_param_for_tool_info(
430
458
  # standard tool implementation
431
459
  return _maybe_native_tool_param(tool, config) or FunctionToolParam(
432
460
  type="function",
433
- name=tool.name,
461
+ name=_responses_tool_alias(tool.name),
434
462
  description=tool.description,
435
463
  parameters=tool.parameters.model_dump(exclude_none=True),
436
464
  strict=False, # default parameters don't work in strict mode
437
465
  )
466
+
467
+
468
+ # these functions enables us to 'escape' built in tool names like 'python'
469
+
470
+ _responses_tool_aliases = {"python": "python_exec"}
471
+
472
+
473
+ def _responses_tool_alias(name: str) -> str:
474
+ return _responses_tool_aliases.get(name, name)
475
+
476
+
477
+ def _from_responses_tool_alias(name: str) -> str:
478
+ return next((k for k, v in _responses_tool_aliases.items() if v == name), name)
@@ -5,8 +5,6 @@ from copy import copy
5
5
  from logging import getLogger
6
6
  from typing import Any, Literal, Optional, Tuple, cast
7
7
 
8
- import httpcore
9
- import httpx
10
8
  from anthropic import (
11
9
  APIConnectionError,
12
10
  APIStatusError,
@@ -51,9 +49,11 @@ from inspect_ai._util.error import exception_message
51
49
  from inspect_ai._util.http import is_retryable_http_status
52
50
  from inspect_ai._util.images import file_as_data_uri
53
51
  from inspect_ai._util.logger import warn_once
52
+ from inspect_ai._util.trace import trace_message
54
53
  from inspect_ai._util.url import data_uri_mime_type, data_uri_to_base64
55
54
  from inspect_ai.tool import ToolCall, ToolChoice, ToolFunction, ToolInfo
56
55
 
56
+ from ..._util.httpx import httpx_should_retry
57
57
  from .._chat_message import ChatMessage, ChatMessageAssistant, ChatMessageSystem
58
58
  from .._generate_config import GenerateConfig
59
59
  from .._model import ModelAPI
@@ -330,13 +330,9 @@ class AnthropicAPI(ModelAPI):
330
330
  def should_retry(self, ex: Exception) -> bool:
331
331
  if isinstance(ex, APIStatusError):
332
332
  return is_retryable_http_status(ex.status_code)
333
- elif isinstance(
334
- ex,
335
- APIConnectionError
336
- | APITimeoutError
337
- | httpx.RemoteProtocolError
338
- | httpcore.RemoteProtocolError,
339
- ):
333
+ elif httpx_should_retry(ex):
334
+ return True
335
+ elif isinstance(ex, APIConnectionError | APITimeoutError):
340
336
  return True
341
337
  else:
342
338
  return False
@@ -944,9 +940,15 @@ async def count_tokens(
944
940
  messages=[{"role": "user", "content": text}],
945
941
  )
946
942
  return response.input_tokens
947
- except Exception as e:
948
- logger.warning(
949
- f"Error counting tokens (falling back to estimated tokens): {str(e)}"
943
+ except Exception as ex:
944
+ warn_once(
945
+ logger,
946
+ f"Unable to call count_tokens API for model {model} (falling back to estimated tokens)",
947
+ )
948
+ trace_message(
949
+ logger,
950
+ "Anthropic",
951
+ f"Unable to call count_tokens API for model {model} ({ex})",
950
952
  )
951
953
  words = text.split()
952
954
  estimated_tokens = int(len(words) * 1.3)