inspect-ai 0.3.90__py3-none-any.whl → 0.3.91__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (370) hide show
  1. inspect_ai/_cli/common.py +13 -0
  2. inspect_ai/_cli/eval.py +40 -0
  3. inspect_ai/_display/textual/widgets/samples.py +49 -4
  4. inspect_ai/_display/textual/widgets/vscode.py +4 -2
  5. inspect_ai/_eval/eval.py +41 -28
  6. inspect_ai/_eval/evalset.py +4 -0
  7. inspect_ai/_eval/loader.py +4 -5
  8. inspect_ai/_eval/registry.py +1 -1
  9. inspect_ai/_eval/run.py +6 -3
  10. inspect_ai/_eval/task/log.py +6 -0
  11. inspect_ai/_eval/task/run.py +108 -41
  12. inspect_ai/_eval/task/sandbox.py +19 -5
  13. inspect_ai/_util/_async.py +1 -1
  14. inspect_ai/_util/constants.py +1 -0
  15. inspect_ai/_util/environ.py +32 -0
  16. inspect_ai/_util/file.py +8 -1
  17. inspect_ai/_util/httpx.py +105 -22
  18. inspect_ai/_util/registry.py +83 -9
  19. inspect_ai/_util/text.py +81 -17
  20. inspect_ai/_util/transcript.py +9 -6
  21. inspect_ai/_util/vscode.py +7 -2
  22. inspect_ai/_view/schema.py +1 -1
  23. inspect_ai/_view/www/babel.config.js +11 -0
  24. inspect_ai/_view/www/dist/assets/index.css +3640 -3563
  25. inspect_ai/_view/www/dist/assets/index.js +59204 -52519
  26. inspect_ai/_view/www/eslint.config.mjs +10 -1
  27. inspect_ai/_view/www/jest.config.mjs +21 -0
  28. inspect_ai/_view/www/log-schema.json +111 -2
  29. inspect_ai/_view/www/package.json +19 -5
  30. inspect_ai/_view/www/src/{types → @types}/log.d.ts +95 -32
  31. inspect_ai/_view/www/{App.css → src/app/App.css} +22 -14
  32. inspect_ai/_view/www/src/app/App.tsx +168 -0
  33. inspect_ai/_view/www/src/{AppErrorBoundary.tsx → app/AppErrorBoundary.tsx} +1 -1
  34. inspect_ai/_view/www/src/{appearance → app/appearance}/icons.ts +1 -0
  35. inspect_ai/_view/www/src/{metadata → app/content}/RenderedContent.tsx +5 -5
  36. inspect_ai/_view/www/src/{workspace/WorkSpaceView.tsx → app/log-view/LogView.tsx} +59 -40
  37. inspect_ai/_view/www/src/app/log-view/LogViewContainer.tsx +159 -0
  38. inspect_ai/_view/www/src/app/log-view/LogViewLayout.tsx +109 -0
  39. inspect_ai/_view/www/src/{workspace → app/log-view}/error/TaskErrorPanel.tsx +3 -3
  40. inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ModelRolesView.tsx +1 -1
  41. inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/Navbar.tsx +4 -4
  42. inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/PrimaryBar.tsx +8 -8
  43. inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ResultsPanel.tsx +6 -6
  44. inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/RunningStatusPanel.tsx +1 -1
  45. inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ScoreGrid.tsx +1 -1
  46. inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/SecondaryBar.tsx +8 -8
  47. inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/InfoTab.tsx +35 -6
  48. inspect_ai/_view/www/src/app/log-view/tabs/JsonTab.tsx +136 -0
  49. inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/SamplesTab.tsx +82 -73
  50. inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/grouping.ts +3 -3
  51. inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/types.ts +1 -1
  52. inspect_ai/_view/www/src/{plan → app/plan}/DatasetDetailView.tsx +2 -2
  53. inspect_ai/_view/www/src/{plan → app/plan}/DetailStep.tsx +1 -1
  54. inspect_ai/_view/www/src/{plan → app/plan}/ModelCard.tsx +4 -4
  55. inspect_ai/_view/www/src/{plan → app/plan}/PlanCard.tsx +2 -2
  56. inspect_ai/_view/www/src/{plan → app/plan}/PlanDetailView.tsx +5 -5
  57. inspect_ai/_view/www/src/{plan → app/plan}/SolverDetailView.tsx +1 -1
  58. inspect_ai/_view/www/src/app/routing/AppRouter.tsx +58 -0
  59. inspect_ai/_view/www/src/app/routing/navigationHooks.ts +182 -0
  60. inspect_ai/_view/www/src/app/routing/url.ts +43 -0
  61. inspect_ai/_view/www/src/{samples → app/samples}/InlineSampleDisplay.tsx +11 -27
  62. inspect_ai/_view/www/src/{samples → app/samples}/SampleDialog.tsx +36 -40
  63. inspect_ai/_view/www/src/{samples → app/samples}/SampleDisplay.module.css +4 -0
  64. inspect_ai/_view/www/src/{samples → app/samples}/SampleDisplay.tsx +116 -49
  65. inspect_ai/_view/www/src/{samples → app/samples}/SampleSummaryView.module.css +1 -1
  66. inspect_ai/_view/www/src/{samples → app/samples}/SampleSummaryView.tsx +29 -26
  67. inspect_ai/_view/www/src/{samples → app/samples}/SamplesTools.tsx +3 -3
  68. inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessage.module.css +5 -2
  69. inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessage.tsx +12 -4
  70. inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessageRenderer.tsx +3 -3
  71. inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessageRow.tsx +6 -1
  72. inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatView.tsx +4 -2
  73. inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatViewVirtualList.tsx +5 -3
  74. inspect_ai/_view/www/src/app/samples/chat/MessageContent.module.css +12 -0
  75. inspect_ai/_view/www/src/{samples → app/samples}/chat/MessageContent.tsx +11 -10
  76. inspect_ai/_view/www/src/app/samples/chat/MessageContents.module.css +7 -0
  77. inspect_ai/_view/www/src/{samples → app/samples}/chat/MessageContents.tsx +14 -8
  78. inspect_ai/_view/www/src/{samples → app/samples}/chat/messages.ts +2 -2
  79. inspect_ai/_view/www/src/app/samples/chat/tools/ToolCallView.module.css +7 -0
  80. inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolCallView.tsx +26 -27
  81. inspect_ai/_view/www/src/app/samples/chat/tools/ToolInput.module.css +19 -0
  82. inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolInput.tsx +3 -3
  83. inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolOutput.module.css +1 -0
  84. inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolOutput.tsx +1 -1
  85. inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolTitle.module.css +4 -0
  86. inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolTitle.tsx +2 -2
  87. inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/tool.ts +1 -1
  88. inspect_ai/_view/www/src/app/samples/chat/types.ts +1 -0
  89. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/samplesDescriptor.tsx +38 -15
  90. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/BooleanScoreDescriptor.tsx +1 -1
  91. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/CategoricalScoreDescriptor.tsx +2 -2
  92. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/NumericScoreDescriptor.tsx +3 -3
  93. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/ObjectScoreDescriptor.tsx +4 -4
  94. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/OtherScoreDescriptor.tsx +2 -2
  95. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/PassFailScoreDescriptor.tsx +2 -2
  96. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/ScoreDescriptor.tsx +1 -1
  97. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/types.ts +4 -3
  98. inspect_ai/_view/www/src/{samples → app/samples}/error/SampleErrorView.module.css +2 -1
  99. inspect_ai/_view/www/src/{samples → app/samples}/list/SampleHeader.tsx +3 -0
  100. inspect_ai/_view/www/src/{samples → app/samples}/list/SampleList.tsx +47 -33
  101. inspect_ai/_view/www/src/{samples → app/samples}/list/SampleRow.module.css +16 -0
  102. inspect_ai/_view/www/src/{samples → app/samples}/list/SampleRow.tsx +47 -20
  103. inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/SelectScorer.tsx +1 -1
  104. inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/SortFilter.tsx +4 -4
  105. inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/filters.ts +8 -6
  106. inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/SampleFilter.tsx +4 -3
  107. inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/completions.ts +1 -1
  108. inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/language.ts +1 -0
  109. inspect_ai/_view/www/src/{samples → app/samples}/sampleDataAdapter.ts +3 -3
  110. inspect_ai/_view/www/src/{samples → app/samples}/sampleLimit.ts +1 -1
  111. inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScores.tsx +1 -1
  112. inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScoresGrid.tsx +12 -11
  113. inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScoresView.tsx +6 -6
  114. inspect_ai/_view/www/src/{samples → app/samples}/transcript/ApprovalEventView.tsx +1 -1
  115. inspect_ai/_view/www/src/{samples → app/samples}/transcript/ErrorEventView.tsx +3 -3
  116. inspect_ai/_view/www/src/{samples → app/samples}/transcript/InfoEventView.tsx +4 -4
  117. inspect_ai/_view/www/src/{samples → app/samples}/transcript/InputEventView.tsx +3 -3
  118. inspect_ai/_view/www/src/{samples → app/samples}/transcript/LoggerEventView.tsx +3 -3
  119. inspect_ai/_view/www/src/{samples → app/samples}/transcript/ModelEventView.module.css +13 -7
  120. inspect_ai/_view/www/src/{samples → app/samples}/transcript/ModelEventView.tsx +49 -21
  121. inspect_ai/_view/www/src/{samples → app/samples}/transcript/SampleInitEventView.tsx +11 -9
  122. inspect_ai/_view/www/src/{samples → app/samples}/transcript/SampleLimitEventView.tsx +1 -1
  123. inspect_ai/_view/www/src/{samples → app/samples}/transcript/SandboxEventView.tsx +8 -6
  124. inspect_ai/_view/www/src/{samples → app/samples}/transcript/ScoreEventView.tsx +4 -4
  125. inspect_ai/_view/www/src/{samples → app/samples}/transcript/StepEventView.tsx +11 -3
  126. inspect_ai/_view/www/src/{samples → app/samples}/transcript/SubtaskEventView.tsx +2 -2
  127. inspect_ai/_view/www/src/{samples → app/samples}/transcript/ToolEventView.tsx +2 -2
  128. inspect_ai/_view/www/src/{samples → app/samples}/transcript/TranscriptView.module.css +8 -7
  129. inspect_ai/_view/www/src/{samples → app/samples}/transcript/TranscriptView.tsx +32 -114
  130. inspect_ai/_view/www/src/{samples → app/samples}/transcript/TranscriptVirtualListComponent.module.css +6 -5
  131. inspect_ai/_view/www/src/{samples → app/samples}/transcript/TranscriptVirtualListComponent.tsx +14 -2
  132. inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventPanel.tsx +2 -2
  133. inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventTimingPanel.tsx +1 -1
  134. inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/utils.ts +1 -1
  135. inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateEventRenderers.tsx +23 -21
  136. inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateEventRenders.module.css +7 -0
  137. inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateEventView.tsx +2 -2
  138. inspect_ai/_view/www/src/app/samples/transcript/transform/fixups.ts +142 -0
  139. inspect_ai/_view/www/src/app/samples/transcript/transform/treeify.ts +39 -0
  140. inspect_ai/_view/www/src/{samples → app/samples}/transcript/types.ts +1 -1
  141. inspect_ai/_view/www/src/{workspace → app}/sidebar/EvalStatus.tsx +1 -1
  142. inspect_ai/_view/www/src/app/sidebar/LogDirectoryTitleView.module.css +16 -0
  143. inspect_ai/_view/www/src/app/sidebar/LogDirectoryTitleView.tsx +70 -0
  144. inspect_ai/_view/www/src/{workspace → app}/sidebar/Sidebar.module.css +8 -0
  145. inspect_ai/_view/www/src/{workspace → app}/sidebar/Sidebar.tsx +35 -17
  146. inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarLogEntry.tsx +1 -1
  147. inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarScoreView.tsx +2 -2
  148. inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarScoresView.tsx +2 -2
  149. inspect_ai/_view/www/src/{types.ts → app/types.ts} +18 -11
  150. inspect_ai/_view/www/src/{usage → app/usage}/ModelTokenTable.tsx +1 -1
  151. inspect_ai/_view/www/src/{usage → app/usage}/ModelUsagePanel.tsx +2 -2
  152. inspect_ai/_view/www/src/{usage → app/usage}/TokenTable.tsx +1 -1
  153. inspect_ai/_view/www/src/{usage → app/usage}/UsageCard.tsx +6 -6
  154. inspect_ai/_view/www/src/{api → client/api}/api-browser.ts +2 -2
  155. inspect_ai/_view/www/src/{api → client/api}/api-http.ts +3 -3
  156. inspect_ai/_view/www/src/{api → client/api}/api-vscode.ts +2 -2
  157. inspect_ai/_view/www/src/{api → client/api}/client-api.ts +6 -5
  158. inspect_ai/_view/www/src/{api → client/api}/index.ts +2 -2
  159. inspect_ai/_view/www/src/{api → client/api}/types.ts +4 -1
  160. inspect_ai/_view/www/src/{logfile → client/remote}/remoteLogFile.ts +3 -3
  161. inspect_ai/_view/www/src/{storage → client/storage}/index.ts +11 -5
  162. inspect_ai/_view/www/src/components/Card.tsx +1 -1
  163. inspect_ai/_view/www/src/components/CopyButton.tsx +1 -1
  164. inspect_ai/_view/www/src/components/DownloadButton.tsx +1 -1
  165. inspect_ai/_view/www/src/components/ErrorPanel.tsx +1 -1
  166. inspect_ai/_view/www/src/components/{ExpandablePanel.css → ExpandablePanel.module.css} +14 -11
  167. inspect_ai/_view/www/src/components/ExpandablePanel.tsx +16 -10
  168. inspect_ai/_view/www/src/components/FindBand.tsx +1 -1
  169. inspect_ai/_view/www/src/components/JsonPanel.css +2 -2
  170. inspect_ai/_view/www/src/components/LargeModal.tsx +12 -1
  171. inspect_ai/_view/www/src/components/LightboxCarousel.tsx +1 -1
  172. inspect_ai/_view/www/src/components/MarkdownDiv.tsx +3 -1
  173. inspect_ai/_view/www/src/components/MessageBand.tsx +1 -1
  174. inspect_ai/_view/www/src/components/NoContentsPanel.tsx +1 -1
  175. inspect_ai/_view/www/src/constants.ts +10 -9
  176. inspect_ai/_view/www/src/index.tsx +27 -11
  177. inspect_ai/_view/www/src/state/appSlice.ts +44 -5
  178. inspect_ai/_view/www/src/state/hooks.ts +30 -7
  179. inspect_ai/_view/www/src/state/logSlice.ts +7 -5
  180. inspect_ai/_view/www/src/state/logsPolling.ts +1 -1
  181. inspect_ai/_view/www/src/state/logsSlice.ts +18 -13
  182. inspect_ai/_view/www/src/state/samplePolling.ts +12 -12
  183. inspect_ai/_view/www/src/state/sampleSlice.ts +3 -5
  184. inspect_ai/_view/www/src/state/sampleUtils.ts +1 -1
  185. inspect_ai/_view/www/src/{scoring/utils.ts → state/scoring.ts} +2 -2
  186. inspect_ai/_view/www/src/state/store.ts +9 -7
  187. inspect_ai/_view/www/src/state/utils.ts +1 -1
  188. inspect_ai/_view/www/src/tests/README.md +49 -0
  189. inspect_ai/_view/www/src/tests/__mocks__/fileMock.js +1 -0
  190. inspect_ai/_view/www/src/tests/__mocks__/styleMock.js +1 -0
  191. inspect_ai/_view/www/src/tests/setupTests.mjs +1 -0
  192. inspect_ai/_view/www/src/tests/utils/base64.test.ts +23 -0
  193. inspect_ai/_view/www/src/tests/utils/format.test.ts +127 -0
  194. inspect_ai/_view/www/src/tests/utils/path.test.ts +54 -0
  195. inspect_ai/_view/www/src/utils/format.ts +8 -2
  196. inspect_ai/_view/www/src/utils/path.ts +14 -2
  197. inspect_ai/_view/www/src/utils/polling.ts +1 -2
  198. inspect_ai/_view/www/src/utils/uri.ts +32 -0
  199. inspect_ai/_view/www/yarn.lock +3310 -382
  200. inspect_ai/agent/_handoff.py +6 -3
  201. inspect_ai/agent/_human/agent.py +5 -3
  202. inspect_ai/agent/_human/install.py +16 -7
  203. inspect_ai/agent/_human/panel.py +14 -1
  204. inspect_ai/agent/_human/service.py +5 -1
  205. inspect_ai/agent/_react.py +161 -128
  206. inspect_ai/agent/_types.py +15 -4
  207. inspect_ai/approval/_policy.py +2 -2
  208. inspect_ai/log/_file.py +30 -11
  209. inspect_ai/log/_log.py +7 -1
  210. inspect_ai/log/_recorders/eval.py +3 -0
  211. inspect_ai/log/_recorders/types.py +1 -0
  212. inspect_ai/log/_samples.py +4 -0
  213. inspect_ai/model/_call_tools.py +33 -17
  214. inspect_ai/model/_generate_config.py +10 -2
  215. inspect_ai/model/_model.py +41 -21
  216. inspect_ai/model/_model_output.py +2 -1
  217. inspect_ai/model/_openai.py +10 -8
  218. inspect_ai/model/_openai_responses.py +83 -42
  219. inspect_ai/model/_providers/anthropic.py +14 -12
  220. inspect_ai/model/_providers/google.py +191 -95
  221. inspect_ai/model/_providers/hf.py +1 -1
  222. inspect_ai/model/_providers/mistral.py +2 -3
  223. inspect_ai/model/_providers/openai.py +54 -17
  224. inspect_ai/model/_providers/openai_o1.py +1 -1
  225. inspect_ai/model/_providers/openai_responses.py +28 -16
  226. inspect_ai/model/_providers/openrouter.py +14 -0
  227. inspect_ai/model/_providers/providers.py +2 -2
  228. inspect_ai/model/_providers/util/chatapi.py +17 -7
  229. inspect_ai/model/_providers/vllm.py +1 -1
  230. inspect_ai/scorer/_metric.py +17 -1
  231. inspect_ai/scorer/_model.py +51 -6
  232. inspect_ai/scorer/_scorer.py +1 -1
  233. inspect_ai/solver/_human_agent.py +3 -0
  234. inspect_ai/solver/_plan.py +1 -1
  235. inspect_ai/solver/_solver.py +1 -1
  236. inspect_ai/solver/_use_tools.py +14 -8
  237. inspect_ai/tool/__init__.py +16 -1
  238. inspect_ai/tool/_json_rpc_helpers.py +285 -0
  239. inspect_ai/tool/_mcp/__init__.py +13 -0
  240. inspect_ai/tool/_mcp/_context.py +14 -0
  241. inspect_ai/tool/_mcp/_mcp.py +293 -0
  242. inspect_ai/tool/_mcp/_sandbox.py +104 -0
  243. inspect_ai/tool/_mcp/_types.py +31 -0
  244. inspect_ai/tool/_mcp/connection.py +60 -0
  245. inspect_ai/tool/_mcp/sampling.py +118 -0
  246. inspect_ai/tool/_mcp/server.py +112 -0
  247. inspect_ai/tool/_mcp/tools.py +34 -0
  248. inspect_ai/tool/_tool.py +13 -0
  249. inspect_ai/tool/_tool_def.py +24 -7
  250. inspect_ai/tool/_tool_support_helpers.py +129 -153
  251. inspect_ai/tool/_tools/_bash_session.py +11 -11
  252. inspect_ai/tool/_tools/_text_editor.py +6 -6
  253. inspect_ai/tool/_tools/_web_browser/_web_browser.py +8 -8
  254. inspect_ai/util/_anyio.py +31 -20
  255. inspect_ai/util/_json.py +20 -2
  256. inspect_ai/util/_sandbox/context.py +18 -7
  257. inspect_ai/util/_sandbox/docker/compose.py +1 -1
  258. inspect_ai/util/_sandbox/docker/docker.py +92 -21
  259. inspect_ai/util/_sandbox/environment.py +33 -2
  260. inspect_ai/util/_sandbox/events.py +2 -2
  261. inspect_ai/util/_sandbox/service.py +13 -3
  262. {inspect_ai-0.3.90.dist-info → inspect_ai-0.3.91.dist-info}/METADATA +6 -2
  263. inspect_ai-0.3.91.dist-info/RECORD +732 -0
  264. {inspect_ai-0.3.90.dist-info → inspect_ai-0.3.91.dist-info}/WHEEL +1 -1
  265. inspect_ai/_view/www/src/App.tsx +0 -316
  266. inspect_ai/_view/www/src/samples/chat/MessageContent.module.css +0 -4
  267. inspect_ai/_view/www/src/samples/chat/MessageContents.module.css +0 -3
  268. inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.module.css +0 -3
  269. inspect_ai/_view/www/src/samples/chat/tools/ToolInput.module.css +0 -14
  270. inspect_ai/_view/www/src/workspace/WorkSpace.tsx +0 -292
  271. inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.module.css +0 -5
  272. inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +0 -57
  273. inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +0 -43
  274. inspect_ai-0.3.90.dist-info/RECORD +0 -705
  275. /inspect_ai/_view/www/src/{types → @types}/asciicinema-player.d.ts +0 -0
  276. /inspect_ai/_view/www/src/{types → @types}/jsondiffpatch.d.ts +0 -0
  277. /inspect_ai/_view/www/src/{types → @types}/markdown-it-katex.d.ts +0 -0
  278. /inspect_ai/_view/www/src/{types → @types}/prism.d.ts +0 -0
  279. /inspect_ai/_view/www/src/{appearance → app/appearance}/colors.ts +0 -0
  280. /inspect_ai/_view/www/src/{appearance → app/appearance}/fonts.ts +0 -0
  281. /inspect_ai/_view/www/src/{appearance → app/appearance}/styles.ts +0 -0
  282. /inspect_ai/_view/www/src/{metadata → app/content}/MetaDataGrid.tsx +0 -0
  283. /inspect_ai/_view/www/src/{metadata → app/content}/MetaDataView.module.css +0 -0
  284. /inspect_ai/_view/www/src/{metadata → app/content}/MetaDataView.tsx +0 -0
  285. /inspect_ai/_view/www/src/{metadata → app/content}/MetadataGrid.module.css +0 -0
  286. /inspect_ai/_view/www/src/{metadata → app/content}/RenderedContent.module.css +0 -0
  287. /inspect_ai/_view/www/src/{metadata → app/content}/types.ts +0 -0
  288. /inspect_ai/_view/www/src/{workspace/WorkSpaceView.module.css → app/log-view/LogView.module.css} +0 -0
  289. /inspect_ai/_view/www/src/{workspace → app/log-view}/error/TaskErrorPanel.module.css +0 -0
  290. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ModelRolesView.module.css +0 -0
  291. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/Navbar.module.css +0 -0
  292. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/PrimaryBar.module.css +0 -0
  293. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ResultsPanel.module.css +0 -0
  294. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/RunningStatusPanel.module.css +0 -0
  295. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ScoreGrid.module.css +0 -0
  296. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/SecondaryBar.module.css +0 -0
  297. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/StatusPanel.module.css +0 -0
  298. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/StatusPanel.tsx +0 -0
  299. /inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/InfoTab.module.css +0 -0
  300. /inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/JsonTab.module.css +0 -0
  301. /inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/RunningNoSamples.module.css +0 -0
  302. /inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/RunningNoSamples.tsx +0 -0
  303. /inspect_ai/_view/www/src/{workspace → app/log-view}/types.ts +0 -0
  304. /inspect_ai/_view/www/src/{workspace → app/log-view}/utils.ts +0 -0
  305. /inspect_ai/_view/www/src/{plan → app/plan}/DatasetDetailView.module.css +0 -0
  306. /inspect_ai/_view/www/src/{plan → app/plan}/DetailStep.module.css +0 -0
  307. /inspect_ai/_view/www/src/{plan → app/plan}/ModelCard.module.css +0 -0
  308. /inspect_ai/_view/www/src/{plan → app/plan}/PlanDetailView.module.css +0 -0
  309. /inspect_ai/_view/www/src/{plan → app/plan}/ScorerDetailView.module.css +0 -0
  310. /inspect_ai/_view/www/src/{plan → app/plan}/ScorerDetailView.tsx +0 -0
  311. /inspect_ai/_view/www/src/{plan → app/plan}/SolverDetailView.module.css +0 -0
  312. /inspect_ai/_view/www/src/{samples → app/samples}/InlineSampleDisplay.module.css +0 -0
  313. /inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessageRow.module.css +0 -0
  314. /inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatViewVirtualList.module.css +0 -0
  315. /inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/BooleanScoreDescriptor.module.css +0 -0
  316. /inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/ObjectScoreDescriptor.module.css +0 -0
  317. /inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/PassFailScoreDescriptor.module.css +0 -0
  318. /inspect_ai/_view/www/src/{samples → app/samples}/error/FlatSampleErrorView.module.css +0 -0
  319. /inspect_ai/_view/www/src/{samples → app/samples}/error/FlatSampleErrorView.tsx +0 -0
  320. /inspect_ai/_view/www/src/{samples → app/samples}/error/SampleErrorView.tsx +0 -0
  321. /inspect_ai/_view/www/src/{samples → app/samples}/error/error.ts +0 -0
  322. /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleFooter.module.css +0 -0
  323. /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleFooter.tsx +0 -0
  324. /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleHeader.module.css +0 -0
  325. /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleList.module.css +0 -0
  326. /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleSeparator.module.css +0 -0
  327. /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleSeparator.tsx +0 -0
  328. /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/EpochFilter.module.css +0 -0
  329. /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/EpochFilter.tsx +0 -0
  330. /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/SelectScorer.module.css +0 -0
  331. /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/SortFilter.module.css +0 -0
  332. /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/SampleFilter.module.css +0 -0
  333. /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/tokenize.ts +0 -0
  334. /inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScores.module.css +0 -0
  335. /inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScoresGrid.module.css +0 -0
  336. /inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScoresView.module.css +0 -0
  337. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/InfoEventView.module.css +0 -0
  338. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/LoggerEventView.module.css +0 -0
  339. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/SampleInitEventView.module.css +0 -0
  340. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/SandboxEventView.module.css +0 -0
  341. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/ScoreEventView.module.css +0 -0
  342. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/SubtaskEventView.module.css +0 -0
  343. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/ToolEventView.module.css +0 -0
  344. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventNav.module.css +0 -0
  345. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventNav.tsx +0 -0
  346. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventNavs.module.css +0 -0
  347. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventNavs.tsx +0 -0
  348. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventPanel.module.css +0 -0
  349. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventProgressPanel.module.css +0 -0
  350. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventProgressPanel.tsx +0 -0
  351. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventRow.module.css +0 -0
  352. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventRow.tsx +0 -0
  353. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventSection.module.css +0 -0
  354. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventSection.tsx +0 -0
  355. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventTimingPanel.module.css +0 -0
  356. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateDiffView.tsx +0 -0
  357. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateEventView.module.css +0 -0
  358. /inspect_ai/_view/www/src/{workspace → app}/sidebar/EvalStatus.module.css +0 -0
  359. /inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarLogEntry.module.css +0 -0
  360. /inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarScoreView.module.css +0 -0
  361. /inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarScoresView.module.css +0 -0
  362. /inspect_ai/_view/www/src/{usage → app/usage}/ModelUsagePanel.module.css +0 -0
  363. /inspect_ai/_view/www/src/{usage → app/usage}/TokenTable.module.css +0 -0
  364. /inspect_ai/_view/www/src/{usage → app/usage}/UsageCard.module.css +0 -0
  365. /inspect_ai/_view/www/src/{api → client/api}/api-shared.ts +0 -0
  366. /inspect_ai/_view/www/src/{api → client/api}/jsonrpc.ts +0 -0
  367. /inspect_ai/_view/www/src/{logfile → client/remote}/remoteZipFile.ts +0 -0
  368. {inspect_ai-0.3.90.dist-info → inspect_ai-0.3.91.dist-info}/entry_points.txt +0 -0
  369. {inspect_ai-0.3.90.dist-info → inspect_ai-0.3.91.dist-info}/licenses/LICENSE +0 -0
  370. {inspect_ai-0.3.90.dist-info → inspect_ai-0.3.91.dist-info}/top_level.txt +0 -0
@@ -4,197 +4,173 @@ This module provides helper code for handling JSON-RPC communication between the
4
4
  It includes definitions for JSON-RPC request and response models, as well as functions to create and parse JSON-RPC requests and responses.
5
5
  """
6
6
 
7
- import json
8
- from itertools import count
9
7
  from textwrap import dedent
10
- from typing import Literal, Type, TypeVar, cast
11
-
12
- from pydantic import BaseModel, RootModel
8
+ from typing import Type
13
9
 
14
10
  from inspect_ai._util.error import PrerequisiteError
15
- from inspect_ai.tool._tool import ToolError, ToolParsingError
16
11
  from inspect_ai.util import sandbox_with
17
12
  from inspect_ai.util._sandbox.environment import SandboxEnvironment
18
13
 
14
+ from ._json_rpc_helpers import (
15
+ BaseModelT,
16
+ JSONRPCParamsType,
17
+ JSONRPCTransport,
18
+ ScalarT,
19
+ _rpc_call_description,
20
+ create_json_rpc_request,
21
+ )
22
+ from ._json_rpc_helpers import exec_model_request as model_request
23
+ from ._json_rpc_helpers import exec_notification as notification_helper
24
+ from ._json_rpc_helpers import exec_scalar_request as scalar_request
19
25
 
20
- class JSONRPCResponseBase(BaseModel):
21
- jsonrpc: Literal["2.0"]
22
- id: int | float | str
23
-
24
-
25
- class JSONRPCSuccessResponse(JSONRPCResponseBase):
26
- result: object
27
-
28
-
29
- class JSONRPCError(BaseModel):
30
- """See: https://www.jsonrpc.org/specification#error_object"""
31
-
32
- code: int
33
- message: str
34
- data: object | None = None
35
-
36
-
37
- class JSONRPCErrorResponse(JSONRPCResponseBase):
38
- error: JSONRPCError
39
-
40
-
41
- class JSONRPCResponse(RootModel[JSONRPCSuccessResponse | JSONRPCErrorResponse]):
42
- pass
43
26
 
27
+ async def exec_scalar_request(
28
+ sandbox: SandboxEnvironment,
29
+ method: str,
30
+ params: JSONRPCParamsType,
31
+ result_type: Type[ScalarT],
32
+ timeout: int | None = None,
33
+ user: str | None = None,
34
+ ) -> ScalarT:
35
+ return await scalar_request(
36
+ method,
37
+ params,
38
+ result_type,
39
+ transport=ToolSupportSandboxTransport(sandbox, timeout, user),
40
+ )
44
41
 
45
- BaseModelT = TypeVar("BaseModelT", bound=BaseModel)
46
- StrOrModelT = TypeVar("StrOrModelT", bound=str | BaseModel)
47
42
 
48
- id_generator = count(666)
43
+ async def exec_model_request(
44
+ sandbox: SandboxEnvironment,
45
+ method: str,
46
+ params: JSONRPCParamsType,
47
+ result_type: Type[BaseModelT],
48
+ timeout: int | None = None,
49
+ user: str | None = None,
50
+ ) -> BaseModelT:
51
+ return await model_request(
52
+ method,
53
+ params,
54
+ result_type,
55
+ transport=ToolSupportSandboxTransport(sandbox, timeout, user),
56
+ )
49
57
 
50
58
 
51
- async def exec_sandbox_rpc(
59
+ async def exec_notification(
52
60
  sandbox: SandboxEnvironment,
53
61
  method: str,
54
- params: dict[str, object] | tuple[object, ...],
55
- result_cls: Type[StrOrModelT],
62
+ params: JSONRPCParamsType,
56
63
  timeout: int | None = None,
57
64
  user: str | None = None,
58
- ) -> StrOrModelT:
59
- """
60
- Execute a JSON-RPC command to a sandbox environment.
61
-
62
- Note that the JSON RPC request is sent to the exec'ed program via stdin.
65
+ ) -> None:
66
+ return await notification_helper(
67
+ method, params, transport=ToolSupportSandboxTransport(sandbox, timeout, user)
68
+ )
63
69
 
64
- Args:
65
- sandbox (SandboxEnvironment): The sandbox environment to execute the command in.
66
- method (str): The JSON-RPC method to call.
67
- params (dict[str, object] | tuple[object, ...]): The parameters for the JSON-RPC method.
68
- result_cls (Type[BaseModelT]): The class to use for parsing the result.
69
- timeout (int | None, optional): The timeout for the execution. Defaults to None.
70
- user: Optional username or UID to run the command as.
71
70
 
72
- Returns:
73
- BaseModelT: The parsed result of the JSON-RPC call.
71
+ class ToolSupportSandboxTransport(JSONRPCTransport):
72
+ """
73
+ A transport callable that uses a sandbox for RPC communication.
74
74
 
75
- Raises:
76
- RuntimeError: If the sandbox execution fails or if there is an error in the JSON-RPC response.
77
- ToolParsingError: If the JSON-RPC response contains a specific error code indicating a parsing error.
75
+ This class implements the JSONRPCTransport interface and encapsulates
76
+ the sandbox, timeout, and user parameters needed for sandbox-based
77
+ RPC communication.
78
78
  """
79
- exec_result = await sandbox.exec(
80
- [SANDBOX_CLI, "exec"],
81
- input=_create_json_rpc_request(method, params),
82
- timeout=timeout,
83
- user=user,
84
- )
85
79
 
86
- if not exec_result.success:
87
- raise RuntimeError(
88
- f"Sandbox.exec failure executing {_rpc_call_description(method, params)}: {exec_result.stderr}"
80
+ def __init__(
81
+ self,
82
+ sandbox: SandboxEnvironment,
83
+ timeout: int | None = None,
84
+ user: str | None = None,
85
+ ):
86
+ """
87
+ Initialize a new ToolSupportSandboxTransport.
88
+
89
+ Args:
90
+ sandbox (SandboxEnvironment): The sandbox environment to use.
91
+ timeout (int | None, optional): The timeout for executions. Defaults to None.
92
+ user (str | None, optional): Username or UID to run commands as. Defaults to None.
93
+ """
94
+ self.sandbox = sandbox
95
+ self.timeout = timeout
96
+ self.user = user
97
+
98
+ async def __call__(
99
+ self, method: str, params: JSONRPCParamsType, is_notification: bool
100
+ ) -> str:
101
+ """
102
+ Execute an RPC request using the sandbox transport.
103
+
104
+ Args:
105
+ method (str): The JSON-RPC method to call.
106
+ params (JSONRPCParamsType): The parameters for the JSON-RPC method.
107
+ is_notification (bool): Whether this is a notification (no response expected).
108
+
109
+ Returns:
110
+ str: The response from the RPC call.
111
+
112
+ Raises:
113
+ RuntimeError: If the sandbox execution fails.
114
+ """
115
+ exec_result = await self.sandbox.exec(
116
+ [SANDBOX_CLI, "exec"],
117
+ input=create_json_rpc_request(method, params, is_notification),
118
+ timeout=self.timeout,
119
+ user=self.user,
89
120
  )
90
121
 
91
- match _parse_json_rpc_response(exec_result.stdout, result_cls):
92
- case JSONRPCError(code=-32601 | -32602, message=message):
93
- raise ToolParsingError(message)
94
- case JSONRPCError(code=-32000, message=message):
95
- raise ToolError(message)
96
- case JSONRPCError(code=code, message=message):
97
- raise RuntimeError(
98
- f"Error executing tool command {_rpc_call_description(method, params)}: {code=} {message}"
99
- )
100
- # case result_cls() as model: yields a mypy error since it has narrowed model down
101
- # to BaseModel and not BaseModelT. ???
102
- case model if isinstance(model, result_cls):
103
- return model
104
- case not_possible:
122
+ if not exec_result.success:
105
123
  raise RuntimeError(
106
- f"Error executing tool command {_rpc_call_description(method, params)}: {not_possible}"
124
+ f"Sandbox.exec failure executing {_rpc_call_description(method, params)}: {exec_result.stderr}"
107
125
  )
126
+ return exec_result.stdout
108
127
 
109
128
 
110
129
  SANDBOX_CLI = "inspect-tool-support"
111
130
  INSPECT_TOOL_SUPPORT_IMAGE_DOCKERHUB = "aisiuk/inspect-tool-support"
112
131
 
113
132
 
114
- async def tool_container_sandbox(tool_name: str) -> SandboxEnvironment:
115
- sb = await sandbox_with(SANDBOX_CLI, True)
116
- if sb:
133
+ async def tool_container_sandbox(
134
+ tool_name: str, *, sandbox_name: str | None = None
135
+ ) -> SandboxEnvironment:
136
+ if sb := await sandbox_with(SANDBOX_CLI, True, name=sandbox_name):
117
137
  return sb
118
- else:
119
- msg = dedent(f"""
120
- The {tool_name} service was not found in any of the sandboxes for this sample. Please add the {tool_name} to your configuration.
121
-
122
- For example, the following Docker compose file uses the {INSPECT_TOOL_SUPPORT_IMAGE_DOCKERHUB} reference image as its default sandbox:
123
-
124
- services:
125
- default:
126
- image: "{INSPECT_TOOL_SUPPORT_IMAGE_DOCKERHUB}"
127
- init: true
128
-
129
- Alternatively, you can include the service into your own Dockerfile:
130
-
131
- ENV PATH="$PATH:/opt/inspect_tool_support/bin"
132
- RUN python -m venv /opt/inspect_tool_support && \\
133
- /opt/inspect_tool_support/bin/pip install inspect-tool-support && \\
134
- /opt/inspect_tool_support/bin/inspect-tool-support post-install
135
- """).strip()
136
- raise PrerequisiteError(msg)
137
-
138
-
139
- def _create_json_rpc_request(
140
- method: str, params: dict[str, object] | tuple[object, ...]
141
- ) -> str:
142
- return json.dumps(
143
- {
144
- "jsonrpc": "2.0",
145
- "method": method,
146
- "id": next(id_generator),
147
- "params": list(params) if isinstance(params, tuple) else params,
148
- }
138
+
139
+ # This sort of programmatic sentence building will not cut it if we ever
140
+ # support other languages.
141
+ raise PrerequisiteError(
142
+ dedent(f"""
143
+ The {tool_name} service was not found in {"any of the sandboxes" if sandbox_name is None else f"the sandbox '{sandbox_name}'"} for this sample. Please add the {tool_name} to your configuration.
144
+
145
+ For example, the following Docker compose file uses the {INSPECT_TOOL_SUPPORT_IMAGE_DOCKERHUB} reference image as its default sandbox:
146
+
147
+ services:
148
+ default:
149
+ image: "{INSPECT_TOOL_SUPPORT_IMAGE_DOCKERHUB}"
150
+ init: true
151
+
152
+ Alternatively, you can include the service into your own Dockerfile:
153
+
154
+ ENV PATH="$PATH:/opt/inspect_tool_support/bin"
155
+ RUN python -m venv /opt/inspect_tool_support && \\
156
+ /opt/inspect_tool_support/bin/pip install inspect-tool-support && \\
157
+ /opt/inspect_tool_support/bin/inspect-tool-support post-install
158
+ """).strip()
149
159
  )
150
160
 
151
161
 
152
- def _rpc_call_description(
153
- method: str, params: dict[str, object] | tuple[object, ...]
154
- ) -> str:
162
+ def create_sandbox_transport(
163
+ sandbox: SandboxEnvironment, timeout: int | None = None, user: str | None = None
164
+ ) -> JSONRPCTransport:
155
165
  """
156
- Generate a string description of an RPC call.
166
+ Create a transport callable that uses a sandbox for RPC communication.
157
167
 
158
168
  Args:
159
- method (str): The name of the RPC method.
160
- params (dict[str, object] | tuple[object, ...]): The parameters for the RPC method.
169
+ sandbox (SandboxEnvironment): The sandbox environment to use.
170
+ timeout (int | None, optional): The timeout for executions. Defaults to None.
171
+ user (str | None, optional): Username or UID to run commands as. Defaults to None.
161
172
 
162
173
  Returns:
163
- str: A string description of the RPC call.
164
-
165
- Examples:
166
- >>> _rpc_call_description("subtract", {"minuend": 42, "subtrahend": 23})
167
- 'subtract(minuend: 42, subtrahend: 23)'
168
-
169
- >>> _rpc_call_description("subtract", (42, 23))
170
- 'subtract(42, 23)'
174
+ JSONRPCTransport: A ToolSupportSandboxTransport bound to the given sandbox, timeout, and user.
171
175
  """
172
- normalized_params = (
173
- list(map(str, params))
174
- if isinstance(params, tuple)
175
- else [f"{k}: {v}" for k, v in params.items()]
176
- )
177
- return f"{method}({', '.join(normalized_params)})"
178
-
179
-
180
- def _parse_json_rpc_response(
181
- response_str: str,
182
- result_cls: Type[StrOrModelT],
183
- ) -> StrOrModelT | JSONRPCError:
184
- match JSONRPCResponse.model_validate_json(response_str).root:
185
- case JSONRPCErrorResponse(error=error):
186
- return error
187
- case JSONRPCSuccessResponse(result=rpc_result):
188
- # TODO: Wow. Is there really no way to convince Python to narrow these types
189
- # and avoid the cast's
190
- if result_cls is str:
191
- if not isinstance(rpc_result, str):
192
- raise ValueError(f"Expected string result, got {type(rpc_result)}")
193
- return cast(StrOrModelT, rpc_result)
194
- else:
195
- return cast(
196
- StrOrModelT,
197
- cast(BaseModel, result_cls).model_validate(rpc_result, strict=True),
198
- )
199
- case _:
200
- raise ValueError(f"Unexpected JSON RPC response: {response_str}")
176
+ return ToolSupportSandboxTransport(sandbox=sandbox, timeout=timeout, user=user)
@@ -3,7 +3,7 @@ from shortuuid import uuid
3
3
 
4
4
  from inspect_ai.tool import ToolResult
5
5
  from inspect_ai.tool._tool_support_helpers import (
6
- exec_sandbox_rpc,
6
+ exec_model_request,
7
7
  tool_container_sandbox,
8
8
  )
9
9
  from inspect_ai.util import StoreModel, store_as
@@ -98,11 +98,11 @@ def bash_session(*, timeout: int | None = None, instance: str | None = uuid()) -
98
98
 
99
99
  if not store.session_id:
100
100
  store.session_id = (
101
- await exec_sandbox_rpc(
102
- sandbox,
103
- "bash_session_new_session",
104
- {},
105
- NewSessionResult,
101
+ await exec_model_request(
102
+ sandbox=sandbox,
103
+ method="bash_session_new_session",
104
+ params={},
105
+ result_type=NewSessionResult,
106
106
  timeout=timeout,
107
107
  )
108
108
  ).session_name
@@ -110,11 +110,11 @@ def bash_session(*, timeout: int | None = None, instance: str | None = uuid()) -
110
110
  params["session_name"] = store.session_id
111
111
 
112
112
  result = (
113
- await exec_sandbox_rpc(
114
- sandbox,
115
- "bash_session",
116
- params,
117
- BashResult,
113
+ await exec_model_request(
114
+ sandbox=sandbox,
115
+ method="bash_session",
116
+ params=params,
117
+ result_type=BashResult,
118
118
  timeout=timeout,
119
119
  )
120
120
  ).root
@@ -5,7 +5,7 @@ from pydantic import BaseModel, Discriminator, RootModel
5
5
 
6
6
  from inspect_ai.tool import ToolResult
7
7
  from inspect_ai.tool._tool_support_helpers import (
8
- exec_sandbox_rpc,
8
+ exec_scalar_request,
9
9
  tool_container_sandbox,
10
10
  )
11
11
 
@@ -110,11 +110,11 @@ def text_editor(timeout: int | None = None, user: str | None = None) -> Tool:
110
110
  if k in inspect.signature(execute).parameters
111
111
  }
112
112
 
113
- return await exec_sandbox_rpc(
114
- sandbox,
115
- "text_editor",
116
- params,
117
- TextEditorResult,
113
+ return await exec_scalar_request(
114
+ sandbox=sandbox,
115
+ method="text_editor",
116
+ params=params,
117
+ result_type=TextEditorResult,
118
118
  timeout=timeout,
119
119
  )
120
120
 
@@ -9,7 +9,7 @@ from inspect_ai.tool._tool import Tool, ToolError, ToolResult, tool
9
9
  from inspect_ai.tool._tool_call import ToolCall, ToolCallContent, ToolCallView
10
10
  from inspect_ai.tool._tool_info import parse_tool_info
11
11
  from inspect_ai.tool._tool_support_helpers import (
12
- exec_sandbox_rpc,
12
+ exec_model_request,
13
13
  tool_container_sandbox,
14
14
  )
15
15
  from inspect_ai.tool._tool_with import tool_with
@@ -414,18 +414,18 @@ async def _web_browser_cmd(
414
414
 
415
415
  if not store.session_id:
416
416
  store.session_id = (
417
- await exec_sandbox_rpc(
418
- sandbox_env,
419
- "web_new_session",
420
- {"headful": False},
421
- NewSessionResult,
417
+ await exec_model_request(
418
+ sandbox=sandbox_env,
419
+ method="web_new_session",
420
+ params={"headful": False},
421
+ result_type=NewSessionResult,
422
422
  )
423
423
  ).session_name
424
424
 
425
425
  params["session_name"] = store.session_id
426
426
 
427
- crawler_result = await exec_sandbox_rpc(
428
- sandbox_env, tool_name, params, CrawlerResult
427
+ crawler_result = await exec_model_request(
428
+ sandbox=sandbox_env, method=tool_name, params=params, result_type=CrawlerResult
429
429
  )
430
430
  if crawler_result.error and crawler_result.error.strip() != "":
431
431
  raise ToolError(crawler_result.error)
inspect_ai/util/_anyio.py CHANGED
@@ -1,3 +1,4 @@
1
+ import itertools
1
2
  import sys
2
3
 
3
4
  if sys.version_info < (3, 11):
@@ -5,23 +6,33 @@ if sys.version_info < (3, 11):
5
6
 
6
7
 
7
8
  def inner_exception(exc: Exception) -> Exception:
8
- flattended = flatten_exception_group(exc)
9
- return flattended[0]
10
-
11
-
12
- def flatten_exception_group(exc: Exception) -> list[Exception]:
13
- """Recursively flatten an ExceptionGroup to get all contained exceptions."""
14
- if (
15
- hasattr(exc, "__context__")
16
- and exc.__context__ is not None
17
- and isinstance(exc.__context__, Exception)
18
- ):
19
- return flatten_exception_group(exc.__context__) + [exc]
20
-
21
- if isinstance(exc, ExceptionGroup):
22
- flattened = []
23
- for nested_exc in exc.exceptions:
24
- flattened.extend(flatten_exception_group(nested_exc))
25
- return flattened
26
-
27
- return [exc]
9
+ return _flatten_exception(exc)[0]
10
+
11
+
12
+ def _flatten_exception(exc: Exception) -> list[Exception]:
13
+ """Recursively flatten an exception to get all related (__context__) and contained (ExceptionGroup) exceptions."""
14
+ context_to_follow = (
15
+ [exc.__context__]
16
+ # conceptually, if __cause__ is present, it means that this exception
17
+ # wraps the cause - rather than cause being a separate error. We'll
18
+ # follow __context__ only if __cause__ is None
19
+ if exc.__cause__ is None and isinstance(exc.__context__, Exception)
20
+ else []
21
+ )
22
+
23
+ (maybe_this_exception, children_to_follow) = (
24
+ ([], exc.exceptions)
25
+ # if it's a group, follow the children discarding the group
26
+ if isinstance(exc, ExceptionGroup)
27
+ else ([exc], [])
28
+ )
29
+
30
+ # We have to use a set since the same exception is likely to be included in
31
+ # both __context__ and .exceptions
32
+ other_exceptions = [
33
+ flattened_e
34
+ for e in set(itertools.chain(context_to_follow, children_to_follow))
35
+ for flattened_e in _flatten_exception(e)
36
+ ]
37
+
38
+ return maybe_this_exception + other_exceptions
inspect_ai/util/_json.py CHANGED
@@ -2,12 +2,14 @@ import types
2
2
  import typing
3
3
  from copy import deepcopy
4
4
  from dataclasses import is_dataclass
5
+ from datetime import date, datetime, time
5
6
  from typing import (
6
7
  Any,
7
8
  Dict,
8
9
  List,
9
10
  Literal,
10
11
  Optional,
12
+ Set,
11
13
  Tuple,
12
14
  Type,
13
15
  Union,
@@ -30,6 +32,9 @@ class JSONSchema(BaseModel):
30
32
  type: JSONType | None = Field(default=None)
31
33
  """JSON type of tool parameter."""
32
34
 
35
+ format: str | None = Field(default=None)
36
+ """Format of the parameter (e.g. date-time)."""
37
+
33
38
  description: str | None = Field(default=None)
34
39
  """Parameter description."""
35
40
 
@@ -80,7 +85,13 @@ def json_schema(t: Type[Any]) -> JSONSchema:
80
85
  return JSONSchema(type="string")
81
86
  elif t is bool:
82
87
  return JSONSchema(type="boolean")
83
- elif t is list:
88
+ elif t is datetime:
89
+ return JSONSchema(type="string", format="date-time")
90
+ elif t is date:
91
+ return JSONSchema(type="string", format="date")
92
+ elif t is time:
93
+ return JSONSchema(type="string", format="time")
94
+ elif t is list or t is set:
84
95
  return JSONSchema(type="array", items=JSONSchema())
85
96
  elif t is dict:
86
97
  return JSONSchema(type="object", additionalProperties=JSONSchema())
@@ -94,7 +105,14 @@ def json_schema(t: Type[Any]) -> JSONSchema:
94
105
  return JSONSchema(type="null")
95
106
  else:
96
107
  return JSONSchema()
97
- elif origin is list or origin is List or origin is tuple or origin is Tuple:
108
+ elif (
109
+ origin is list
110
+ or origin is List
111
+ or origin is tuple
112
+ or origin is Tuple
113
+ or origin is set
114
+ or origin is Set
115
+ ):
98
116
  return JSONSchema(
99
117
  type="array", items=json_schema(args[0]) if args else JSONSchema()
100
118
  )
@@ -24,7 +24,7 @@ def sandbox(name: str | None = None) -> SandboxEnvironment:
24
24
  """Get the SandboxEnvironment for the current sample.
25
25
 
26
26
  Args:
27
- name (str | None): Optional sandbox environmnent name.
27
+ name (str | None): Optional sandbox environment name.
28
28
 
29
29
  Return:
30
30
  SandboxEnvironment instance.
@@ -45,12 +45,14 @@ def sandbox(name: str | None = None) -> SandboxEnvironment:
45
45
  environment = environments.get(name, None)
46
46
  if not environment:
47
47
  raise ValueError(
48
- f"SandboxEnvironment '{name}' is not a recoginized environment name."
48
+ f"SandboxEnvironment '{name}' is not a recognized environment name."
49
49
  )
50
50
  return environment
51
51
 
52
52
 
53
- async def sandbox_with(file: str, on_path: bool = False) -> SandboxEnvironment | None:
53
+ async def sandbox_with(
54
+ file: str, on_path: bool = False, *, name: str | None = None
55
+ ) -> SandboxEnvironment | None:
54
56
  """Get the SandboxEnvironment for the current sample that has the specified file.
55
57
 
56
58
  Args:
@@ -58,9 +60,12 @@ async def sandbox_with(file: str, on_path: bool = False) -> SandboxEnvironment |
58
60
  True, file should be a filename that exists on the system path.
59
61
  on_path (bool): If True, file is a filename to be verified using "which".
60
62
  If False, file is a path to be checked within the sandbox environments.
63
+ name (str | None): Optional sandbox environment name.
64
+
61
65
 
62
66
  Return:
63
- SandboxEnvironment instance or None if no sandboxes had the file.
67
+ SandboxEnvironment instance or None if none of the sandboxes (or the named
68
+ sandbox) had the file.
64
69
  """
65
70
  # get environments and with mapping
66
71
  environments = sandbox_environments_context_var.get(None)
@@ -71,13 +76,19 @@ async def sandbox_with(file: str, on_path: bool = False) -> SandboxEnvironment |
71
76
  raise_no_sandbox()
72
77
 
73
78
  # if we've already discovered the sandbox for this file then return it
74
- environment_with_key = f"{file}:{on_path}"
79
+ environment_with_key = f"{name or ''}:{file}:{on_path}"
75
80
  environment = environments_with.get(environment_with_key, None)
76
81
  if environment is not None:
77
82
  return environment
78
83
 
79
- # look in each sandbox
80
- for _, environment in environments.items():
84
+ # look in each (or the named) sandbox
85
+ for environment in (
86
+ environments.values()
87
+ if name is None
88
+ else [named_env]
89
+ if (named_env := environments.get(name, None))
90
+ else []
91
+ ):
81
92
  try:
82
93
  if on_path:
83
94
  # can we find the file on the path?
@@ -96,7 +96,7 @@ async def compose_cp(
96
96
  output_limit: int | None = None,
97
97
  ) -> None:
98
98
  result = await compose_command(
99
- ["cp", "--", src, dest],
99
+ ["cp", "-L", "--", src, dest],
100
100
  project=project,
101
101
  timeout=120, # 2-minute timeout for file copies
102
102
  cwd=cwd,