inspect-ai 0.3.89__py3-none-any.whl → 0.3.91__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (370)
  1. inspect_ai/_cli/common.py +13 -0
  2. inspect_ai/_cli/eval.py +40 -0
  3. inspect_ai/_display/textual/widgets/samples.py +49 -4
  4. inspect_ai/_display/textual/widgets/vscode.py +4 -2
  5. inspect_ai/_eval/eval.py +41 -28
  6. inspect_ai/_eval/evalset.py +4 -0
  7. inspect_ai/_eval/loader.py +4 -5
  8. inspect_ai/_eval/registry.py +1 -1
  9. inspect_ai/_eval/run.py +6 -3
  10. inspect_ai/_eval/task/log.py +6 -0
  11. inspect_ai/_eval/task/run.py +108 -53
  12. inspect_ai/_eval/task/sandbox.py +19 -5
  13. inspect_ai/_util/_async.py +1 -1
  14. inspect_ai/_util/constants.py +1 -0
  15. inspect_ai/_util/environ.py +32 -0
  16. inspect_ai/_util/file.py +8 -1
  17. inspect_ai/_util/httpx.py +105 -22
  18. inspect_ai/_util/registry.py +83 -9
  19. inspect_ai/_util/text.py +81 -17
  20. inspect_ai/_util/transcript.py +9 -6
  21. inspect_ai/_util/vscode.py +7 -2
  22. inspect_ai/_view/schema.py +1 -1
  23. inspect_ai/_view/www/babel.config.js +11 -0
  24. inspect_ai/_view/www/dist/assets/index.css +3583 -3508
  25. inspect_ai/_view/www/dist/assets/index.js +59212 -52521
  26. inspect_ai/_view/www/eslint.config.mjs +10 -1
  27. inspect_ai/_view/www/jest.config.mjs +21 -0
  28. inspect_ai/_view/www/log-schema.json +111 -2
  29. inspect_ai/_view/www/package.json +19 -5
  30. inspect_ai/_view/www/src/{types → @types}/log.d.ts +95 -32
  31. inspect_ai/_view/www/{App.css → src/app/App.css} +22 -14
  32. inspect_ai/_view/www/src/app/App.tsx +168 -0
  33. inspect_ai/_view/www/src/{AppErrorBoundary.tsx → app/AppErrorBoundary.tsx} +1 -1
  34. inspect_ai/_view/www/src/{appearance → app/appearance}/icons.ts +1 -0
  35. inspect_ai/_view/www/src/{metadata → app/content}/RenderedContent.tsx +5 -5
  36. inspect_ai/_view/www/src/{workspace/WorkSpaceView.tsx → app/log-view/LogView.tsx} +59 -40
  37. inspect_ai/_view/www/src/app/log-view/LogViewContainer.tsx +159 -0
  38. inspect_ai/_view/www/src/app/log-view/LogViewLayout.tsx +109 -0
  39. inspect_ai/_view/www/src/{workspace → app/log-view}/error/TaskErrorPanel.tsx +3 -3
  40. inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ModelRolesView.tsx +1 -1
  41. inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/Navbar.tsx +4 -4
  42. inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/PrimaryBar.tsx +8 -8
  43. inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ResultsPanel.tsx +6 -6
  44. inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/RunningStatusPanel.tsx +1 -1
  45. inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ScoreGrid.tsx +1 -1
  46. inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/SecondaryBar.tsx +8 -8
  47. inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/InfoTab.tsx +35 -6
  48. inspect_ai/_view/www/src/app/log-view/tabs/JsonTab.tsx +136 -0
  49. inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/SamplesTab.tsx +82 -73
  50. inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/grouping.ts +3 -3
  51. inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/types.ts +1 -1
  52. inspect_ai/_view/www/src/{plan → app/plan}/DatasetDetailView.tsx +2 -2
  53. inspect_ai/_view/www/src/{plan → app/plan}/DetailStep.tsx +1 -1
  54. inspect_ai/_view/www/src/{plan → app/plan}/ModelCard.tsx +4 -4
  55. inspect_ai/_view/www/src/{plan → app/plan}/PlanCard.tsx +2 -2
  56. inspect_ai/_view/www/src/{plan → app/plan}/PlanDetailView.tsx +5 -5
  57. inspect_ai/_view/www/src/{plan → app/plan}/SolverDetailView.tsx +1 -1
  58. inspect_ai/_view/www/src/app/routing/AppRouter.tsx +58 -0
  59. inspect_ai/_view/www/src/app/routing/navigationHooks.ts +182 -0
  60. inspect_ai/_view/www/src/app/routing/url.ts +43 -0
  61. inspect_ai/_view/www/src/{samples → app/samples}/InlineSampleDisplay.tsx +11 -27
  62. inspect_ai/_view/www/src/{samples → app/samples}/SampleDialog.tsx +36 -40
  63. inspect_ai/_view/www/src/{samples → app/samples}/SampleDisplay.module.css +4 -0
  64. inspect_ai/_view/www/src/{samples → app/samples}/SampleDisplay.tsx +116 -49
  65. inspect_ai/_view/www/src/{samples → app/samples}/SampleSummaryView.module.css +1 -1
  66. inspect_ai/_view/www/src/{samples → app/samples}/SampleSummaryView.tsx +29 -26
  67. inspect_ai/_view/www/src/{samples → app/samples}/SamplesTools.tsx +3 -3
  68. inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessage.module.css +5 -2
  69. inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessage.tsx +13 -5
  70. inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessageRenderer.tsx +3 -3
  71. inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessageRow.tsx +6 -1
  72. inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatView.tsx +4 -2
  73. inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatViewVirtualList.tsx +5 -3
  74. inspect_ai/_view/www/src/app/samples/chat/MessageContent.module.css +12 -0
  75. inspect_ai/_view/www/src/{samples → app/samples}/chat/MessageContent.tsx +11 -10
  76. inspect_ai/_view/www/src/app/samples/chat/MessageContents.module.css +7 -0
  77. inspect_ai/_view/www/src/{samples → app/samples}/chat/MessageContents.tsx +14 -8
  78. inspect_ai/_view/www/src/{samples → app/samples}/chat/messages.ts +2 -2
  79. inspect_ai/_view/www/src/app/samples/chat/tools/ToolCallView.module.css +7 -0
  80. inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolCallView.tsx +26 -27
  81. inspect_ai/_view/www/src/app/samples/chat/tools/ToolInput.module.css +19 -0
  82. inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolInput.tsx +3 -3
  83. inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolOutput.module.css +1 -0
  84. inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolOutput.tsx +1 -1
  85. inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolTitle.module.css +4 -0
  86. inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolTitle.tsx +2 -2
  87. inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/tool.ts +1 -1
  88. inspect_ai/_view/www/src/app/samples/chat/types.ts +1 -0
  89. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/samplesDescriptor.tsx +38 -15
  90. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/BooleanScoreDescriptor.tsx +1 -1
  91. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/CategoricalScoreDescriptor.tsx +2 -2
  92. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/NumericScoreDescriptor.tsx +3 -3
  93. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/ObjectScoreDescriptor.tsx +4 -4
  94. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/OtherScoreDescriptor.tsx +2 -2
  95. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/PassFailScoreDescriptor.tsx +2 -2
  96. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/ScoreDescriptor.tsx +1 -1
  97. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/types.ts +4 -3
  98. inspect_ai/_view/www/src/{samples → app/samples}/error/SampleErrorView.module.css +2 -1
  99. inspect_ai/_view/www/src/{samples → app/samples}/list/SampleHeader.tsx +3 -0
  100. inspect_ai/_view/www/src/{samples → app/samples}/list/SampleList.tsx +47 -33
  101. inspect_ai/_view/www/src/{samples → app/samples}/list/SampleRow.module.css +16 -0
  102. inspect_ai/_view/www/src/{samples → app/samples}/list/SampleRow.tsx +47 -20
  103. inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/SelectScorer.tsx +1 -1
  104. inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/SortFilter.tsx +4 -4
  105. inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/filters.ts +8 -6
  106. inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/SampleFilter.tsx +4 -3
  107. inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/completions.ts +1 -1
  108. inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/language.ts +1 -0
  109. inspect_ai/_view/www/src/{samples → app/samples}/sampleDataAdapter.ts +3 -3
  110. inspect_ai/_view/www/src/{samples → app/samples}/sampleLimit.ts +1 -1
  111. inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScores.tsx +1 -1
  112. inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScoresGrid.tsx +12 -11
  113. inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScoresView.tsx +6 -6
  114. inspect_ai/_view/www/src/{samples → app/samples}/transcript/ApprovalEventView.tsx +1 -1
  115. inspect_ai/_view/www/src/{samples → app/samples}/transcript/ErrorEventView.tsx +3 -3
  116. inspect_ai/_view/www/src/{samples → app/samples}/transcript/InfoEventView.tsx +4 -4
  117. inspect_ai/_view/www/src/{samples → app/samples}/transcript/InputEventView.tsx +3 -3
  118. inspect_ai/_view/www/src/{samples → app/samples}/transcript/LoggerEventView.tsx +3 -3
  119. inspect_ai/_view/www/src/{samples → app/samples}/transcript/ModelEventView.module.css +13 -7
  120. inspect_ai/_view/www/src/{samples → app/samples}/transcript/ModelEventView.tsx +49 -21
  121. inspect_ai/_view/www/src/{samples → app/samples}/transcript/SampleInitEventView.tsx +11 -9
  122. inspect_ai/_view/www/src/{samples → app/samples}/transcript/SampleLimitEventView.tsx +1 -1
  123. inspect_ai/_view/www/src/{samples → app/samples}/transcript/SandboxEventView.tsx +8 -6
  124. inspect_ai/_view/www/src/{samples → app/samples}/transcript/ScoreEventView.tsx +4 -4
  125. inspect_ai/_view/www/src/{samples → app/samples}/transcript/StepEventView.tsx +22 -8
  126. inspect_ai/_view/www/src/{samples → app/samples}/transcript/SubtaskEventView.tsx +2 -2
  127. inspect_ai/_view/www/src/{samples → app/samples}/transcript/ToolEventView.tsx +2 -2
  128. inspect_ai/_view/www/src/{samples → app/samples}/transcript/TranscriptView.module.css +8 -9
  129. inspect_ai/_view/www/src/{samples → app/samples}/transcript/TranscriptView.tsx +32 -114
  130. inspect_ai/_view/www/src/{samples → app/samples}/transcript/TranscriptVirtualListComponent.module.css +6 -5
  131. inspect_ai/_view/www/src/{samples → app/samples}/transcript/TranscriptVirtualListComponent.tsx +14 -2
  132. inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventPanel.tsx +2 -2
  133. inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventTimingPanel.tsx +1 -1
  134. inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/utils.ts +1 -1
  135. inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateEventRenderers.tsx +23 -21
  136. inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateEventRenders.module.css +7 -0
  137. inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateEventView.tsx +2 -2
  138. inspect_ai/_view/www/src/app/samples/transcript/transform/fixups.ts +142 -0
  139. inspect_ai/_view/www/src/app/samples/transcript/transform/treeify.ts +39 -0
  140. inspect_ai/_view/www/src/{samples → app/samples}/transcript/types.ts +1 -1
  141. inspect_ai/_view/www/src/{workspace → app}/sidebar/EvalStatus.tsx +1 -1
  142. inspect_ai/_view/www/src/app/sidebar/LogDirectoryTitleView.module.css +16 -0
  143. inspect_ai/_view/www/src/app/sidebar/LogDirectoryTitleView.tsx +70 -0
  144. inspect_ai/_view/www/src/{workspace → app}/sidebar/Sidebar.module.css +8 -0
  145. inspect_ai/_view/www/src/{workspace → app}/sidebar/Sidebar.tsx +35 -17
  146. inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarLogEntry.tsx +1 -1
  147. inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarScoreView.tsx +2 -2
  148. inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarScoresView.tsx +2 -2
  149. inspect_ai/_view/www/src/{types.ts → app/types.ts} +18 -11
  150. inspect_ai/_view/www/src/{usage → app/usage}/ModelTokenTable.tsx +1 -1
  151. inspect_ai/_view/www/src/{usage → app/usage}/ModelUsagePanel.tsx +2 -2
  152. inspect_ai/_view/www/src/{usage → app/usage}/TokenTable.tsx +1 -1
  153. inspect_ai/_view/www/src/{usage → app/usage}/UsageCard.tsx +6 -6
  154. inspect_ai/_view/www/src/{api → client/api}/api-browser.ts +2 -2
  155. inspect_ai/_view/www/src/{api → client/api}/api-http.ts +3 -3
  156. inspect_ai/_view/www/src/{api → client/api}/api-vscode.ts +2 -2
  157. inspect_ai/_view/www/src/{api → client/api}/client-api.ts +6 -5
  158. inspect_ai/_view/www/src/{api → client/api}/index.ts +2 -2
  159. inspect_ai/_view/www/src/{api → client/api}/types.ts +4 -1
  160. inspect_ai/_view/www/src/{logfile → client/remote}/remoteLogFile.ts +3 -3
  161. inspect_ai/_view/www/src/{storage → client/storage}/index.ts +11 -5
  162. inspect_ai/_view/www/src/components/Card.tsx +1 -1
  163. inspect_ai/_view/www/src/components/CopyButton.tsx +1 -1
  164. inspect_ai/_view/www/src/components/DownloadButton.tsx +1 -1
  165. inspect_ai/_view/www/src/components/ErrorPanel.tsx +1 -1
  166. inspect_ai/_view/www/src/components/{ExpandablePanel.css → ExpandablePanel.module.css} +14 -11
  167. inspect_ai/_view/www/src/components/ExpandablePanel.tsx +16 -10
  168. inspect_ai/_view/www/src/components/FindBand.tsx +1 -1
  169. inspect_ai/_view/www/src/components/JsonPanel.css +2 -2
  170. inspect_ai/_view/www/src/components/LargeModal.tsx +12 -1
  171. inspect_ai/_view/www/src/components/LightboxCarousel.tsx +1 -1
  172. inspect_ai/_view/www/src/components/MarkdownDiv.tsx +3 -1
  173. inspect_ai/_view/www/src/components/MessageBand.tsx +1 -1
  174. inspect_ai/_view/www/src/components/NoContentsPanel.tsx +1 -1
  175. inspect_ai/_view/www/src/constants.ts +10 -9
  176. inspect_ai/_view/www/src/index.tsx +27 -11
  177. inspect_ai/_view/www/src/state/appSlice.ts +44 -5
  178. inspect_ai/_view/www/src/state/hooks.ts +30 -7
  179. inspect_ai/_view/www/src/state/logSlice.ts +7 -5
  180. inspect_ai/_view/www/src/state/logsPolling.ts +1 -1
  181. inspect_ai/_view/www/src/state/logsSlice.ts +18 -13
  182. inspect_ai/_view/www/src/state/samplePolling.ts +12 -12
  183. inspect_ai/_view/www/src/state/sampleSlice.ts +3 -5
  184. inspect_ai/_view/www/src/state/sampleUtils.ts +1 -1
  185. inspect_ai/_view/www/src/{scoring/utils.ts → state/scoring.ts} +2 -2
  186. inspect_ai/_view/www/src/state/store.ts +9 -7
  187. inspect_ai/_view/www/src/state/utils.ts +1 -1
  188. inspect_ai/_view/www/src/tests/README.md +49 -0
  189. inspect_ai/_view/www/src/tests/__mocks__/fileMock.js +1 -0
  190. inspect_ai/_view/www/src/tests/__mocks__/styleMock.js +1 -0
  191. inspect_ai/_view/www/src/tests/setupTests.mjs +1 -0
  192. inspect_ai/_view/www/src/tests/utils/base64.test.ts +23 -0
  193. inspect_ai/_view/www/src/tests/utils/format.test.ts +127 -0
  194. inspect_ai/_view/www/src/tests/utils/path.test.ts +54 -0
  195. inspect_ai/_view/www/src/utils/format.ts +8 -2
  196. inspect_ai/_view/www/src/utils/path.ts +14 -2
  197. inspect_ai/_view/www/src/utils/polling.ts +1 -2
  198. inspect_ai/_view/www/src/utils/uri.ts +32 -0
  199. inspect_ai/_view/www/yarn.lock +3310 -382
  200. inspect_ai/agent/_handoff.py +6 -3
  201. inspect_ai/agent/_human/agent.py +5 -3
  202. inspect_ai/agent/_human/install.py +16 -7
  203. inspect_ai/agent/_human/panel.py +14 -1
  204. inspect_ai/agent/_human/service.py +5 -1
  205. inspect_ai/agent/_react.py +161 -128
  206. inspect_ai/agent/_types.py +15 -4
  207. inspect_ai/approval/_policy.py +2 -2
  208. inspect_ai/log/_file.py +30 -11
  209. inspect_ai/log/_log.py +7 -1
  210. inspect_ai/log/_recorders/eval.py +3 -0
  211. inspect_ai/log/_recorders/types.py +1 -0
  212. inspect_ai/log/_samples.py +4 -0
  213. inspect_ai/model/_call_tools.py +33 -17
  214. inspect_ai/model/_generate_config.py +10 -2
  215. inspect_ai/model/_model.py +41 -21
  216. inspect_ai/model/_model_output.py +2 -1
  217. inspect_ai/model/_openai.py +10 -8
  218. inspect_ai/model/_openai_responses.py +83 -42
  219. inspect_ai/model/_providers/anthropic.py +14 -12
  220. inspect_ai/model/_providers/google.py +191 -95
  221. inspect_ai/model/_providers/hf.py +1 -1
  222. inspect_ai/model/_providers/mistral.py +2 -3
  223. inspect_ai/model/_providers/openai.py +54 -17
  224. inspect_ai/model/_providers/openai_o1.py +1 -1
  225. inspect_ai/model/_providers/openai_responses.py +28 -16
  226. inspect_ai/model/_providers/openrouter.py +14 -0
  227. inspect_ai/model/_providers/providers.py +2 -2
  228. inspect_ai/model/_providers/util/chatapi.py +17 -7
  229. inspect_ai/model/_providers/vllm.py +1 -1
  230. inspect_ai/scorer/_metric.py +17 -1
  231. inspect_ai/scorer/_model.py +51 -6
  232. inspect_ai/scorer/_scorer.py +1 -1
  233. inspect_ai/solver/_human_agent.py +3 -0
  234. inspect_ai/solver/_plan.py +1 -1
  235. inspect_ai/solver/_solver.py +1 -1
  236. inspect_ai/solver/_use_tools.py +14 -8
  237. inspect_ai/tool/__init__.py +16 -1
  238. inspect_ai/tool/_json_rpc_helpers.py +285 -0
  239. inspect_ai/tool/_mcp/__init__.py +13 -0
  240. inspect_ai/tool/_mcp/_context.py +14 -0
  241. inspect_ai/tool/_mcp/_mcp.py +293 -0
  242. inspect_ai/tool/_mcp/_sandbox.py +104 -0
  243. inspect_ai/tool/_mcp/_types.py +31 -0
  244. inspect_ai/tool/_mcp/connection.py +60 -0
  245. inspect_ai/tool/_mcp/sampling.py +118 -0
  246. inspect_ai/tool/_mcp/server.py +112 -0
  247. inspect_ai/tool/_mcp/tools.py +34 -0
  248. inspect_ai/tool/_tool.py +13 -0
  249. inspect_ai/tool/_tool_def.py +24 -7
  250. inspect_ai/tool/_tool_support_helpers.py +129 -153
  251. inspect_ai/tool/_tools/_bash_session.py +11 -11
  252. inspect_ai/tool/_tools/_text_editor.py +6 -6
  253. inspect_ai/tool/_tools/_web_browser/_web_browser.py +8 -8
  254. inspect_ai/util/_anyio.py +31 -20
  255. inspect_ai/util/_json.py +20 -2
  256. inspect_ai/util/_sandbox/context.py +18 -7
  257. inspect_ai/util/_sandbox/docker/compose.py +1 -1
  258. inspect_ai/util/_sandbox/docker/docker.py +92 -21
  259. inspect_ai/util/_sandbox/environment.py +33 -2
  260. inspect_ai/util/_sandbox/events.py +2 -2
  261. inspect_ai/util/_sandbox/service.py +13 -3
  262. {inspect_ai-0.3.89.dist-info → inspect_ai-0.3.91.dist-info}/METADATA +6 -2
  263. inspect_ai-0.3.91.dist-info/RECORD +732 -0
  264. {inspect_ai-0.3.89.dist-info → inspect_ai-0.3.91.dist-info}/WHEEL +1 -1
  265. inspect_ai/_view/www/src/App.tsx +0 -316
  266. inspect_ai/_view/www/src/samples/chat/MessageContent.module.css +0 -4
  267. inspect_ai/_view/www/src/samples/chat/MessageContents.module.css +0 -3
  268. inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.module.css +0 -3
  269. inspect_ai/_view/www/src/samples/chat/tools/ToolInput.module.css +0 -14
  270. inspect_ai/_view/www/src/workspace/WorkSpace.tsx +0 -292
  271. inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.module.css +0 -5
  272. inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +0 -57
  273. inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +0 -43
  274. inspect_ai-0.3.89.dist-info/RECORD +0 -705
  275. /inspect_ai/_view/www/src/{types → @types}/asciicinema-player.d.ts +0 -0
  276. /inspect_ai/_view/www/src/{types → @types}/jsondiffpatch.d.ts +0 -0
  277. /inspect_ai/_view/www/src/{types → @types}/markdown-it-katex.d.ts +0 -0
  278. /inspect_ai/_view/www/src/{types → @types}/prism.d.ts +0 -0
  279. /inspect_ai/_view/www/src/{appearance → app/appearance}/colors.ts +0 -0
  280. /inspect_ai/_view/www/src/{appearance → app/appearance}/fonts.ts +0 -0
  281. /inspect_ai/_view/www/src/{appearance → app/appearance}/styles.ts +0 -0
  282. /inspect_ai/_view/www/src/{metadata → app/content}/MetaDataGrid.tsx +0 -0
  283. /inspect_ai/_view/www/src/{metadata → app/content}/MetaDataView.module.css +0 -0
  284. /inspect_ai/_view/www/src/{metadata → app/content}/MetaDataView.tsx +0 -0
  285. /inspect_ai/_view/www/src/{metadata → app/content}/MetadataGrid.module.css +0 -0
  286. /inspect_ai/_view/www/src/{metadata → app/content}/RenderedContent.module.css +0 -0
  287. /inspect_ai/_view/www/src/{metadata → app/content}/types.ts +0 -0
  288. /inspect_ai/_view/www/src/{workspace/WorkSpaceView.module.css → app/log-view/LogView.module.css} +0 -0
  289. /inspect_ai/_view/www/src/{workspace → app/log-view}/error/TaskErrorPanel.module.css +0 -0
  290. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ModelRolesView.module.css +0 -0
  291. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/Navbar.module.css +0 -0
  292. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/PrimaryBar.module.css +0 -0
  293. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ResultsPanel.module.css +0 -0
  294. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/RunningStatusPanel.module.css +0 -0
  295. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ScoreGrid.module.css +0 -0
  296. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/SecondaryBar.module.css +0 -0
  297. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/StatusPanel.module.css +0 -0
  298. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/StatusPanel.tsx +0 -0
  299. /inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/InfoTab.module.css +0 -0
  300. /inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/JsonTab.module.css +0 -0
  301. /inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/RunningNoSamples.module.css +0 -0
  302. /inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/RunningNoSamples.tsx +0 -0
  303. /inspect_ai/_view/www/src/{workspace → app/log-view}/types.ts +0 -0
  304. /inspect_ai/_view/www/src/{workspace → app/log-view}/utils.ts +0 -0
  305. /inspect_ai/_view/www/src/{plan → app/plan}/DatasetDetailView.module.css +0 -0
  306. /inspect_ai/_view/www/src/{plan → app/plan}/DetailStep.module.css +0 -0
  307. /inspect_ai/_view/www/src/{plan → app/plan}/ModelCard.module.css +0 -0
  308. /inspect_ai/_view/www/src/{plan → app/plan}/PlanDetailView.module.css +0 -0
  309. /inspect_ai/_view/www/src/{plan → app/plan}/ScorerDetailView.module.css +0 -0
  310. /inspect_ai/_view/www/src/{plan → app/plan}/ScorerDetailView.tsx +0 -0
  311. /inspect_ai/_view/www/src/{plan → app/plan}/SolverDetailView.module.css +0 -0
  312. /inspect_ai/_view/www/src/{samples → app/samples}/InlineSampleDisplay.module.css +0 -0
  313. /inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessageRow.module.css +0 -0
  314. /inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatViewVirtualList.module.css +0 -0
  315. /inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/BooleanScoreDescriptor.module.css +0 -0
  316. /inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/ObjectScoreDescriptor.module.css +0 -0
  317. /inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/PassFailScoreDescriptor.module.css +0 -0
  318. /inspect_ai/_view/www/src/{samples → app/samples}/error/FlatSampleErrorView.module.css +0 -0
  319. /inspect_ai/_view/www/src/{samples → app/samples}/error/FlatSampleErrorView.tsx +0 -0
  320. /inspect_ai/_view/www/src/{samples → app/samples}/error/SampleErrorView.tsx +0 -0
  321. /inspect_ai/_view/www/src/{samples → app/samples}/error/error.ts +0 -0
  322. /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleFooter.module.css +0 -0
  323. /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleFooter.tsx +0 -0
  324. /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleHeader.module.css +0 -0
  325. /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleList.module.css +0 -0
  326. /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleSeparator.module.css +0 -0
  327. /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleSeparator.tsx +0 -0
  328. /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/EpochFilter.module.css +0 -0
  329. /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/EpochFilter.tsx +0 -0
  330. /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/SelectScorer.module.css +0 -0
  331. /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/SortFilter.module.css +0 -0
  332. /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/SampleFilter.module.css +0 -0
  333. /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/tokenize.ts +0 -0
  334. /inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScores.module.css +0 -0
  335. /inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScoresGrid.module.css +0 -0
  336. /inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScoresView.module.css +0 -0
  337. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/InfoEventView.module.css +0 -0
  338. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/LoggerEventView.module.css +0 -0
  339. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/SampleInitEventView.module.css +0 -0
  340. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/SandboxEventView.module.css +0 -0
  341. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/ScoreEventView.module.css +0 -0
  342. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/SubtaskEventView.module.css +0 -0
  343. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/ToolEventView.module.css +0 -0
  344. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventNav.module.css +0 -0
  345. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventNav.tsx +0 -0
  346. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventNavs.module.css +0 -0
  347. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventNavs.tsx +0 -0
  348. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventPanel.module.css +0 -0
  349. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventProgressPanel.module.css +0 -0
  350. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventProgressPanel.tsx +0 -0
  351. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventRow.module.css +0 -0
  352. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventRow.tsx +0 -0
  353. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventSection.module.css +0 -0
  354. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventSection.tsx +0 -0
  355. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventTimingPanel.module.css +0 -0
  356. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateDiffView.tsx +0 -0
  357. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateEventView.module.css +0 -0
  358. /inspect_ai/_view/www/src/{workspace → app}/sidebar/EvalStatus.module.css +0 -0
  359. /inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarLogEntry.module.css +0 -0
  360. /inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarScoreView.module.css +0 -0
  361. /inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarScoresView.module.css +0 -0
  362. /inspect_ai/_view/www/src/{usage → app/usage}/ModelUsagePanel.module.css +0 -0
  363. /inspect_ai/_view/www/src/{usage → app/usage}/TokenTable.module.css +0 -0
  364. /inspect_ai/_view/www/src/{usage → app/usage}/UsageCard.module.css +0 -0
  365. /inspect_ai/_view/www/src/{api → client/api}/api-shared.ts +0 -0
  366. /inspect_ai/_view/www/src/{api → client/api}/jsonrpc.ts +0 -0
  367. /inspect_ai/_view/www/src/{logfile → client/remote}/remoteZipFile.ts +0 -0
  368. {inspect_ai-0.3.89.dist-info → inspect_ai-0.3.91.dist-info}/entry_points.txt +0 -0
  369. {inspect_ai-0.3.89.dist-info → inspect_ai-0.3.91.dist-info}/licenses/LICENSE +0 -0
  370. {inspect_ai-0.3.89.dist-info → inspect_ai-0.3.91.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
1
- from typing import Any
1
+ from typing import Any, Sequence
2
2
 
3
3
  from inspect_ai._util.registry import (
4
4
  RegistryInfo,
@@ -6,7 +6,8 @@ from inspect_ai._util.registry import (
6
6
  registry_unqualified_name,
7
7
  set_registry_info,
8
8
  )
9
- from inspect_ai.tool._tool import Tool, ToolResult
9
+ from inspect_ai.tool._tool import Tool, ToolResult, ToolSource
10
+ from inspect_ai.tool._tool_def import ToolDef
10
11
  from inspect_ai.tool._tool_description import ToolDescription, set_tool_description
11
12
 
12
13
  from ._agent import Agent
@@ -86,7 +87,9 @@ class AgentTool(Tool):
86
87
  raise RuntimeError("AgentTool should not be called directly")
87
88
 
88
89
 
89
- def has_handoff(tools: list[Tool] | None) -> bool:
90
+ def has_handoff(
91
+ tools: Sequence[Tool | ToolDef | ToolSource] | None,
92
+ ) -> bool:
90
93
  if tools:
91
94
  return any([isinstance(tool, AgentTool) for tool in tools])
92
95
  else:
@@ -18,6 +18,7 @@ def human_cli(
18
18
  answer: bool | str = True,
19
19
  intermediate_scoring: bool = False,
20
20
  record_session: bool = True,
21
+ user: str | None = None,
21
22
  ) -> Agent:
22
23
  """Human CLI agent for tasks that run in a sandbox.
23
24
 
@@ -37,6 +38,7 @@ def human_cli(
37
38
  that the answer matches the expected format.
38
39
  intermediate_scoring: Allow the human agent to check their score while working.
39
40
  record_session: Record all user commands and outputs in the sandbox bash session.
41
+ user: User to login as. Defaults to the sandbox environment's default user.
40
42
 
41
43
  Returns:
42
44
  Agent: Human CLI agent.
@@ -48,7 +50,7 @@ def human_cli(
48
50
  async with agent_lock:
49
51
  # ensure that we have a sandbox to work with
50
52
  try:
51
- connection = await sandbox().connection()
53
+ connection = await sandbox().connection(user=user)
52
54
  except ProcessLookupError:
53
55
  raise RuntimeError("Human agent must run in a task with a sandbox.")
54
56
  except NotImplementedError:
@@ -66,13 +68,13 @@ def human_cli(
66
68
  )
67
69
 
68
70
  # install agent tools
69
- await install_human_agent(commands, record_session)
71
+ await install_human_agent(user, commands, record_session)
70
72
 
71
73
  # hookup the view ui
72
74
  view.connect(connection)
73
75
 
74
76
  # run sandbox service
75
- return await run_human_agent_service(state, commands, view)
77
+ return await run_human_agent_service(user, state, commands, view)
76
78
 
77
79
  # support both fullscreen ui and fallback
78
80
  if display_type() == "full":
@@ -17,7 +17,9 @@ RECORD_SESSION_DIR = "/var/tmp/user-sessions"
17
17
 
18
18
 
19
19
  async def install_human_agent(
20
- commands: list[HumanAgentCommand], record_session: bool
20
+ user: str | None,
21
+ commands: list[HumanAgentCommand],
22
+ record_session: bool,
21
23
  ) -> None:
22
24
  # see if we have already installed
23
25
  if not (await sandbox().exec(["mkdir", HUMAN_AGENT_DIR])).success:
@@ -35,7 +37,7 @@ async def install_human_agent(
35
37
  await checked_write_file(f"{INSTALL_DIR}/{BASHRC}", bash_rc, executable=True)
36
38
 
37
39
  # write and run installation script
38
- install_sh = human_agent_install_sh()
40
+ install_sh = human_agent_install_sh(user)
39
41
  await checked_write_file(f"{INSTALL_DIR}/{INSTALL_SH}", install_sh, executable=True)
40
42
  await checked_exec(["bash", f"./{INSTALL_SH}"], cwd=INSTALL_DIR)
41
43
  await checked_exec(["rm", "-rf", INSTALL_DIR])
@@ -177,8 +179,8 @@ def human_agent_bashrc(commands: list[HumanAgentCommand], record_session: bool)
177
179
  INSTRUCTIONS = dedent("""
178
180
  if [ -z "$INSTRUCTIONS_SHOWN" ]; then
179
181
  export INSTRUCTIONS_SHOWN=1
180
- task instructions > instructions.txt
181
- cat instructions.txt
182
+ task instructions > ~/instructions.txt
183
+ cat ~/instructions.txt
182
184
  fi
183
185
  """).lstrip()
184
186
 
@@ -190,7 +192,7 @@ def human_agent_bashrc(commands: list[HumanAgentCommand], record_session: bool)
190
192
  return "\n".join([TERMINAL_CHECK, COMMANDS, RECORDING, INSTRUCTIONS, CLOCK])
191
193
 
192
194
 
193
- def human_agent_install_sh() -> str:
195
+ def human_agent_install_sh(user: str | None) -> str:
194
196
  return dedent(f"""
195
197
  #!/usr/bin/env bash
196
198
 
@@ -201,8 +203,15 @@ def human_agent_install_sh() -> str:
201
203
  # copy command script
202
204
  cp {TASK_PY} $HUMAN_AGENT
203
205
 
204
- # append to .bashrc
205
- cat {BASHRC} >> ~/{BASHRC}
206
+ # get user's home directory
207
+ USER="{user or ""}"
208
+ if [ -z "$USER" ]; then
209
+ USER=$(whoami)
210
+ fi
211
+ USER_HOME=$(getent passwd $USER | cut -d: -f6)
212
+
213
+ # append to user's .bashrc
214
+ cat {BASHRC} >> $USER_HOME/{BASHRC}
206
215
  """)
207
216
 
208
217
 
@@ -35,6 +35,7 @@ class HumanAgentPanel(InputPanel):
35
35
  VSCODE_LINKS_ID = "vscode-links"
36
36
  LOGIN_VSCODE_TERMINAL_ID = "login-vscode-terminal"
37
37
  LOGIN_VSCODE_WINDOW_ID = "login-vscode-window"
38
+ LOGIN_VSCODE_WINDOW_LABEL_ID = "login-vscode-window-label"
38
39
  COMMAND_INSTRUCTIONS_ID = "command-instructions"
39
40
  SANDBOX_COMMAND_ID = "sandbox-command"
40
41
 
@@ -88,7 +89,11 @@ class HumanAgentPanel(InputPanel):
88
89
  markup=False,
89
90
  )
90
91
  with Horizontal(id=self.VSCODE_LINKS_ID):
91
- yield Label("Login:", classes=self.LINK_LABEL_CLASS)
92
+ yield Label(
93
+ "Login:",
94
+ classes=self.LINK_LABEL_CLASS,
95
+ id=self.LOGIN_VSCODE_WINDOW_LABEL_ID,
96
+ )
92
97
  yield VSCodeLink(
93
98
  "VS Code Window",
94
99
  id=self.LOGIN_VSCODE_WINDOW_ID,
@@ -146,6 +151,14 @@ class HumanAgentPanel(InputPanel):
146
151
  window_btn = cast(
147
152
  VSCodeLink, self.query_one(f"#{self.LOGIN_VSCODE_WINDOW_ID}")
148
153
  )
154
+ window_lbl = cast(
155
+ Label, self.query_one(f"#{self.LOGIN_VSCODE_WINDOW_LABEL_ID}")
156
+ )
157
+ window_btn_and_lbl_display = (
158
+ vscode and connection.vscode_command is not None
159
+ )
160
+ window_btn.display = window_btn_and_lbl_display
161
+ window_lbl.display = window_btn_and_lbl_display
149
162
  if connection.vscode_command is not None:
150
163
  window_btn.commands = [
151
164
  VSCodeCommand(
@@ -10,7 +10,10 @@ from .view import HumanAgentView
10
10
 
11
11
 
12
12
  async def run_human_agent_service(
13
- state: AgentState, commands: list[HumanAgentCommand], view: HumanAgentView | None
13
+ user: str | None,
14
+ state: AgentState,
15
+ commands: list[HumanAgentCommand],
16
+ view: HumanAgentView | None,
14
17
  ) -> AgentState:
15
18
  # initialise agent state
16
19
  instructions = "\n\n".join([message.text for message in state.messages]).strip()
@@ -39,6 +42,7 @@ async def run_human_agent_service(
39
42
  methods=methods,
40
43
  until=task_is_completed,
41
44
  sandbox=sandbox(),
45
+ user=user,
42
46
  )
43
47
 
44
48
  # set the answer if we have one
@@ -1,5 +1,5 @@
1
1
  from logging import getLogger
2
- from typing import Literal, cast
2
+ from typing import Literal, Sequence, cast
3
3
 
4
4
  from inspect_ai._util._async import is_callable_coroutine
5
5
  from inspect_ai.model._call_tools import execute_tools
@@ -13,9 +13,10 @@ from inspect_ai.model._chat_message import (
13
13
  from inspect_ai.model._model import Model, get_model
14
14
  from inspect_ai.model._trim import trim_messages
15
15
  from inspect_ai.scorer._score import score
16
- from inspect_ai.tool._tool import Tool, ToolResult, tool
16
+ from inspect_ai.tool._mcp.connection import mcp_connection
17
+ from inspect_ai.tool._tool import Tool, ToolResult, ToolSource, tool
18
+ from inspect_ai.tool._tool_def import ToolDef
17
19
  from inspect_ai.tool._tool_info import parse_tool_info
18
- from inspect_ai.tool._tool_with import tool_with
19
20
 
20
21
  from ._agent import Agent, AgentState, agent, agent_with
21
22
  from ._filter import MessageFilter
@@ -37,7 +38,7 @@ def react(
37
38
  name: str | None = None,
38
39
  description: str | None = None,
39
40
  prompt: str | AgentPrompt | None = AgentPrompt(),
40
- tools: list[Tool] | None = None,
41
+ tools: Sequence[Tool | ToolDef | ToolSource] | None = None,
41
42
  model: str | Model | Agent | None = None,
42
43
  attempts: int | AgentAttempts = 1,
43
44
  submit: AgentSubmit = AgentSubmit(),
@@ -88,6 +89,31 @@ def react(
88
89
  Returns:
89
90
  ReAct agent.
90
91
  """
92
+
93
+ # default submit tool
94
+ @tool(name="submit")
95
+ def default_submit_tool() -> Tool:
96
+ async def execute(answer: str) -> ToolResult:
97
+ """Submit an answer for evaluation.
98
+
99
+ Args:
100
+ answer (str): Submitted answer
101
+ """
102
+ return answer
103
+
104
+ return execute
105
+
106
+ # resolve tools
107
+ tools = list(tools) if tools is not None else []
108
+
109
+ # resolve submit tool
110
+ submit_tool = ToolDef(
111
+ submit.tool or default_submit_tool(),
112
+ name=submit.name,
113
+ description=submit.description,
114
+ )
115
+ tools.append(submit_tool)
116
+
91
117
  # resolve prompt / system message
92
118
  prompt = AgentPrompt(prompt) if isinstance(prompt, str) else prompt
93
119
  if prompt:
@@ -98,7 +124,7 @@ def react(
98
124
  prompt_lines.append(prompt.handoff_prompt)
99
125
  if prompt.assistant_prompt:
100
126
  prompt_lines.append(prompt.assistant_prompt)
101
- prompt_content = "\n\n".join(prompt_lines).format(submit=submit.name)
127
+ prompt_content = "\n\n".join(prompt_lines).format(submit=submit_tool.name)
102
128
  system_message: ChatMessage | None = ChatMessageSystem(content=prompt_content)
103
129
  else:
104
130
  system_message = None
@@ -106,151 +132,146 @@ def react(
106
132
  # resolve attempts
107
133
  attempts = AgentAttempts(attempts) if isinstance(attempts, int) else attempts
108
134
 
109
- # submission tool
110
- @tool
111
- def submit_tool() -> Tool:
112
- async def execute(answer: str) -> ToolResult:
113
- """Submit an answer for evaluation.
114
-
115
- Args:
116
- answer (str): Submitted answer
117
- """
118
- return answer
119
-
120
- return execute
121
-
122
- # helper to extract a submitted answer
123
135
  def submission(tool_results: list[ChatMessage]) -> str | None:
124
136
  return next(
125
137
  (
126
138
  result.text
127
139
  for result in tool_results
128
140
  if isinstance(result, ChatMessageTool)
129
- and result.function == submit.name
141
+ and result.function == submit_tool.name
130
142
  ),
131
143
  None,
132
144
  )
133
145
 
134
- # resolve tools
135
- tools = tools or []
136
- tools.append(tool_with(submit_tool(), submit.name, submit.description))
137
-
138
146
  async def execute(state: AgentState) -> AgentState:
139
- # prepend system message if we have one
140
- if system_message:
141
- state.messages.insert(0, system_message)
142
-
143
- # resolve overflow handling
144
- if truncation == "auto":
145
- overflow = cast(MessageFilter | None, trim_messages)
146
- elif truncation == "disabled":
147
- overflow = None
148
- else:
149
- overflow = truncation
150
-
151
- # track attempts
152
- attempt_count = 0
153
-
154
- # main loop = will terminate after submit (subject to max_attempts)
155
- # or if a message or token limit is hit
156
- while True:
157
- # generate output and append assistant message
158
- state = await _agent_generate(model, state, tools)
159
-
160
- # check for context window overflow
161
- if state.output.stop_reason == "model_length":
162
- from inspect_ai.log._transcript import transcript
163
-
164
- if overflow is not None:
165
- previous_messages = state.messages[:-1]
166
- state.messages = await overflow(previous_messages)
167
- if len(state.messages) < len(previous_messages):
168
- transcript().info(
169
- "Agent exceeded model context window, truncating messages and continuing."
147
+ async with mcp_connection(tools):
148
+ # prepend system message if we have one
149
+ if system_message:
150
+ state.messages.insert(0, system_message)
151
+
152
+ # resolve overflow handling
153
+ if truncation == "auto":
154
+ overflow = cast(MessageFilter | None, trim_messages)
155
+ elif truncation == "disabled":
156
+ overflow = None
157
+ else:
158
+ overflow = truncation
159
+
160
+ # track attempts
161
+ attempt_count = 0
162
+
163
+ # main loop = will terminate after submit (subject to max_attempts)
164
+ # or if a message or token limit is hit
165
+ while True:
166
+ # generate output and append assistant message
167
+ state = await _agent_generate(model, state, tools)
168
+
169
+ # check for context window overflow
170
+ if state.output.stop_reason == "model_length":
171
+ from inspect_ai.log._transcript import transcript
172
+
173
+ if overflow is not None:
174
+ previous_messages = state.messages[:-1]
175
+ state.messages = await overflow(previous_messages)
176
+ if len(state.messages) < len(previous_messages):
177
+ transcript().info(
178
+ "Agent exceeded model context window, truncating messages and continuing."
179
+ )
180
+ continue
181
+
182
+ # no overflow policy or overflow didn't reduce conversation length
183
+ transcript().info("Agent terminated: model context window exceeded")
184
+ break
185
+
186
+ # resolve tool calls (if any)
187
+ if state.output.message.tool_calls:
188
+ # call tool functions
189
+ messages, output = await execute_tools(state.messages, tools)
190
+ state.messages.extend(messages)
191
+ if output:
192
+ state.output = output
193
+
194
+ # check for a submission
195
+ answer = submission(messages)
196
+ if answer is not None:
197
+ # set the output to the answer for scoring
198
+ state.output.completion = (
199
+ f"{state.output.completion}\n\n{answer}".strip()
170
200
  )
171
- continue
172
-
173
- # no overflow policy or overflow didn't reduce conversation length
174
- transcript().info("Agent terminated: model context window exceeded")
175
- break
176
-
177
- # resolve tool calls (if any)
178
- if state.output.message.tool_calls:
179
- # call tool functions
180
- messages, output = await execute_tools(state.messages, tools)
181
- state.messages.extend(messages)
182
- if output:
183
- state.output = output
184
-
185
- # check for a submission
186
- answer = submission(messages)
187
- if answer is not None:
188
- # set the output to the answer for scoring
189
- state.output.completion = (
190
- f"{state.output.completion}\n\n{answer}".strip()
191
- )
192
201
 
193
- # exit if we are at max_attempts
194
- attempt_count += 1
195
- if attempt_count >= attempts.attempts:
196
- break
202
+ # exit if we are at max_attempts
203
+ attempt_count += 1
204
+ if attempt_count >= attempts.attempts:
205
+ break
197
206
 
198
- # exit if the submission is successful
199
- answer_scores = await score(state)
200
- if attempts.score_value(answer_scores[0].value) == 1.0:
201
- break
207
+ # exit if the submission is successful
208
+ answer_scores = await score(state)
209
+ if attempts.score_value(answer_scores[0].value) == 1.0:
210
+ break
202
211
 
203
- # otherwise notify the model that it was incorrect and continue
204
- else:
205
- if callable(attempts.incorrect_message):
206
- if not is_callable_coroutine(attempts.incorrect_message):
207
- raise ValueError(
208
- "The incorrect_message function must be async."
212
+ # otherwise notify the model that it was incorrect and continue
213
+ else:
214
+ if callable(attempts.incorrect_message):
215
+ if not is_callable_coroutine(
216
+ attempts.incorrect_message
217
+ ):
218
+ raise ValueError(
219
+ "The incorrect_message function must be async."
220
+ )
221
+ response_message: str = (
222
+ await attempts.incorrect_message(
223
+ state, answer_scores
224
+ )
209
225
  )
210
- response_message: str = await attempts.incorrect_message(
211
- state, answer_scores
226
+ else:
227
+ response_message = attempts.incorrect_message
228
+
229
+ state.messages.append(
230
+ ChatMessageUser(content=response_message)
212
231
  )
213
- else:
214
- response_message = attempts.incorrect_message
215
-
216
- state.messages.append(ChatMessageUser(content=response_message))
217
-
218
- # call the on_continue hook (if any)
219
- if callable(on_continue):
220
- if not is_callable_coroutine(on_continue):
221
- raise ValueError("The on_continue function must be async.")
222
- do_continue = await cast(AgentContinue, on_continue)(state)
223
- if do_continue is True:
224
- # if there were no tool calls we need to send back a user message
225
- if not state.output.message.tool_calls:
232
+
233
+ # call the on_continue hook (if any)
234
+ if callable(on_continue):
235
+ if not is_callable_coroutine(on_continue):
236
+ raise ValueError("The on_continue function must be async.")
237
+ do_continue = await cast(AgentContinue, on_continue)(state)
238
+ if do_continue is True:
239
+ # if there were no tool calls we need to send back a user message
240
+ if not state.output.message.tool_calls:
241
+ state.messages.append(
242
+ ChatMessageUser(
243
+ content=DEFAULT_CONTINUE_PROMPT.format(
244
+ submit=submit_tool.name
245
+ )
246
+ )
247
+ )
248
+ elif isinstance(do_continue, str):
226
249
  state.messages.append(
227
250
  ChatMessageUser(
228
- content=DEFAULT_CONTINUE_PROMPT.format(
229
- submit=submit.name
230
- )
251
+ content=do_continue.format(submit=submit_tool.name)
231
252
  )
232
253
  )
233
- elif isinstance(do_continue, str):
254
+ else: # do_continue is False
255
+ break
256
+
257
+ # if there is no on_continue hook then add a user message if there were no tool calls
258
+ elif not state.output.message.tool_calls:
259
+ continue_msg = (
260
+ DEFAULT_CONTINUE_PROMPT
261
+ if on_continue is None
262
+ else str(on_continue)
263
+ )
234
264
  state.messages.append(
235
- ChatMessageUser(content=do_continue.format(submit=submit.name))
265
+ ChatMessageUser(
266
+ content=continue_msg.format(submit=submit_tool.name)
267
+ )
236
268
  )
237
- else: # do_continue is False
238
- break
239
269
 
240
- # if there is no on_continue hook then add a user message if there were no tool calls
241
- elif not state.output.message.tool_calls:
242
- continue_msg = (
243
- DEFAULT_CONTINUE_PROMPT if on_continue is None else str(on_continue)
244
- )
245
- state.messages.append(
246
- ChatMessageUser(content=continue_msg.format(submit=submit.name))
247
- )
248
-
249
- # once we are complete, remove submit tool calls from the history
250
- # (as they will potentially confuse parent agents who also have
251
- # their own submit tools that they are 'watching' for)
252
- state.messages = _remove_submit_tool(state.messages, submit.name)
253
- return state
270
+ # once we are complete, remove submit tool calls from the history
271
+ # (as they will potentially confuse parent agents who also have
272
+ # their own submit tools that they are 'watching' for)
273
+ state.messages = _remove_submit_tool(state.messages, submit_tool.name)
274
+ return state
254
275
 
255
276
  if name is not None or description is not None:
256
277
  return agent_with(execute, name=name, description=description)
@@ -259,12 +280,24 @@ def react(
259
280
 
260
281
 
261
282
  async def _agent_generate(
262
- model: str | Model | Agent | None, state: AgentState, tools: list[Tool]
283
+ model: str | Model | Agent | None,
284
+ state: AgentState,
285
+ tools: Sequence[Tool | ToolDef | ToolSource],
263
286
  ) -> AgentState:
264
287
  # convert model to agent
265
288
  if isinstance(model, str | Model) or model is None:
266
289
  model = _model_generate(model)
267
290
 
291
+ # resolve tools
292
+ resolved_tools: list[Tool] = []
293
+ for t in tools:
294
+ if isinstance(t, ToolSource):
295
+ resolved_tools.extend(await t.tools())
296
+ elif isinstance(t, ToolDef):
297
+ resolved_tools.append(t.as_tool())
298
+ else:
299
+ resolved_tools.append(t)
300
+
268
301
  # confirm we have a tools param
269
302
  agent_tool_info = parse_tool_info(model)
270
303
  if "tools" not in agent_tool_info.parameters.properties:
@@ -273,7 +306,7 @@ async def _agent_generate(
273
306
  )
274
307
 
275
308
  # call the agent
276
- return await model(state, tools)
309
+ return await model(state, resolved_tools)
277
310
 
278
311
 
279
312
  def _model_generate(model: str | Model | None) -> Agent:
@@ -2,6 +2,7 @@ from typing import Awaitable, Callable, NamedTuple, TypeAlias
2
2
 
3
3
  from inspect_ai.agent._agent import AgentState
4
4
  from inspect_ai.scorer._metric import Score, ValueToFloat, value_to_float
5
+ from inspect_ai.tool._tool import Tool
5
6
 
6
7
  DEFAULT_HANDOFF_PROMPT = """
7
8
  You are part of a multi-agent system designed to make agent coordination and
@@ -80,8 +81,18 @@ class AgentAttempts(NamedTuple):
80
81
  class AgentSubmit(NamedTuple):
81
82
  """Configure the submit tool of a react agent."""
82
83
 
83
- name: str = "submit"
84
- """Name for submit tool."""
84
+ name: str | None = None
85
+ """Name for submit tool (defaults to 'submit')."""
85
86
 
86
- description: str = "Submit an answer for evaluation."
87
- """Description of submit tool."""
87
+ description: str | None = None
88
+ """Description of submit tool (defaults to 'Submit an answer for evaluation')."""
89
+
90
+ tool: Tool | None = None
91
+ """Alternate implementation for submit tool.
92
+
93
+ The tool can provide its `name` and `description` internally,
94
  or these values can be overridden by the `name` and `description`
95
+ fields in `AgentSubmit`
96
+
97
+ The tool should return the `answer` provided to it for scoring.
98
+ """
@@ -2,7 +2,7 @@ import fnmatch
2
2
  import sys
3
3
  from dataclasses import dataclass
4
4
  from pathlib import Path
5
- from typing import Any, Generator, cast
5
+ from typing import Any, Generator
6
6
 
7
7
  from pydantic import BaseModel, Field, model_validator
8
8
 
@@ -140,7 +140,7 @@ def approval_policies_from_config(
140
140
  def create_approval_policy(
141
141
  name: str, tools: str | list[str], params: dict[str, Any] = {}
142
142
  ) -> ApprovalPolicy:
143
- approver = cast(Approver, registry_create("approver", name, **params))
143
+ approver = registry_create("approver", name, **params)
144
144
  return ApprovalPolicy(approver, tools)
145
145
 
146
146
  # map config -> policy