inspect-ai 0.3.90__py3-none-any.whl → 0.3.91__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (370) hide show
  1. inspect_ai/_cli/common.py +13 -0
  2. inspect_ai/_cli/eval.py +40 -0
  3. inspect_ai/_display/textual/widgets/samples.py +49 -4
  4. inspect_ai/_display/textual/widgets/vscode.py +4 -2
  5. inspect_ai/_eval/eval.py +41 -28
  6. inspect_ai/_eval/evalset.py +4 -0
  7. inspect_ai/_eval/loader.py +4 -5
  8. inspect_ai/_eval/registry.py +1 -1
  9. inspect_ai/_eval/run.py +6 -3
  10. inspect_ai/_eval/task/log.py +6 -0
  11. inspect_ai/_eval/task/run.py +108 -41
  12. inspect_ai/_eval/task/sandbox.py +19 -5
  13. inspect_ai/_util/_async.py +1 -1
  14. inspect_ai/_util/constants.py +1 -0
  15. inspect_ai/_util/environ.py +32 -0
  16. inspect_ai/_util/file.py +8 -1
  17. inspect_ai/_util/httpx.py +105 -22
  18. inspect_ai/_util/registry.py +83 -9
  19. inspect_ai/_util/text.py +81 -17
  20. inspect_ai/_util/transcript.py +9 -6
  21. inspect_ai/_util/vscode.py +7 -2
  22. inspect_ai/_view/schema.py +1 -1
  23. inspect_ai/_view/www/babel.config.js +11 -0
  24. inspect_ai/_view/www/dist/assets/index.css +3640 -3563
  25. inspect_ai/_view/www/dist/assets/index.js +59204 -52519
  26. inspect_ai/_view/www/eslint.config.mjs +10 -1
  27. inspect_ai/_view/www/jest.config.mjs +21 -0
  28. inspect_ai/_view/www/log-schema.json +111 -2
  29. inspect_ai/_view/www/package.json +19 -5
  30. inspect_ai/_view/www/src/{types → @types}/log.d.ts +95 -32
  31. inspect_ai/_view/www/{App.css → src/app/App.css} +22 -14
  32. inspect_ai/_view/www/src/app/App.tsx +168 -0
  33. inspect_ai/_view/www/src/{AppErrorBoundary.tsx → app/AppErrorBoundary.tsx} +1 -1
  34. inspect_ai/_view/www/src/{appearance → app/appearance}/icons.ts +1 -0
  35. inspect_ai/_view/www/src/{metadata → app/content}/RenderedContent.tsx +5 -5
  36. inspect_ai/_view/www/src/{workspace/WorkSpaceView.tsx → app/log-view/LogView.tsx} +59 -40
  37. inspect_ai/_view/www/src/app/log-view/LogViewContainer.tsx +159 -0
  38. inspect_ai/_view/www/src/app/log-view/LogViewLayout.tsx +109 -0
  39. inspect_ai/_view/www/src/{workspace → app/log-view}/error/TaskErrorPanel.tsx +3 -3
  40. inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ModelRolesView.tsx +1 -1
  41. inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/Navbar.tsx +4 -4
  42. inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/PrimaryBar.tsx +8 -8
  43. inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ResultsPanel.tsx +6 -6
  44. inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/RunningStatusPanel.tsx +1 -1
  45. inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ScoreGrid.tsx +1 -1
  46. inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/SecondaryBar.tsx +8 -8
  47. inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/InfoTab.tsx +35 -6
  48. inspect_ai/_view/www/src/app/log-view/tabs/JsonTab.tsx +136 -0
  49. inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/SamplesTab.tsx +82 -73
  50. inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/grouping.ts +3 -3
  51. inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/types.ts +1 -1
  52. inspect_ai/_view/www/src/{plan → app/plan}/DatasetDetailView.tsx +2 -2
  53. inspect_ai/_view/www/src/{plan → app/plan}/DetailStep.tsx +1 -1
  54. inspect_ai/_view/www/src/{plan → app/plan}/ModelCard.tsx +4 -4
  55. inspect_ai/_view/www/src/{plan → app/plan}/PlanCard.tsx +2 -2
  56. inspect_ai/_view/www/src/{plan → app/plan}/PlanDetailView.tsx +5 -5
  57. inspect_ai/_view/www/src/{plan → app/plan}/SolverDetailView.tsx +1 -1
  58. inspect_ai/_view/www/src/app/routing/AppRouter.tsx +58 -0
  59. inspect_ai/_view/www/src/app/routing/navigationHooks.ts +182 -0
  60. inspect_ai/_view/www/src/app/routing/url.ts +43 -0
  61. inspect_ai/_view/www/src/{samples → app/samples}/InlineSampleDisplay.tsx +11 -27
  62. inspect_ai/_view/www/src/{samples → app/samples}/SampleDialog.tsx +36 -40
  63. inspect_ai/_view/www/src/{samples → app/samples}/SampleDisplay.module.css +4 -0
  64. inspect_ai/_view/www/src/{samples → app/samples}/SampleDisplay.tsx +116 -49
  65. inspect_ai/_view/www/src/{samples → app/samples}/SampleSummaryView.module.css +1 -1
  66. inspect_ai/_view/www/src/{samples → app/samples}/SampleSummaryView.tsx +29 -26
  67. inspect_ai/_view/www/src/{samples → app/samples}/SamplesTools.tsx +3 -3
  68. inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessage.module.css +5 -2
  69. inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessage.tsx +12 -4
  70. inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessageRenderer.tsx +3 -3
  71. inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessageRow.tsx +6 -1
  72. inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatView.tsx +4 -2
  73. inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatViewVirtualList.tsx +5 -3
  74. inspect_ai/_view/www/src/app/samples/chat/MessageContent.module.css +12 -0
  75. inspect_ai/_view/www/src/{samples → app/samples}/chat/MessageContent.tsx +11 -10
  76. inspect_ai/_view/www/src/app/samples/chat/MessageContents.module.css +7 -0
  77. inspect_ai/_view/www/src/{samples → app/samples}/chat/MessageContents.tsx +14 -8
  78. inspect_ai/_view/www/src/{samples → app/samples}/chat/messages.ts +2 -2
  79. inspect_ai/_view/www/src/app/samples/chat/tools/ToolCallView.module.css +7 -0
  80. inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolCallView.tsx +26 -27
  81. inspect_ai/_view/www/src/app/samples/chat/tools/ToolInput.module.css +19 -0
  82. inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolInput.tsx +3 -3
  83. inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolOutput.module.css +1 -0
  84. inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolOutput.tsx +1 -1
  85. inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolTitle.module.css +4 -0
  86. inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolTitle.tsx +2 -2
  87. inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/tool.ts +1 -1
  88. inspect_ai/_view/www/src/app/samples/chat/types.ts +1 -0
  89. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/samplesDescriptor.tsx +38 -15
  90. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/BooleanScoreDescriptor.tsx +1 -1
  91. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/CategoricalScoreDescriptor.tsx +2 -2
  92. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/NumericScoreDescriptor.tsx +3 -3
  93. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/ObjectScoreDescriptor.tsx +4 -4
  94. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/OtherScoreDescriptor.tsx +2 -2
  95. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/PassFailScoreDescriptor.tsx +2 -2
  96. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/ScoreDescriptor.tsx +1 -1
  97. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/types.ts +4 -3
  98. inspect_ai/_view/www/src/{samples → app/samples}/error/SampleErrorView.module.css +2 -1
  99. inspect_ai/_view/www/src/{samples → app/samples}/list/SampleHeader.tsx +3 -0
  100. inspect_ai/_view/www/src/{samples → app/samples}/list/SampleList.tsx +47 -33
  101. inspect_ai/_view/www/src/{samples → app/samples}/list/SampleRow.module.css +16 -0
  102. inspect_ai/_view/www/src/{samples → app/samples}/list/SampleRow.tsx +47 -20
  103. inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/SelectScorer.tsx +1 -1
  104. inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/SortFilter.tsx +4 -4
  105. inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/filters.ts +8 -6
  106. inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/SampleFilter.tsx +4 -3
  107. inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/completions.ts +1 -1
  108. inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/language.ts +1 -0
  109. inspect_ai/_view/www/src/{samples → app/samples}/sampleDataAdapter.ts +3 -3
  110. inspect_ai/_view/www/src/{samples → app/samples}/sampleLimit.ts +1 -1
  111. inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScores.tsx +1 -1
  112. inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScoresGrid.tsx +12 -11
  113. inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScoresView.tsx +6 -6
  114. inspect_ai/_view/www/src/{samples → app/samples}/transcript/ApprovalEventView.tsx +1 -1
  115. inspect_ai/_view/www/src/{samples → app/samples}/transcript/ErrorEventView.tsx +3 -3
  116. inspect_ai/_view/www/src/{samples → app/samples}/transcript/InfoEventView.tsx +4 -4
  117. inspect_ai/_view/www/src/{samples → app/samples}/transcript/InputEventView.tsx +3 -3
  118. inspect_ai/_view/www/src/{samples → app/samples}/transcript/LoggerEventView.tsx +3 -3
  119. inspect_ai/_view/www/src/{samples → app/samples}/transcript/ModelEventView.module.css +13 -7
  120. inspect_ai/_view/www/src/{samples → app/samples}/transcript/ModelEventView.tsx +49 -21
  121. inspect_ai/_view/www/src/{samples → app/samples}/transcript/SampleInitEventView.tsx +11 -9
  122. inspect_ai/_view/www/src/{samples → app/samples}/transcript/SampleLimitEventView.tsx +1 -1
  123. inspect_ai/_view/www/src/{samples → app/samples}/transcript/SandboxEventView.tsx +8 -6
  124. inspect_ai/_view/www/src/{samples → app/samples}/transcript/ScoreEventView.tsx +4 -4
  125. inspect_ai/_view/www/src/{samples → app/samples}/transcript/StepEventView.tsx +11 -3
  126. inspect_ai/_view/www/src/{samples → app/samples}/transcript/SubtaskEventView.tsx +2 -2
  127. inspect_ai/_view/www/src/{samples → app/samples}/transcript/ToolEventView.tsx +2 -2
  128. inspect_ai/_view/www/src/{samples → app/samples}/transcript/TranscriptView.module.css +8 -7
  129. inspect_ai/_view/www/src/{samples → app/samples}/transcript/TranscriptView.tsx +32 -114
  130. inspect_ai/_view/www/src/{samples → app/samples}/transcript/TranscriptVirtualListComponent.module.css +6 -5
  131. inspect_ai/_view/www/src/{samples → app/samples}/transcript/TranscriptVirtualListComponent.tsx +14 -2
  132. inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventPanel.tsx +2 -2
  133. inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventTimingPanel.tsx +1 -1
  134. inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/utils.ts +1 -1
  135. inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateEventRenderers.tsx +23 -21
  136. inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateEventRenders.module.css +7 -0
  137. inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateEventView.tsx +2 -2
  138. inspect_ai/_view/www/src/app/samples/transcript/transform/fixups.ts +142 -0
  139. inspect_ai/_view/www/src/app/samples/transcript/transform/treeify.ts +39 -0
  140. inspect_ai/_view/www/src/{samples → app/samples}/transcript/types.ts +1 -1
  141. inspect_ai/_view/www/src/{workspace → app}/sidebar/EvalStatus.tsx +1 -1
  142. inspect_ai/_view/www/src/app/sidebar/LogDirectoryTitleView.module.css +16 -0
  143. inspect_ai/_view/www/src/app/sidebar/LogDirectoryTitleView.tsx +70 -0
  144. inspect_ai/_view/www/src/{workspace → app}/sidebar/Sidebar.module.css +8 -0
  145. inspect_ai/_view/www/src/{workspace → app}/sidebar/Sidebar.tsx +35 -17
  146. inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarLogEntry.tsx +1 -1
  147. inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarScoreView.tsx +2 -2
  148. inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarScoresView.tsx +2 -2
  149. inspect_ai/_view/www/src/{types.ts → app/types.ts} +18 -11
  150. inspect_ai/_view/www/src/{usage → app/usage}/ModelTokenTable.tsx +1 -1
  151. inspect_ai/_view/www/src/{usage → app/usage}/ModelUsagePanel.tsx +2 -2
  152. inspect_ai/_view/www/src/{usage → app/usage}/TokenTable.tsx +1 -1
  153. inspect_ai/_view/www/src/{usage → app/usage}/UsageCard.tsx +6 -6
  154. inspect_ai/_view/www/src/{api → client/api}/api-browser.ts +2 -2
  155. inspect_ai/_view/www/src/{api → client/api}/api-http.ts +3 -3
  156. inspect_ai/_view/www/src/{api → client/api}/api-vscode.ts +2 -2
  157. inspect_ai/_view/www/src/{api → client/api}/client-api.ts +6 -5
  158. inspect_ai/_view/www/src/{api → client/api}/index.ts +2 -2
  159. inspect_ai/_view/www/src/{api → client/api}/types.ts +4 -1
  160. inspect_ai/_view/www/src/{logfile → client/remote}/remoteLogFile.ts +3 -3
  161. inspect_ai/_view/www/src/{storage → client/storage}/index.ts +11 -5
  162. inspect_ai/_view/www/src/components/Card.tsx +1 -1
  163. inspect_ai/_view/www/src/components/CopyButton.tsx +1 -1
  164. inspect_ai/_view/www/src/components/DownloadButton.tsx +1 -1
  165. inspect_ai/_view/www/src/components/ErrorPanel.tsx +1 -1
  166. inspect_ai/_view/www/src/components/{ExpandablePanel.css → ExpandablePanel.module.css} +14 -11
  167. inspect_ai/_view/www/src/components/ExpandablePanel.tsx +16 -10
  168. inspect_ai/_view/www/src/components/FindBand.tsx +1 -1
  169. inspect_ai/_view/www/src/components/JsonPanel.css +2 -2
  170. inspect_ai/_view/www/src/components/LargeModal.tsx +12 -1
  171. inspect_ai/_view/www/src/components/LightboxCarousel.tsx +1 -1
  172. inspect_ai/_view/www/src/components/MarkdownDiv.tsx +3 -1
  173. inspect_ai/_view/www/src/components/MessageBand.tsx +1 -1
  174. inspect_ai/_view/www/src/components/NoContentsPanel.tsx +1 -1
  175. inspect_ai/_view/www/src/constants.ts +10 -9
  176. inspect_ai/_view/www/src/index.tsx +27 -11
  177. inspect_ai/_view/www/src/state/appSlice.ts +44 -5
  178. inspect_ai/_view/www/src/state/hooks.ts +30 -7
  179. inspect_ai/_view/www/src/state/logSlice.ts +7 -5
  180. inspect_ai/_view/www/src/state/logsPolling.ts +1 -1
  181. inspect_ai/_view/www/src/state/logsSlice.ts +18 -13
  182. inspect_ai/_view/www/src/state/samplePolling.ts +12 -12
  183. inspect_ai/_view/www/src/state/sampleSlice.ts +3 -5
  184. inspect_ai/_view/www/src/state/sampleUtils.ts +1 -1
  185. inspect_ai/_view/www/src/{scoring/utils.ts → state/scoring.ts} +2 -2
  186. inspect_ai/_view/www/src/state/store.ts +9 -7
  187. inspect_ai/_view/www/src/state/utils.ts +1 -1
  188. inspect_ai/_view/www/src/tests/README.md +49 -0
  189. inspect_ai/_view/www/src/tests/__mocks__/fileMock.js +1 -0
  190. inspect_ai/_view/www/src/tests/__mocks__/styleMock.js +1 -0
  191. inspect_ai/_view/www/src/tests/setupTests.mjs +1 -0
  192. inspect_ai/_view/www/src/tests/utils/base64.test.ts +23 -0
  193. inspect_ai/_view/www/src/tests/utils/format.test.ts +127 -0
  194. inspect_ai/_view/www/src/tests/utils/path.test.ts +54 -0
  195. inspect_ai/_view/www/src/utils/format.ts +8 -2
  196. inspect_ai/_view/www/src/utils/path.ts +14 -2
  197. inspect_ai/_view/www/src/utils/polling.ts +1 -2
  198. inspect_ai/_view/www/src/utils/uri.ts +32 -0
  199. inspect_ai/_view/www/yarn.lock +3310 -382
  200. inspect_ai/agent/_handoff.py +6 -3
  201. inspect_ai/agent/_human/agent.py +5 -3
  202. inspect_ai/agent/_human/install.py +16 -7
  203. inspect_ai/agent/_human/panel.py +14 -1
  204. inspect_ai/agent/_human/service.py +5 -1
  205. inspect_ai/agent/_react.py +161 -128
  206. inspect_ai/agent/_types.py +15 -4
  207. inspect_ai/approval/_policy.py +2 -2
  208. inspect_ai/log/_file.py +30 -11
  209. inspect_ai/log/_log.py +7 -1
  210. inspect_ai/log/_recorders/eval.py +3 -0
  211. inspect_ai/log/_recorders/types.py +1 -0
  212. inspect_ai/log/_samples.py +4 -0
  213. inspect_ai/model/_call_tools.py +33 -17
  214. inspect_ai/model/_generate_config.py +10 -2
  215. inspect_ai/model/_model.py +41 -21
  216. inspect_ai/model/_model_output.py +2 -1
  217. inspect_ai/model/_openai.py +10 -8
  218. inspect_ai/model/_openai_responses.py +83 -42
  219. inspect_ai/model/_providers/anthropic.py +14 -12
  220. inspect_ai/model/_providers/google.py +191 -95
  221. inspect_ai/model/_providers/hf.py +1 -1
  222. inspect_ai/model/_providers/mistral.py +2 -3
  223. inspect_ai/model/_providers/openai.py +54 -17
  224. inspect_ai/model/_providers/openai_o1.py +1 -1
  225. inspect_ai/model/_providers/openai_responses.py +28 -16
  226. inspect_ai/model/_providers/openrouter.py +14 -0
  227. inspect_ai/model/_providers/providers.py +2 -2
  228. inspect_ai/model/_providers/util/chatapi.py +17 -7
  229. inspect_ai/model/_providers/vllm.py +1 -1
  230. inspect_ai/scorer/_metric.py +17 -1
  231. inspect_ai/scorer/_model.py +51 -6
  232. inspect_ai/scorer/_scorer.py +1 -1
  233. inspect_ai/solver/_human_agent.py +3 -0
  234. inspect_ai/solver/_plan.py +1 -1
  235. inspect_ai/solver/_solver.py +1 -1
  236. inspect_ai/solver/_use_tools.py +14 -8
  237. inspect_ai/tool/__init__.py +16 -1
  238. inspect_ai/tool/_json_rpc_helpers.py +285 -0
  239. inspect_ai/tool/_mcp/__init__.py +13 -0
  240. inspect_ai/tool/_mcp/_context.py +14 -0
  241. inspect_ai/tool/_mcp/_mcp.py +293 -0
  242. inspect_ai/tool/_mcp/_sandbox.py +104 -0
  243. inspect_ai/tool/_mcp/_types.py +31 -0
  244. inspect_ai/tool/_mcp/connection.py +60 -0
  245. inspect_ai/tool/_mcp/sampling.py +118 -0
  246. inspect_ai/tool/_mcp/server.py +112 -0
  247. inspect_ai/tool/_mcp/tools.py +34 -0
  248. inspect_ai/tool/_tool.py +13 -0
  249. inspect_ai/tool/_tool_def.py +24 -7
  250. inspect_ai/tool/_tool_support_helpers.py +129 -153
  251. inspect_ai/tool/_tools/_bash_session.py +11 -11
  252. inspect_ai/tool/_tools/_text_editor.py +6 -6
  253. inspect_ai/tool/_tools/_web_browser/_web_browser.py +8 -8
  254. inspect_ai/util/_anyio.py +31 -20
  255. inspect_ai/util/_json.py +20 -2
  256. inspect_ai/util/_sandbox/context.py +18 -7
  257. inspect_ai/util/_sandbox/docker/compose.py +1 -1
  258. inspect_ai/util/_sandbox/docker/docker.py +92 -21
  259. inspect_ai/util/_sandbox/environment.py +33 -2
  260. inspect_ai/util/_sandbox/events.py +2 -2
  261. inspect_ai/util/_sandbox/service.py +13 -3
  262. {inspect_ai-0.3.90.dist-info → inspect_ai-0.3.91.dist-info}/METADATA +6 -2
  263. inspect_ai-0.3.91.dist-info/RECORD +732 -0
  264. {inspect_ai-0.3.90.dist-info → inspect_ai-0.3.91.dist-info}/WHEEL +1 -1
  265. inspect_ai/_view/www/src/App.tsx +0 -316
  266. inspect_ai/_view/www/src/samples/chat/MessageContent.module.css +0 -4
  267. inspect_ai/_view/www/src/samples/chat/MessageContents.module.css +0 -3
  268. inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.module.css +0 -3
  269. inspect_ai/_view/www/src/samples/chat/tools/ToolInput.module.css +0 -14
  270. inspect_ai/_view/www/src/workspace/WorkSpace.tsx +0 -292
  271. inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.module.css +0 -5
  272. inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +0 -57
  273. inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +0 -43
  274. inspect_ai-0.3.90.dist-info/RECORD +0 -705
  275. /inspect_ai/_view/www/src/{types → @types}/asciicinema-player.d.ts +0 -0
  276. /inspect_ai/_view/www/src/{types → @types}/jsondiffpatch.d.ts +0 -0
  277. /inspect_ai/_view/www/src/{types → @types}/markdown-it-katex.d.ts +0 -0
  278. /inspect_ai/_view/www/src/{types → @types}/prism.d.ts +0 -0
  279. /inspect_ai/_view/www/src/{appearance → app/appearance}/colors.ts +0 -0
  280. /inspect_ai/_view/www/src/{appearance → app/appearance}/fonts.ts +0 -0
  281. /inspect_ai/_view/www/src/{appearance → app/appearance}/styles.ts +0 -0
  282. /inspect_ai/_view/www/src/{metadata → app/content}/MetaDataGrid.tsx +0 -0
  283. /inspect_ai/_view/www/src/{metadata → app/content}/MetaDataView.module.css +0 -0
  284. /inspect_ai/_view/www/src/{metadata → app/content}/MetaDataView.tsx +0 -0
  285. /inspect_ai/_view/www/src/{metadata → app/content}/MetadataGrid.module.css +0 -0
  286. /inspect_ai/_view/www/src/{metadata → app/content}/RenderedContent.module.css +0 -0
  287. /inspect_ai/_view/www/src/{metadata → app/content}/types.ts +0 -0
  288. /inspect_ai/_view/www/src/{workspace/WorkSpaceView.module.css → app/log-view/LogView.module.css} +0 -0
  289. /inspect_ai/_view/www/src/{workspace → app/log-view}/error/TaskErrorPanel.module.css +0 -0
  290. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ModelRolesView.module.css +0 -0
  291. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/Navbar.module.css +0 -0
  292. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/PrimaryBar.module.css +0 -0
  293. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ResultsPanel.module.css +0 -0
  294. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/RunningStatusPanel.module.css +0 -0
  295. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ScoreGrid.module.css +0 -0
  296. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/SecondaryBar.module.css +0 -0
  297. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/StatusPanel.module.css +0 -0
  298. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/StatusPanel.tsx +0 -0
  299. /inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/InfoTab.module.css +0 -0
  300. /inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/JsonTab.module.css +0 -0
  301. /inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/RunningNoSamples.module.css +0 -0
  302. /inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/RunningNoSamples.tsx +0 -0
  303. /inspect_ai/_view/www/src/{workspace → app/log-view}/types.ts +0 -0
  304. /inspect_ai/_view/www/src/{workspace → app/log-view}/utils.ts +0 -0
  305. /inspect_ai/_view/www/src/{plan → app/plan}/DatasetDetailView.module.css +0 -0
  306. /inspect_ai/_view/www/src/{plan → app/plan}/DetailStep.module.css +0 -0
  307. /inspect_ai/_view/www/src/{plan → app/plan}/ModelCard.module.css +0 -0
  308. /inspect_ai/_view/www/src/{plan → app/plan}/PlanDetailView.module.css +0 -0
  309. /inspect_ai/_view/www/src/{plan → app/plan}/ScorerDetailView.module.css +0 -0
  310. /inspect_ai/_view/www/src/{plan → app/plan}/ScorerDetailView.tsx +0 -0
  311. /inspect_ai/_view/www/src/{plan → app/plan}/SolverDetailView.module.css +0 -0
  312. /inspect_ai/_view/www/src/{samples → app/samples}/InlineSampleDisplay.module.css +0 -0
  313. /inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessageRow.module.css +0 -0
  314. /inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatViewVirtualList.module.css +0 -0
  315. /inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/BooleanScoreDescriptor.module.css +0 -0
  316. /inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/ObjectScoreDescriptor.module.css +0 -0
  317. /inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/PassFailScoreDescriptor.module.css +0 -0
  318. /inspect_ai/_view/www/src/{samples → app/samples}/error/FlatSampleErrorView.module.css +0 -0
  319. /inspect_ai/_view/www/src/{samples → app/samples}/error/FlatSampleErrorView.tsx +0 -0
  320. /inspect_ai/_view/www/src/{samples → app/samples}/error/SampleErrorView.tsx +0 -0
  321. /inspect_ai/_view/www/src/{samples → app/samples}/error/error.ts +0 -0
  322. /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleFooter.module.css +0 -0
  323. /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleFooter.tsx +0 -0
  324. /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleHeader.module.css +0 -0
  325. /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleList.module.css +0 -0
  326. /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleSeparator.module.css +0 -0
  327. /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleSeparator.tsx +0 -0
  328. /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/EpochFilter.module.css +0 -0
  329. /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/EpochFilter.tsx +0 -0
  330. /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/SelectScorer.module.css +0 -0
  331. /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/SortFilter.module.css +0 -0
  332. /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/SampleFilter.module.css +0 -0
  333. /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/tokenize.ts +0 -0
  334. /inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScores.module.css +0 -0
  335. /inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScoresGrid.module.css +0 -0
  336. /inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScoresView.module.css +0 -0
  337. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/InfoEventView.module.css +0 -0
  338. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/LoggerEventView.module.css +0 -0
  339. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/SampleInitEventView.module.css +0 -0
  340. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/SandboxEventView.module.css +0 -0
  341. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/ScoreEventView.module.css +0 -0
  342. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/SubtaskEventView.module.css +0 -0
  343. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/ToolEventView.module.css +0 -0
  344. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventNav.module.css +0 -0
  345. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventNav.tsx +0 -0
  346. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventNavs.module.css +0 -0
  347. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventNavs.tsx +0 -0
  348. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventPanel.module.css +0 -0
  349. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventProgressPanel.module.css +0 -0
  350. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventProgressPanel.tsx +0 -0
  351. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventRow.module.css +0 -0
  352. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventRow.tsx +0 -0
  353. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventSection.module.css +0 -0
  354. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventSection.tsx +0 -0
  355. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventTimingPanel.module.css +0 -0
  356. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateDiffView.tsx +0 -0
  357. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateEventView.module.css +0 -0
  358. /inspect_ai/_view/www/src/{workspace → app}/sidebar/EvalStatus.module.css +0 -0
  359. /inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarLogEntry.module.css +0 -0
  360. /inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarScoreView.module.css +0 -0
  361. /inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarScoresView.module.css +0 -0
  362. /inspect_ai/_view/www/src/{usage → app/usage}/ModelUsagePanel.module.css +0 -0
  363. /inspect_ai/_view/www/src/{usage → app/usage}/TokenTable.module.css +0 -0
  364. /inspect_ai/_view/www/src/{usage → app/usage}/UsageCard.module.css +0 -0
  365. /inspect_ai/_view/www/src/{api → client/api}/api-shared.ts +0 -0
  366. /inspect_ai/_view/www/src/{api → client/api}/jsonrpc.ts +0 -0
  367. /inspect_ai/_view/www/src/{logfile → client/remote}/remoteZipFile.ts +0 -0
  368. {inspect_ai-0.3.90.dist-info → inspect_ai-0.3.91.dist-info}/entry_points.txt +0 -0
  369. {inspect_ai-0.3.90.dist-info → inspect_ai-0.3.91.dist-info}/licenses/LICENSE +0 -0
  370. {inspect_ai-0.3.90.dist-info → inspect_ai-0.3.91.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,104 @@
1
+ import sys
2
+ from contextlib import asynccontextmanager
3
+ from typing import TextIO
4
+
5
+ import anyio
6
+ from anyio.streams.memory import MemoryObjectReceiveStream, MemoryObjectSendStream
7
+ from mcp import JSONRPCRequest, StdioServerParameters
8
+ from mcp.types import JSONRPCMessage, JSONRPCNotification
9
+
10
+ from inspect_ai.tool._tool_support_helpers import (
11
+ exec_model_request,
12
+ exec_notification,
13
+ exec_scalar_request,
14
+ tool_container_sandbox,
15
+ )
16
+
17
+ from ._context import MCPServerContext
18
+
19
+
20
+ # Pardon the type: ignore's here. This code is a modified clone of Anthropic code
21
+ # for stdio_client. In their case, they don't provide a type hint for the return
22
+ # value. We suspect that if they did, they'd encounter the same issues we're
23
+ # suppressing. Nevertheless, we're confident that the runtime behavior of the
24
+ # code is what we want, and that the errors are purely in the type domain.
25
@asynccontextmanager  # type: ignore
async def sandbox_client(  # type: ignore
    server: StdioServerParameters,
    *,
    sandbox_name: str | None = None,
    errlog: TextIO = sys.stderr,
) -> MCPServerContext:  # type: ignore
    """Run an MCP stdio server inside a sandbox and expose it as a stream pair.

    The server is launched in the sandbox with an `mcp_launch_server` request.
    Messages written to the yielded write stream are forwarded into the
    sandbox, and the response to each request is delivered on the yielded
    read stream. When the context exits, an `mcp_kill_server` request is
    issued for the launched session.

    Args:
        server: Parameters of the stdio server to launch; sent to the sandbox
            as `server.model_dump()`.
        sandbox_name: Passed through to `tool_container_sandbox()` to select
            the sandbox.
        errlog: Not read anywhere in this function body.
            NOTE(review): presumably kept for signature parity with the
            upstream `stdio_client` - confirm.

    Yields:
        A `(read_stream, write_stream)` tuple of anyio memory object streams.

    Raises:
        AssertionError: Raised in the forwarding task when a message is
            neither a `JSONRPCRequest` nor a `JSONRPCNotification`.
    """
    sandbox_environment = await tool_container_sandbox(
        "mcp support", sandbox_name=sandbox_name
    )

    # read_stream is remote process's stdout
    read_stream: MemoryObjectReceiveStream[JSONRPCMessage | Exception]
    read_stream_writer: MemoryObjectSendStream[JSONRPCMessage | Exception]

    # write_stream is remote process's stdin
    write_stream: MemoryObjectSendStream[JSONRPCMessage]
    write_stream_reader: MemoryObjectReceiveStream[JSONRPCMessage]

    # Both streams are created with a buffer size of 0.
    read_stream_writer, read_stream = anyio.create_memory_object_stream(0)
    write_stream, write_stream_reader = anyio.create_memory_object_stream(0)

    # The integer returned here identifies the session in every later call.
    session_id = await exec_scalar_request(
        sandbox=sandbox_environment,
        method="mcp_launch_server",
        params={"server_params": server.model_dump()},
        result_type=int,
    )

    async def stdout_reader() -> None:
        # This is NYI until we support unsolicited messages from the sandbox
        # back to the client
        pass

    async def stdin_writer() -> None:
        try:
            async with write_stream_reader:
                # This reads messages until the stream is closed
                async for message in write_stream_reader:
                    root = message.root
                    if isinstance(root, JSONRPCRequest):
                        # Requests are answered: the sandbox's reply is
                        # pushed onto the read stream for the client.
                        await read_stream_writer.send(
                            await exec_model_request(
                                sandbox=sandbox_environment,
                                method="mcp_send_request",
                                params={
                                    "session_id": session_id,
                                    "request": root.model_dump(),
                                },
                                result_type=JSONRPCMessage,
                            )
                        )
                    elif isinstance(root, JSONRPCNotification):
                        # Notifications are forwarded; nothing is sent back.
                        await exec_notification(
                            sandbox=sandbox_environment,
                            method="mcp_send_notification",
                            params={
                                "session_id": session_id,
                                "notification": root.model_dump(),
                            },
                        )
                    else:
                        # Raised explicitly rather than via `assert` so the
                        # check is not stripped under `python -O`, which
                        # would silently drop the message.
                        raise AssertionError(f"Unexpected message type {message=}")

        except anyio.ClosedResourceError:
            await anyio.lowlevel.checkpoint()

    async with anyio.create_task_group() as tg:
        tg.start_soon(stdout_reader)
        tg.start_soon(stdin_writer)

        try:
            yield read_stream, write_stream
        finally:
            # Always ask the sandbox to kill the launched server session.
            await exec_scalar_request(
                sandbox=sandbox_environment,
                method="mcp_kill_server",
                params={"session_id": session_id},
                result_type=type(None),
            )
@@ -0,0 +1,31 @@
1
+ import abc
2
+ from logging import getLogger
3
+ from typing import Literal
4
+
5
+ from .._tool import Tool, ToolSource
6
+
7
+ logger = getLogger(__name__)
8
+
9
+
10
class MCPServer(ToolSource):
    """Interface to a Model Context Protocol (MCP) server.

    An `MCPServer` is a `ToolSource`, so it can be passed directly in a
    `tools` argument. Use the `mcp_tools()` function to filter the list of
    tools it provides.
    """

    @abc.abstractmethod
    async def _connect(self) -> None:
        """Connect to the server; concrete subclasses supply the behaviour."""

    @abc.abstractmethod
    async def _close(self) -> None:
        """Disconnect from the server; concrete subclasses supply the behaviour."""

    @abc.abstractmethod
    async def _list_tools(
        self, tools: Literal["all"] | list[str] = "all"
    ) -> list[Tool]:
        """List tools from the server.

        Args:
            tools: Either the literal `"all"` (the default) or a list of
                strings. NOTE(review): presumably tool names to include -
                confirm against the implementations.
        """

    async def tools(self) -> list[Tool]:
        """Return all tools provided by this server."""
        # No filter argument is passed, so `_list_tools` uses its default.
        available = await self._list_tools()
        return available
@@ -0,0 +1,60 @@
1
+ import contextlib
2
+ from types import TracebackType
3
+ from typing import AsyncIterator, Sequence
4
+
5
+ from .._tool import Tool, ToolSource
6
+ from .._tool_def import ToolDef
7
+ from ._types import MCPServer
8
+ from .tools import MCPToolSource
9
+
10
+
11
@contextlib.asynccontextmanager
async def mcp_connection(
    tools: Sequence[Tool | ToolDef | ToolSource] | ToolSource,
) -> AsyncIterator[None]:
    """Run the MCP servers required by a set of tools for the life of a context.

    Each `ToolSource` found in `tools` is inspected: if it is an
    `MCPServer`, or an `MCPToolSource` wrapping one, that server is
    connected to when the context is entered and disconnected from when
    the context exits.

    Args:
        tools: Tools in current context.
    """
    # normalise a lone tool source into a one-element list
    candidates = tools if isinstance(tools, Sequence) else [tools]

    # collect the server behind every MCP-backed tool source
    mcp_servers: list[MCPServer] = []
    for candidate in candidates:
        if not isinstance(candidate, ToolSource):
            continue
        if isinstance(candidate, MCPServer):
            mcp_servers.append(candidate)
        elif isinstance(candidate, MCPToolSource):
            mcp_servers.append(candidate._server)

    # the exit stack closes every opened connection when the context ends
    async with contextlib.AsyncExitStack() as exit_stack:
        for mcp_server in mcp_servers:
            await exit_stack.enter_async_context(MCPServerConnection(mcp_server))

        # hand control back to the caller while connections are open
        yield
44
+
45
+
46
class MCPServerConnection:
    """Async context manager that connects an `MCPServer` on entry and closes it on exit."""

    def __init__(self, server: MCPServer) -> None:
        # server whose connection lifetime this object manages
        self._server = server

    async def __aenter__(self) -> "MCPServerConnection":
        """Connect to the wrapped server and return this connection object."""
        server = self._server
        await server._connect()
        return self

    async def __aexit__(
        self,
        exc_type: type[BaseException] | None,
        exc: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> None:
        """Close the wrapped server; exception details are not inspected."""
        server = self._server
        await server._close()
@@ -0,0 +1,118 @@
1
+ from typing import Any
2
+
3
+ from mcp.client.session import ClientSession
4
+ from mcp.shared.context import RequestContext
5
+ from mcp.types import (
6
+ INTERNAL_ERROR,
7
+ CreateMessageRequestParams,
8
+ CreateMessageResult,
9
+ EmbeddedResource,
10
+ ErrorData,
11
+ ImageContent,
12
+ TextContent,
13
+ TextResourceContents,
14
+ )
15
+ from mcp.types import (
16
+ StopReason as MCPStopReason,
17
+ )
18
+
19
+ from inspect_ai._util.content import Content, ContentImage, ContentText
20
+ from inspect_ai._util.error import exception_message
21
+ from inspect_ai._util.url import data_uri_mime_type, data_uri_to_base64
22
+
23
+
24
async def sampling_fn(
    context: RequestContext[ClientSession, Any],
    params: CreateMessageRequestParams,
) -> CreateMessageResult | ErrorData:
    """Handle an MCP sampling request by generating with the model from `get_model()`.

    Args:
        context: MCP request context (not used by this implementation).
        params: Sampling request (system prompt, messages, temperature,
            max tokens and stop sequences are read).

    Returns:
        A `CreateMessageResult` holding the first text or image content
        generated, or `ErrorData` (code `INTERNAL_ERROR`) when no such
        content was generated or any exception was raised.
    """
    # imported here rather than at module level
    from inspect_ai.model._chat_message import (
        ChatMessage,
        ChatMessageAssistant,
        ChatMessageSystem,
        ChatMessageUser,
    )
    from inspect_ai.model._generate_config import GenerateConfig
    from inspect_ai.model._model import get_model

    try:
        # translate the request into an Inspect conversation
        messages: list[ChatMessage] = []
        if params.systemPrompt:
            messages.append(ChatMessageSystem(content=params.systemPrompt))

        for message in params.messages:
            converted: ChatMessage
            if message.role == "assistant":
                converted = ChatMessageAssistant(
                    content=[as_inspect_content(message.content)]
                )
            elif message.role == "user":
                converted = ChatMessageUser(
                    content=[as_inspect_content(message.content)]
                )
            else:
                # messages with any other role are ignored
                continue
            messages.append(converted)

        # generate using the sampling parameters from the request
        config = GenerateConfig(
            temperature=params.temperature,
            max_tokens=params.maxTokens,
            stop_seqs=params.stopSequences,
        )
        output = await get_model().generate(messages, config=config)

        # only "max_tokens" maps to "maxTokens"; everything else is "endTurn"
        stop_reason: MCPStopReason = (
            "maxTokens" if output.stop_reason == "max_tokens" else "endTurn"
        )

        # pick the first content item that MCP can represent
        generated = output.message.content
        reply: TextContent | ImageContent | None
        if isinstance(generated, str):
            reply = TextContent(type="text", text=generated)
        else:
            reply = next(
                (
                    as_mcp_content(item)
                    for item in generated
                    if isinstance(item, ContentText | ContentImage)
                ),
                None,
            )

        # nothing usable was generated
        if reply is None:
            return ErrorData(
                code=INTERNAL_ERROR, message="No text or image content was generated."
            )

        return CreateMessageResult(
            role="assistant",
            content=reply,
            model=output.model,
            stopReason=stop_reason,
        )

    except Exception as ex:
        # failures are reported to the server as error data, not raised
        return ErrorData(code=INTERNAL_ERROR, message=exception_message(ex))
93
+
94
+
95
def as_inspect_content(
    content: TextContent | ImageContent | EmbeddedResource,
) -> Content:
    """Convert MCP message content into Inspect content.

    Args:
        content: MCP text content, image content, or embedded resource.

    Returns:
        `ContentText` for text content and embedded text resources,
        `ContentImage` (holding a base64 data URI) for image content.

    Raises:
        ValueError: If the content is an embedded resource that does not
            hold text.
    """
    if isinstance(content, TextContent):
        return ContentText(text=content.text)
    elif isinstance(content, ImageContent):
        # mimeType is already a complete MIME type (e.g. "image/png"), so it
        # must not be prefixed with "image/" when building the data URI
        return ContentImage(image=f"data:{content.mimeType};base64,{content.data}")
    elif isinstance(content.resource, TextResourceContents):
        return ContentText(text=content.resource.text)
    else:
        raise ValueError(f"Unexpected content: {content}")
108
+
109
+
110
def as_mcp_content(content: ContentText | ContentImage) -> TextContent | ImageContent:
    """Convert Inspect text or image content into the matching MCP content.

    Args:
        content: Inspect text or image content.

    Returns:
        `TextContent` for text; otherwise `ImageContent` whose MIME type is
        taken from the image data URI, falling back to "image/png".
    """
    if not isinstance(content, ContentText):
        mime_type = data_uri_mime_type(content.image) or "image/png"
        encoded = data_uri_to_base64(content.image)
        return ImageContent(type="image", mimeType=mime_type, data=encoded)

    return TextContent(type="text", text=content.text)
@@ -0,0 +1,112 @@
1
+ from logging import getLogger
2
+ from pathlib import Path
3
+ from typing import Any
4
+
5
+ from inspect_ai._util.error import pip_dependency_error
6
+ from inspect_ai._util.version import verify_required_version
7
+
8
+ from ._types import MCPServer
9
+
10
+ logger = getLogger(__name__)
11
+
12
+
13
def mcp_server_sse(
    *,
    url: str,
    headers: dict[str, Any] | None = None,
    timeout: float = 5,
    sse_read_timeout: float = 60 * 5,
) -> MCPServer:
    """MCP Server (SSE).

    SSE interface to MCP server. Use this for MCP servers available via a
    URL endpoint.

    Args:
        url: URL to remote server
        headers: Headers to send server (typically authorization is
            included here)
        timeout: Timeout for HTTP operations
        sse_read_timeout: How long (in seconds) the client will wait for a
            new event before disconnecting.

    Returns:
        MCPServer: Interface to the MCP server.
    """
    # check the optional `mcp` dependency before importing code that needs it
    verfify_mcp_package()
    from ._mcp import create_server_sse

    server = create_server_sse(url, headers, timeout, sse_read_timeout)
    return server
38
+
39
+
40
def mcp_server_stdio(
    *,
    command: str,
    args: list[str] = [],
    cwd: str | Path | None = None,
    env: dict[str, str] | None = None,
) -> MCPServer:
    """MCP Server (Stdio).

    Stdio interface to MCP server. Use this for MCP servers that run locally.

    Args:
        command: The executable to run to start the server.
        args: Command line arguments to pass to the executable.
        cwd: The working directory to use when spawning the process.
        env: The environment to use when spawning the process
            in addition to the platform specific set of default
            environment variables (e.g. "HOME", "LOGNAME", "PATH",
            "SHELL", "TERM", and "USER" for Posix-based systems).

    Returns:
        MCPServer: Interface to the MCP server.
    """
    # check the optional `mcp` dependency before importing code that needs it
    verfify_mcp_package()
    from ._mcp import create_server_stdio

    # pass a copy so neither the shared default list nor the caller's list
    # can be mutated by downstream code
    return create_server_stdio(command, list(args), cwd, env)
67
+
68
+
69
def mcp_server_sandbox(
    *,
    command: str,
    args: list[str] = [],
    cwd: str | Path | None = None,
    env: dict[str, str] | None = None,
    sandbox: str | None = None,
) -> MCPServer:
    """MCP Server (Sandbox).

    Interface to MCP server running in an Inspect sandbox.

    Args:
        command: The executable to run to start the server.
        args: Command line arguments to pass to the executable.
        cwd: The working directory to use when spawning the process.
        env: The environment to use when spawning the process
            in addition to the platform specific set of default
            environment variables (e.g. "HOME", "LOGNAME", "PATH",
            "SHELL", "TERM", and "USER" for Posix-based systems).
        sandbox: The sandbox to use when spawning the process.

    Returns:
        MCPServer: Interface to the MCP server.
    """
    # check the optional `mcp` dependency before importing code that needs it
    verfify_mcp_package()
    from ._mcp import create_server_sandbox

    # pass a copy so neither the shared default list nor the caller's list
    # can be mutated by downstream code
    return create_server_sandbox(command, list(args), cwd, env, sandbox)
98
+
99
+
100
def verfify_mcp_package() -> None:
    """Check that the `mcp` package can be imported and verify its version.

    Raises the error built by `pip_dependency_error()` when `mcp` cannot be
    imported; the minimum version check ("1.6.0") is delegated to
    `verify_required_version()`.
    """
    feature, package, min_version = "MCP tools", "mcp", "1.6.0"

    # the package must be importable at all
    try:
        import mcp  # noqa: F401
    except ImportError:
        raise pip_dependency_error(feature, [package])

    # and must satisfy the minimum version
    verify_required_version(feature, package, min_version)
@@ -0,0 +1,34 @@
1
+ from typing import Literal
2
+
3
+ from .._tool import Tool, ToolSource
4
+ from ._types import MCPServer
5
+
6
+
7
def mcp_tools(
    server: MCPServer,
    *,
    tools: Literal["all"] | list[str] = "all",
) -> ToolSource:
    """Tools from MCP server.

    Args:
        server: MCP server created with `mcp_server_stdio()` or
            `mcp_server_sse()`
        tools: List of tool names (or globs). Defaults to "all", which
            returns all tools.

    Returns:
        ToolSource: Source for specified MCP server tools.
    """
    source = MCPToolSource(server, tools)
    return source
23
+
24
+
25
class MCPToolSource(ToolSource):
    """Tool source exposing a selection of tools from an `MCPServer`.

    The tool list is requested from the server on first use and cached.
    """

    def __init__(self, server: MCPServer, tools: Literal["all"] | list[str]) -> None:
        self._server = server
        self._tools = tools
        # filled in by the first call to tools()
        self._cached_tool_list: list[Tool] | None = None

    async def tools(self) -> list[Tool]:
        """Return the selected tools, querying the server only when not yet cached."""
        cached = self._cached_tool_list
        if cached is None:
            cached = await self._server._list_tools(self._tools)
            self._cached_tool_list = cached
        return cached
inspect_ai/tool/_tool.py CHANGED
@@ -105,6 +105,19 @@ class Tool(Protocol):
105
105
  ...
106
106
 
107
107
 
108
@runtime_checkable
class ToolSource(Protocol):
    """Protocol for objects that supply a set of tools dynamically."""

    async def tools(self) -> list[Tool]:
        """Retrieve the tools offered by this tool source.

        Returns:
            List of tools
        """
        ...
119
+
120
+
108
121
  P = ParamSpec("P")
109
122
 
110
123
 
@@ -3,6 +3,7 @@ from typing import (
3
3
  Any,
4
4
  Callable,
5
5
  NamedTuple,
6
+ Sequence,
6
7
  )
7
8
 
8
9
  from inspect_ai._util.registry import (
@@ -13,7 +14,14 @@ from inspect_ai._util.registry import (
13
14
  set_registry_params,
14
15
  )
15
16
 
16
- from ._tool import TOOL_MODEL_INPUT, TOOL_PARALLEL, TOOL_PROMPT, TOOL_VIEWER, Tool
17
+ from ._tool import (
18
+ TOOL_MODEL_INPUT,
19
+ TOOL_PARALLEL,
20
+ TOOL_PROMPT,
21
+ TOOL_VIEWER,
22
+ Tool,
23
+ ToolSource,
24
+ )
17
25
  from ._tool_call import ToolCallModelInput, ToolCallViewer
18
26
  from ._tool_description import (
19
27
  ToolDescription,
@@ -157,10 +165,21 @@ def apply_description_overrides(target: ToolParams, overrides: dict[str, str]) -
157
165
  target.properties[param].description = value
158
166
 
159
167
 
160
- def tool_defs(
161
- tools: list[Tool] | list[ToolDef] | list[Tool | ToolDef],
168
async def tool_defs(
    tools: Sequence[Tool | ToolDef | ToolSource] | ToolSource,
) -> list[ToolDef]:
    """Resolve tools, tool definitions and tool sources into a flat list of `ToolDef`.

    Args:
        tools: A single tool source, or a sequence mixing tools, tool
            definitions and tool sources.

    Returns:
        One `ToolDef` per tool, in input order, with each tool source
        replaced by definitions for the tools it provides.
    """
    # a lone tool source is first expanded into its tools
    if isinstance(tools, ToolSource):
        tools = await tools.tools()

    resolved: list[ToolDef] = []
    for item in tools:
        if isinstance(item, ToolSource):
            # nested tool source: wrap each tool it provides
            for provided in await item.tools():
                resolved.append(ToolDef(provided))
        elif isinstance(item, ToolDef):
            # already a definition, use as-is
            resolved.append(item)
        else:
            resolved.append(ToolDef(item))
    return resolved
164
183
 
165
184
 
166
185
  class ToolDefFields(NamedTuple):
@@ -245,7 +264,5 @@ def validate_tool_parameters(tool_name: str, parameters: dict[str, ToolParam]) -
245
264
  f"{context} provided for parameter '{bound_name}' of function '{tool_name}'."
246
265
  )
247
266
 
248
- if param.type is None and not param.anyOf and not param.enum:
249
- raise_not_provided_error("Unsupported type or type annotation")
250
- elif not param.description:
267
+ if not param.description:
251
268
  raise_not_provided_error("Description not")