inspect-ai 0.3.90__py3-none-any.whl → 0.3.92__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (370) hide show
  1. inspect_ai/_cli/common.py +13 -0
  2. inspect_ai/_cli/eval.py +44 -0
  3. inspect_ai/_display/textual/widgets/samples.py +49 -4
  4. inspect_ai/_display/textual/widgets/vscode.py +4 -2
  5. inspect_ai/_eval/eval.py +41 -28
  6. inspect_ai/_eval/evalset.py +4 -0
  7. inspect_ai/_eval/loader.py +4 -5
  8. inspect_ai/_eval/registry.py +1 -1
  9. inspect_ai/_eval/run.py +6 -3
  10. inspect_ai/_eval/task/log.py +6 -0
  11. inspect_ai/_eval/task/run.py +108 -41
  12. inspect_ai/_eval/task/sandbox.py +19 -5
  13. inspect_ai/_util/_async.py +1 -1
  14. inspect_ai/_util/constants.py +1 -0
  15. inspect_ai/_util/environ.py +32 -0
  16. inspect_ai/_util/file.py +8 -1
  17. inspect_ai/_util/httpx.py +105 -22
  18. inspect_ai/_util/registry.py +83 -9
  19. inspect_ai/_util/text.py +81 -17
  20. inspect_ai/_util/transcript.py +9 -6
  21. inspect_ai/_util/vscode.py +7 -2
  22. inspect_ai/_view/schema.py +1 -1
  23. inspect_ai/_view/www/babel.config.js +11 -0
  24. inspect_ai/_view/www/dist/assets/index.css +3640 -3563
  25. inspect_ai/_view/www/dist/assets/index.js +59204 -52519
  26. inspect_ai/_view/www/eslint.config.mjs +10 -1
  27. inspect_ai/_view/www/jest.config.mjs +21 -0
  28. inspect_ai/_view/www/log-schema.json +111 -2
  29. inspect_ai/_view/www/package.json +19 -5
  30. inspect_ai/_view/www/src/{types → @types}/log.d.ts +95 -32
  31. inspect_ai/_view/www/{App.css → src/app/App.css} +22 -14
  32. inspect_ai/_view/www/src/app/App.tsx +168 -0
  33. inspect_ai/_view/www/src/{AppErrorBoundary.tsx → app/AppErrorBoundary.tsx} +1 -1
  34. inspect_ai/_view/www/src/{appearance → app/appearance}/icons.ts +1 -0
  35. inspect_ai/_view/www/src/{metadata → app/content}/RenderedContent.tsx +5 -5
  36. inspect_ai/_view/www/src/{workspace/WorkSpaceView.tsx → app/log-view/LogView.tsx} +59 -40
  37. inspect_ai/_view/www/src/app/log-view/LogViewContainer.tsx +159 -0
  38. inspect_ai/_view/www/src/app/log-view/LogViewLayout.tsx +109 -0
  39. inspect_ai/_view/www/src/{workspace → app/log-view}/error/TaskErrorPanel.tsx +3 -3
  40. inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ModelRolesView.tsx +1 -1
  41. inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/Navbar.tsx +4 -4
  42. inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/PrimaryBar.tsx +8 -8
  43. inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ResultsPanel.tsx +6 -6
  44. inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/RunningStatusPanel.tsx +1 -1
  45. inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ScoreGrid.tsx +1 -1
  46. inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/SecondaryBar.tsx +8 -8
  47. inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/InfoTab.tsx +35 -6
  48. inspect_ai/_view/www/src/app/log-view/tabs/JsonTab.tsx +136 -0
  49. inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/SamplesTab.tsx +82 -73
  50. inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/grouping.ts +3 -3
  51. inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/types.ts +1 -1
  52. inspect_ai/_view/www/src/{plan → app/plan}/DatasetDetailView.tsx +2 -2
  53. inspect_ai/_view/www/src/{plan → app/plan}/DetailStep.tsx +1 -1
  54. inspect_ai/_view/www/src/{plan → app/plan}/ModelCard.tsx +4 -4
  55. inspect_ai/_view/www/src/{plan → app/plan}/PlanCard.tsx +2 -2
  56. inspect_ai/_view/www/src/{plan → app/plan}/PlanDetailView.tsx +5 -5
  57. inspect_ai/_view/www/src/{plan → app/plan}/SolverDetailView.tsx +1 -1
  58. inspect_ai/_view/www/src/app/routing/AppRouter.tsx +58 -0
  59. inspect_ai/_view/www/src/app/routing/navigationHooks.ts +182 -0
  60. inspect_ai/_view/www/src/app/routing/url.ts +43 -0
  61. inspect_ai/_view/www/src/{samples → app/samples}/InlineSampleDisplay.tsx +11 -27
  62. inspect_ai/_view/www/src/{samples → app/samples}/SampleDialog.tsx +36 -40
  63. inspect_ai/_view/www/src/{samples → app/samples}/SampleDisplay.module.css +4 -0
  64. inspect_ai/_view/www/src/{samples → app/samples}/SampleDisplay.tsx +116 -49
  65. inspect_ai/_view/www/src/{samples → app/samples}/SampleSummaryView.module.css +1 -1
  66. inspect_ai/_view/www/src/{samples → app/samples}/SampleSummaryView.tsx +29 -26
  67. inspect_ai/_view/www/src/{samples → app/samples}/SamplesTools.tsx +3 -3
  68. inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessage.module.css +5 -2
  69. inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessage.tsx +12 -4
  70. inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessageRenderer.tsx +3 -3
  71. inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessageRow.tsx +6 -1
  72. inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatView.tsx +4 -2
  73. inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatViewVirtualList.tsx +5 -3
  74. inspect_ai/_view/www/src/app/samples/chat/MessageContent.module.css +12 -0
  75. inspect_ai/_view/www/src/{samples → app/samples}/chat/MessageContent.tsx +11 -10
  76. inspect_ai/_view/www/src/app/samples/chat/MessageContents.module.css +7 -0
  77. inspect_ai/_view/www/src/{samples → app/samples}/chat/MessageContents.tsx +14 -8
  78. inspect_ai/_view/www/src/{samples → app/samples}/chat/messages.ts +2 -2
  79. inspect_ai/_view/www/src/app/samples/chat/tools/ToolCallView.module.css +7 -0
  80. inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolCallView.tsx +26 -27
  81. inspect_ai/_view/www/src/app/samples/chat/tools/ToolInput.module.css +19 -0
  82. inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolInput.tsx +3 -3
  83. inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolOutput.module.css +1 -0
  84. inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolOutput.tsx +1 -1
  85. inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolTitle.module.css +4 -0
  86. inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolTitle.tsx +2 -2
  87. inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/tool.ts +1 -1
  88. inspect_ai/_view/www/src/app/samples/chat/types.ts +1 -0
  89. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/samplesDescriptor.tsx +38 -15
  90. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/BooleanScoreDescriptor.tsx +1 -1
  91. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/CategoricalScoreDescriptor.tsx +2 -2
  92. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/NumericScoreDescriptor.tsx +3 -3
  93. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/ObjectScoreDescriptor.tsx +4 -4
  94. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/OtherScoreDescriptor.tsx +2 -2
  95. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/PassFailScoreDescriptor.tsx +2 -2
  96. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/ScoreDescriptor.tsx +1 -1
  97. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/types.ts +4 -3
  98. inspect_ai/_view/www/src/{samples → app/samples}/error/SampleErrorView.module.css +2 -1
  99. inspect_ai/_view/www/src/{samples → app/samples}/list/SampleHeader.tsx +3 -0
  100. inspect_ai/_view/www/src/{samples → app/samples}/list/SampleList.tsx +47 -33
  101. inspect_ai/_view/www/src/{samples → app/samples}/list/SampleRow.module.css +16 -0
  102. inspect_ai/_view/www/src/{samples → app/samples}/list/SampleRow.tsx +47 -20
  103. inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/SelectScorer.tsx +1 -1
  104. inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/SortFilter.tsx +4 -4
  105. inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/filters.ts +8 -6
  106. inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/SampleFilter.tsx +4 -3
  107. inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/completions.ts +1 -1
  108. inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/language.ts +1 -0
  109. inspect_ai/_view/www/src/{samples → app/samples}/sampleDataAdapter.ts +3 -3
  110. inspect_ai/_view/www/src/{samples → app/samples}/sampleLimit.ts +1 -1
  111. inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScores.tsx +1 -1
  112. inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScoresGrid.tsx +12 -11
  113. inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScoresView.tsx +6 -6
  114. inspect_ai/_view/www/src/{samples → app/samples}/transcript/ApprovalEventView.tsx +1 -1
  115. inspect_ai/_view/www/src/{samples → app/samples}/transcript/ErrorEventView.tsx +3 -3
  116. inspect_ai/_view/www/src/{samples → app/samples}/transcript/InfoEventView.tsx +4 -4
  117. inspect_ai/_view/www/src/{samples → app/samples}/transcript/InputEventView.tsx +3 -3
  118. inspect_ai/_view/www/src/{samples → app/samples}/transcript/LoggerEventView.tsx +3 -3
  119. inspect_ai/_view/www/src/{samples → app/samples}/transcript/ModelEventView.module.css +13 -7
  120. inspect_ai/_view/www/src/{samples → app/samples}/transcript/ModelEventView.tsx +49 -21
  121. inspect_ai/_view/www/src/{samples → app/samples}/transcript/SampleInitEventView.tsx +11 -9
  122. inspect_ai/_view/www/src/{samples → app/samples}/transcript/SampleLimitEventView.tsx +1 -1
  123. inspect_ai/_view/www/src/{samples → app/samples}/transcript/SandboxEventView.tsx +8 -6
  124. inspect_ai/_view/www/src/{samples → app/samples}/transcript/ScoreEventView.tsx +4 -4
  125. inspect_ai/_view/www/src/{samples → app/samples}/transcript/StepEventView.tsx +11 -3
  126. inspect_ai/_view/www/src/{samples → app/samples}/transcript/SubtaskEventView.tsx +2 -2
  127. inspect_ai/_view/www/src/{samples → app/samples}/transcript/ToolEventView.tsx +2 -2
  128. inspect_ai/_view/www/src/{samples → app/samples}/transcript/TranscriptView.module.css +8 -7
  129. inspect_ai/_view/www/src/{samples → app/samples}/transcript/TranscriptView.tsx +32 -114
  130. inspect_ai/_view/www/src/{samples → app/samples}/transcript/TranscriptVirtualListComponent.module.css +6 -5
  131. inspect_ai/_view/www/src/{samples → app/samples}/transcript/TranscriptVirtualListComponent.tsx +14 -2
  132. inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventPanel.tsx +2 -2
  133. inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventTimingPanel.tsx +1 -1
  134. inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/utils.ts +1 -1
  135. inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateEventRenderers.tsx +23 -21
  136. inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateEventRenders.module.css +7 -0
  137. inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateEventView.tsx +2 -2
  138. inspect_ai/_view/www/src/app/samples/transcript/transform/fixups.ts +142 -0
  139. inspect_ai/_view/www/src/app/samples/transcript/transform/treeify.ts +39 -0
  140. inspect_ai/_view/www/src/{samples → app/samples}/transcript/types.ts +1 -1
  141. inspect_ai/_view/www/src/{workspace → app}/sidebar/EvalStatus.tsx +1 -1
  142. inspect_ai/_view/www/src/app/sidebar/LogDirectoryTitleView.module.css +16 -0
  143. inspect_ai/_view/www/src/app/sidebar/LogDirectoryTitleView.tsx +70 -0
  144. inspect_ai/_view/www/src/{workspace → app}/sidebar/Sidebar.module.css +8 -0
  145. inspect_ai/_view/www/src/{workspace → app}/sidebar/Sidebar.tsx +35 -17
  146. inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarLogEntry.tsx +1 -1
  147. inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarScoreView.tsx +2 -2
  148. inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarScoresView.tsx +2 -2
  149. inspect_ai/_view/www/src/{types.ts → app/types.ts} +18 -11
  150. inspect_ai/_view/www/src/{usage → app/usage}/ModelTokenTable.tsx +1 -1
  151. inspect_ai/_view/www/src/{usage → app/usage}/ModelUsagePanel.tsx +2 -2
  152. inspect_ai/_view/www/src/{usage → app/usage}/TokenTable.tsx +1 -1
  153. inspect_ai/_view/www/src/{usage → app/usage}/UsageCard.tsx +6 -6
  154. inspect_ai/_view/www/src/{api → client/api}/api-browser.ts +2 -2
  155. inspect_ai/_view/www/src/{api → client/api}/api-http.ts +3 -3
  156. inspect_ai/_view/www/src/{api → client/api}/api-vscode.ts +2 -2
  157. inspect_ai/_view/www/src/{api → client/api}/client-api.ts +6 -5
  158. inspect_ai/_view/www/src/{api → client/api}/index.ts +2 -2
  159. inspect_ai/_view/www/src/{api → client/api}/types.ts +4 -1
  160. inspect_ai/_view/www/src/{logfile → client/remote}/remoteLogFile.ts +3 -3
  161. inspect_ai/_view/www/src/{storage → client/storage}/index.ts +11 -5
  162. inspect_ai/_view/www/src/components/Card.tsx +1 -1
  163. inspect_ai/_view/www/src/components/CopyButton.tsx +1 -1
  164. inspect_ai/_view/www/src/components/DownloadButton.tsx +1 -1
  165. inspect_ai/_view/www/src/components/ErrorPanel.tsx +1 -1
  166. inspect_ai/_view/www/src/components/{ExpandablePanel.css → ExpandablePanel.module.css} +14 -11
  167. inspect_ai/_view/www/src/components/ExpandablePanel.tsx +16 -10
  168. inspect_ai/_view/www/src/components/FindBand.tsx +1 -1
  169. inspect_ai/_view/www/src/components/JsonPanel.css +2 -2
  170. inspect_ai/_view/www/src/components/LargeModal.tsx +12 -1
  171. inspect_ai/_view/www/src/components/LightboxCarousel.tsx +1 -1
  172. inspect_ai/_view/www/src/components/MarkdownDiv.tsx +3 -1
  173. inspect_ai/_view/www/src/components/MessageBand.tsx +1 -1
  174. inspect_ai/_view/www/src/components/NoContentsPanel.tsx +1 -1
  175. inspect_ai/_view/www/src/constants.ts +10 -9
  176. inspect_ai/_view/www/src/index.tsx +27 -11
  177. inspect_ai/_view/www/src/state/appSlice.ts +44 -5
  178. inspect_ai/_view/www/src/state/hooks.ts +30 -7
  179. inspect_ai/_view/www/src/state/logSlice.ts +7 -5
  180. inspect_ai/_view/www/src/state/logsPolling.ts +1 -1
  181. inspect_ai/_view/www/src/state/logsSlice.ts +18 -13
  182. inspect_ai/_view/www/src/state/samplePolling.ts +12 -12
  183. inspect_ai/_view/www/src/state/sampleSlice.ts +3 -5
  184. inspect_ai/_view/www/src/state/sampleUtils.ts +1 -1
  185. inspect_ai/_view/www/src/{scoring/utils.ts → state/scoring.ts} +2 -2
  186. inspect_ai/_view/www/src/state/store.ts +9 -7
  187. inspect_ai/_view/www/src/state/utils.ts +1 -1
  188. inspect_ai/_view/www/src/tests/README.md +49 -0
  189. inspect_ai/_view/www/src/tests/__mocks__/fileMock.js +1 -0
  190. inspect_ai/_view/www/src/tests/__mocks__/styleMock.js +1 -0
  191. inspect_ai/_view/www/src/tests/setupTests.mjs +1 -0
  192. inspect_ai/_view/www/src/tests/utils/base64.test.ts +23 -0
  193. inspect_ai/_view/www/src/tests/utils/format.test.ts +127 -0
  194. inspect_ai/_view/www/src/tests/utils/path.test.ts +54 -0
  195. inspect_ai/_view/www/src/utils/format.ts +8 -2
  196. inspect_ai/_view/www/src/utils/path.ts +14 -2
  197. inspect_ai/_view/www/src/utils/polling.ts +1 -2
  198. inspect_ai/_view/www/src/utils/uri.ts +32 -0
  199. inspect_ai/_view/www/yarn.lock +3310 -382
  200. inspect_ai/agent/_handoff.py +6 -3
  201. inspect_ai/agent/_human/agent.py +5 -3
  202. inspect_ai/agent/_human/install.py +16 -7
  203. inspect_ai/agent/_human/panel.py +14 -1
  204. inspect_ai/agent/_human/service.py +5 -1
  205. inspect_ai/agent/_react.py +161 -128
  206. inspect_ai/agent/_types.py +15 -4
  207. inspect_ai/approval/_policy.py +2 -2
  208. inspect_ai/log/_file.py +30 -11
  209. inspect_ai/log/_log.py +7 -1
  210. inspect_ai/log/_recorders/eval.py +3 -0
  211. inspect_ai/log/_recorders/types.py +1 -0
  212. inspect_ai/log/_samples.py +4 -0
  213. inspect_ai/model/_call_tools.py +33 -17
  214. inspect_ai/model/_generate_config.py +10 -2
  215. inspect_ai/model/_model.py +41 -21
  216. inspect_ai/model/_model_output.py +2 -1
  217. inspect_ai/model/_openai.py +10 -8
  218. inspect_ai/model/_openai_responses.py +95 -42
  219. inspect_ai/model/_providers/anthropic.py +14 -12
  220. inspect_ai/model/_providers/google.py +191 -95
  221. inspect_ai/model/_providers/hf.py +1 -1
  222. inspect_ai/model/_providers/mistral.py +2 -3
  223. inspect_ai/model/_providers/openai.py +54 -17
  224. inspect_ai/model/_providers/openai_o1.py +1 -1
  225. inspect_ai/model/_providers/openai_responses.py +28 -16
  226. inspect_ai/model/_providers/openrouter.py +14 -0
  227. inspect_ai/model/_providers/providers.py +2 -2
  228. inspect_ai/model/_providers/util/chatapi.py +17 -7
  229. inspect_ai/model/_providers/vllm.py +1 -1
  230. inspect_ai/scorer/_metric.py +17 -1
  231. inspect_ai/scorer/_model.py +51 -6
  232. inspect_ai/scorer/_scorer.py +1 -1
  233. inspect_ai/solver/_human_agent.py +3 -0
  234. inspect_ai/solver/_plan.py +1 -1
  235. inspect_ai/solver/_solver.py +1 -1
  236. inspect_ai/solver/_use_tools.py +14 -8
  237. inspect_ai/tool/__init__.py +16 -1
  238. inspect_ai/tool/_json_rpc_helpers.py +285 -0
  239. inspect_ai/tool/_mcp/__init__.py +13 -0
  240. inspect_ai/tool/_mcp/_context.py +14 -0
  241. inspect_ai/tool/_mcp/_mcp.py +293 -0
  242. inspect_ai/tool/_mcp/_sandbox.py +104 -0
  243. inspect_ai/tool/_mcp/_types.py +31 -0
  244. inspect_ai/tool/_mcp/connection.py +60 -0
  245. inspect_ai/tool/_mcp/sampling.py +118 -0
  246. inspect_ai/tool/_mcp/server.py +112 -0
  247. inspect_ai/tool/_mcp/tools.py +34 -0
  248. inspect_ai/tool/_tool.py +13 -0
  249. inspect_ai/tool/_tool_def.py +24 -7
  250. inspect_ai/tool/_tool_support_helpers.py +129 -153
  251. inspect_ai/tool/_tools/_bash_session.py +11 -11
  252. inspect_ai/tool/_tools/_text_editor.py +6 -6
  253. inspect_ai/tool/_tools/_web_browser/_web_browser.py +8 -8
  254. inspect_ai/util/_anyio.py +31 -20
  255. inspect_ai/util/_json.py +20 -2
  256. inspect_ai/util/_sandbox/context.py +18 -7
  257. inspect_ai/util/_sandbox/docker/compose.py +1 -1
  258. inspect_ai/util/_sandbox/docker/docker.py +92 -21
  259. inspect_ai/util/_sandbox/environment.py +33 -2
  260. inspect_ai/util/_sandbox/events.py +2 -2
  261. inspect_ai/util/_sandbox/service.py +13 -3
  262. {inspect_ai-0.3.90.dist-info → inspect_ai-0.3.92.dist-info}/METADATA +6 -2
  263. inspect_ai-0.3.92.dist-info/RECORD +732 -0
  264. {inspect_ai-0.3.90.dist-info → inspect_ai-0.3.92.dist-info}/WHEEL +1 -1
  265. inspect_ai/_view/www/src/App.tsx +0 -316
  266. inspect_ai/_view/www/src/samples/chat/MessageContent.module.css +0 -4
  267. inspect_ai/_view/www/src/samples/chat/MessageContents.module.css +0 -3
  268. inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.module.css +0 -3
  269. inspect_ai/_view/www/src/samples/chat/tools/ToolInput.module.css +0 -14
  270. inspect_ai/_view/www/src/workspace/WorkSpace.tsx +0 -292
  271. inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.module.css +0 -5
  272. inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +0 -57
  273. inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +0 -43
  274. inspect_ai-0.3.90.dist-info/RECORD +0 -705
  275. /inspect_ai/_view/www/src/{types → @types}/asciicinema-player.d.ts +0 -0
  276. /inspect_ai/_view/www/src/{types → @types}/jsondiffpatch.d.ts +0 -0
  277. /inspect_ai/_view/www/src/{types → @types}/markdown-it-katex.d.ts +0 -0
  278. /inspect_ai/_view/www/src/{types → @types}/prism.d.ts +0 -0
  279. /inspect_ai/_view/www/src/{appearance → app/appearance}/colors.ts +0 -0
  280. /inspect_ai/_view/www/src/{appearance → app/appearance}/fonts.ts +0 -0
  281. /inspect_ai/_view/www/src/{appearance → app/appearance}/styles.ts +0 -0
  282. /inspect_ai/_view/www/src/{metadata → app/content}/MetaDataGrid.tsx +0 -0
  283. /inspect_ai/_view/www/src/{metadata → app/content}/MetaDataView.module.css +0 -0
  284. /inspect_ai/_view/www/src/{metadata → app/content}/MetaDataView.tsx +0 -0
  285. /inspect_ai/_view/www/src/{metadata → app/content}/MetadataGrid.module.css +0 -0
  286. /inspect_ai/_view/www/src/{metadata → app/content}/RenderedContent.module.css +0 -0
  287. /inspect_ai/_view/www/src/{metadata → app/content}/types.ts +0 -0
  288. /inspect_ai/_view/www/src/{workspace/WorkSpaceView.module.css → app/log-view/LogView.module.css} +0 -0
  289. /inspect_ai/_view/www/src/{workspace → app/log-view}/error/TaskErrorPanel.module.css +0 -0
  290. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ModelRolesView.module.css +0 -0
  291. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/Navbar.module.css +0 -0
  292. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/PrimaryBar.module.css +0 -0
  293. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ResultsPanel.module.css +0 -0
  294. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/RunningStatusPanel.module.css +0 -0
  295. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ScoreGrid.module.css +0 -0
  296. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/SecondaryBar.module.css +0 -0
  297. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/StatusPanel.module.css +0 -0
  298. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/StatusPanel.tsx +0 -0
  299. /inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/InfoTab.module.css +0 -0
  300. /inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/JsonTab.module.css +0 -0
  301. /inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/RunningNoSamples.module.css +0 -0
  302. /inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/RunningNoSamples.tsx +0 -0
  303. /inspect_ai/_view/www/src/{workspace → app/log-view}/types.ts +0 -0
  304. /inspect_ai/_view/www/src/{workspace → app/log-view}/utils.ts +0 -0
  305. /inspect_ai/_view/www/src/{plan → app/plan}/DatasetDetailView.module.css +0 -0
  306. /inspect_ai/_view/www/src/{plan → app/plan}/DetailStep.module.css +0 -0
  307. /inspect_ai/_view/www/src/{plan → app/plan}/ModelCard.module.css +0 -0
  308. /inspect_ai/_view/www/src/{plan → app/plan}/PlanDetailView.module.css +0 -0
  309. /inspect_ai/_view/www/src/{plan → app/plan}/ScorerDetailView.module.css +0 -0
  310. /inspect_ai/_view/www/src/{plan → app/plan}/ScorerDetailView.tsx +0 -0
  311. /inspect_ai/_view/www/src/{plan → app/plan}/SolverDetailView.module.css +0 -0
  312. /inspect_ai/_view/www/src/{samples → app/samples}/InlineSampleDisplay.module.css +0 -0
  313. /inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessageRow.module.css +0 -0
  314. /inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatViewVirtualList.module.css +0 -0
  315. /inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/BooleanScoreDescriptor.module.css +0 -0
  316. /inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/ObjectScoreDescriptor.module.css +0 -0
  317. /inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/PassFailScoreDescriptor.module.css +0 -0
  318. /inspect_ai/_view/www/src/{samples → app/samples}/error/FlatSampleErrorView.module.css +0 -0
  319. /inspect_ai/_view/www/src/{samples → app/samples}/error/FlatSampleErrorView.tsx +0 -0
  320. /inspect_ai/_view/www/src/{samples → app/samples}/error/SampleErrorView.tsx +0 -0
  321. /inspect_ai/_view/www/src/{samples → app/samples}/error/error.ts +0 -0
  322. /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleFooter.module.css +0 -0
  323. /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleFooter.tsx +0 -0
  324. /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleHeader.module.css +0 -0
  325. /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleList.module.css +0 -0
  326. /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleSeparator.module.css +0 -0
  327. /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleSeparator.tsx +0 -0
  328. /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/EpochFilter.module.css +0 -0
  329. /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/EpochFilter.tsx +0 -0
  330. /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/SelectScorer.module.css +0 -0
  331. /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/SortFilter.module.css +0 -0
  332. /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/SampleFilter.module.css +0 -0
  333. /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/tokenize.ts +0 -0
  334. /inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScores.module.css +0 -0
  335. /inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScoresGrid.module.css +0 -0
  336. /inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScoresView.module.css +0 -0
  337. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/InfoEventView.module.css +0 -0
  338. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/LoggerEventView.module.css +0 -0
  339. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/SampleInitEventView.module.css +0 -0
  340. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/SandboxEventView.module.css +0 -0
  341. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/ScoreEventView.module.css +0 -0
  342. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/SubtaskEventView.module.css +0 -0
  343. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/ToolEventView.module.css +0 -0
  344. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventNav.module.css +0 -0
  345. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventNav.tsx +0 -0
  346. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventNavs.module.css +0 -0
  347. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventNavs.tsx +0 -0
  348. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventPanel.module.css +0 -0
  349. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventProgressPanel.module.css +0 -0
  350. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventProgressPanel.tsx +0 -0
  351. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventRow.module.css +0 -0
  352. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventRow.tsx +0 -0
  353. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventSection.module.css +0 -0
  354. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventSection.tsx +0 -0
  355. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventTimingPanel.module.css +0 -0
  356. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateDiffView.tsx +0 -0
  357. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateEventView.module.css +0 -0
  358. /inspect_ai/_view/www/src/{workspace → app}/sidebar/EvalStatus.module.css +0 -0
  359. /inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarLogEntry.module.css +0 -0
  360. /inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarScoreView.module.css +0 -0
  361. /inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarScoresView.module.css +0 -0
  362. /inspect_ai/_view/www/src/{usage → app/usage}/ModelUsagePanel.module.css +0 -0
  363. /inspect_ai/_view/www/src/{usage → app/usage}/TokenTable.module.css +0 -0
  364. /inspect_ai/_view/www/src/{usage → app/usage}/UsageCard.module.css +0 -0
  365. /inspect_ai/_view/www/src/{api → client/api}/api-shared.ts +0 -0
  366. /inspect_ai/_view/www/src/{api → client/api}/jsonrpc.ts +0 -0
  367. /inspect_ai/_view/www/src/{logfile → client/remote}/remoteZipFile.ts +0 -0
  368. {inspect_ai-0.3.90.dist-info → inspect_ai-0.3.92.dist-info}/entry_points.txt +0 -0
  369. {inspect_ai-0.3.90.dist-info → inspect_ai-0.3.92.dist-info}/licenses/LICENSE +0 -0
  370. {inspect_ai-0.3.90.dist-info → inspect_ai-0.3.92.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,285 @@
1
+ import json
2
+ from itertools import count
3
+ from typing import Literal, Protocol, Type, TypeAlias, TypeVar
4
+
5
+ from pydantic import BaseModel, RootModel
6
+
7
+ from inspect_ai.tool._tool import ToolError
8
+
9
+
10
# Fields shared by every JSON-RPC 2.0 response, whether it reports a
# result or an error.
class JSONRPCResponseBase(BaseModel):
    # Protocol version marker; only the literal "2.0" validates.
    jsonrpc: Literal["2.0"]
    # Identifier carried by the response (a number or a string).
    id: int | float | str
13
+
14
+
15
# Response variant that carries a `result` payload.
class JSONRPCSuccessResponse(JSONRPCResponseBase):
    # Deliberately untyped: the exec_* helpers in this module validate or
    # narrow the payload after the envelope has been parsed.
    result: object
17
+
18
+
19
# Accepted shapes for a call's `params`: a positional list, a string-keyed
# mapping, or None when the call takes no parameters.
JSONRPCParamsType: TypeAlias = list[object] | dict[str, object] | None
20
+
21
+
22
# Fields common to both kinds of incoming message (requests and
# notifications).
class JSONRPCIncoming(BaseModel):
    # Protocol version marker; only the literal "2.0" validates.
    jsonrpc: Literal["2.0"]
    # Name of the method being invoked.
    method: str
    # Optional arguments for the method; defaults to None when omitted.
    params: JSONRPCParamsType = None
26
+
27
+
28
# An incoming message that also carries an `id`, in contrast to
# JSONRPCNotification, which declares none.
class JSONRPCRequest(JSONRPCIncoming):
    # Identifier supplied with the request (a number or a string).
    id: int | float | str
30
+
31
+
32
# An incoming message with no `id` field: it adds nothing to
# JSONRPCIncoming and exists as a distinct type for notifications.
class JSONRPCNotification(JSONRPCIncoming):
    pass
34
+
35
+
36
class JSONRPCError(BaseModel):
    """See: https://www.jsonrpc.org/specification#error_object"""

    # Numeric error code.
    code: int
    # Description of the error.
    message: str
    # Optional additional error information; None when absent.
    data: object | None = None
42
+
43
+
44
# Response variant that carries an `error` object instead of a result.
class JSONRPCErrorResponse(JSONRPCResponseBase):
    # Structured error details (code, message, optional data).
    error: JSONRPCError
46
+
47
+
48
# Root model over the two response variants: validating a payload yields
# either a JSONRPCSuccessResponse or a JSONRPCErrorResponse in `.root`.
class JSONRPCResponse(RootModel[JSONRPCSuccessResponse | JSONRPCErrorResponse]):
    pass
50
+
51
+
52
# Type variable for helpers that return a caller-chosen pydantic model.
BaseModelT = TypeVar("BaseModelT", bound=BaseModel)
# Type variable constrained to the scalar result types accepted by
# exec_scalar_request.
ScalarT = TypeVar("ScalarT", str, int, float, bool, None)
54
+
55
+
56
class JSONRPCTransport(Protocol):
    """Async callable that delivers one JSON-RPC call and returns the raw reply.

    The helpers in this module invoke it with keyword arguments
    (`method`, `params`, `is_notification`) and treat the returned string
    as the response text: requests pass it on for JSON-RPC parsing, while
    notifications expect it to be empty or whitespace.
    """

    async def __call__(
        self,
        method: str,
        params: JSONRPCParamsType,
        is_notification: bool,
    ) -> str: ...
60
+
61
+
62
class JSONRPCServerErrorMapper(Protocol):
    """Callable that builds an exception from an error code and call details.

    NOTE(review): presumably used to translate server-side error codes into
    domain-specific exceptions; the call site is not visible in this part
    of the file, so confirm against its usage.
    """

    def __call__(
        self,
        code: int,
        message: str,
        method: str,
        params: JSONRPCParamsType,
    ) -> Exception: ...
66
+
67
+
68
async def exec_scalar_request(
    method: str,
    params: JSONRPCParamsType,
    result_type: Type[ScalarT],
    transport: JSONRPCTransport,
) -> ScalarT:
    """
    Issue a JSON-RPC request whose result must be a scalar value.

    Args:
      method (str): The JSON-RPC method to call.
      params (JSONRPCParamsType): The parameters for the JSON-RPC method.
      result_type (Type[ScalarT]): The scalar type (str, int, float, bool, None) the result must be an instance of.
      transport (JSONRPCTransport): The transport callable used to perform the call.

    Returns:
      ScalarT: The result extracted from the JSON-RPC response.

    Raises:
      ValueError: If the result is not an instance of `result_type`.

    Any error raised while performing the call or parsing the response
    propagates unchanged from `_exec_request`.
    """
    value = await _exec_request(method=method, params=params, transport=transport)

    # A single isinstance test covers every case, including
    # result_type=type(None): only None is an instance of NoneType.
    # NOTE(review): bool is a subclass of int, so a boolean result passes
    # when result_type is int.
    if isinstance(value, result_type):
        return value

    raise ValueError(f"Expected {result_type} result, got {type(value)}")
97
+
98
+
99
async def exec_model_request(
    method: str,
    params: JSONRPCParamsType,
    result_type: Type[BaseModelT],
    transport: JSONRPCTransport,
) -> BaseModelT:
    """
    Issue a JSON-RPC request and parse its result into a pydantic model.

    Args:
      method (str): The JSON-RPC method to call.
      params (JSONRPCParamsType): The parameters for the JSON-RPC method.
      result_type (Type[BaseModelT]): The pydantic model class used to validate the result.
      transport (JSONRPCTransport): The transport callable used to perform the call.

    Returns:
      BaseModelT: An instance of `result_type` built from the result.

    Raises:
      ValueError: If the result fails strict validation against `result_type`
        (surfaced as pydantic's validation error).

    Any error raised while performing the call or parsing the response
    propagates unchanged from `_exec_request`.
    """
    payload = await _exec_request(
        method=method,
        params=params,
        transport=transport,
    )
    # strict=True: the payload must already have the declared types;
    # pydantic will not coerce values to make them fit.
    validated = result_type.model_validate(payload, strict=True)
    return validated
124
+
125
+
126
async def exec_notification(
    method: str,
    params: JSONRPCParamsType,
    transport: JSONRPCTransport,
) -> None:
    """
    Send a JSON-RPC notification through the given transport.

    A notification is a JSON-RPC request for which no response is expected, so
    any non-blank text coming back from the transport is treated as an error.

    Args:
      method (str): Name of the JSON-RPC method to invoke.
      params (JSONRPCParamsType): Parameters to send with the call.
      transport (JSONRPCTransport): Callable that carries the notification; it is
        invoked with `is_notification=True`.

    Returns:
      None: Nothing is returned when the transport yields only whitespace.

    Raises:
      RuntimeError: If the transport returns any non-whitespace output.
    """
    stdout = await transport(method=method, params=params, is_notification=True)

    # a well-behaved peer answers a notification with nothing at all
    if not stdout.strip():
        return None

    raise RuntimeError(
        f"Unexpected response to a Notification: {_rpc_call_description(method, params)}: {stdout}"
    )
157
+
158
+
159
async def _exec_request(
    *,
    method: str,
    params: JSONRPCParamsType,
    transport: JSONRPCTransport,
) -> object:
    """Send a (non-notification) request over the transport and parse the reply."""
    raw_response = await transport(
        method=method,
        params=params,
        is_notification=False,
    )
    return parse_json_rpc_response(raw_response, method, params)
175
+
176
+
177
def parse_json_rpc_response(
    response_str: str,
    method: str,
    params: JSONRPCParamsType,
) -> object:
    """Parse a raw JSON-RPC response, returning its result or raising a mapped error.

    A success response yields its `result`. An error response is converted to an
    exception via `exception_for_rpc_response_error` and raised. Anything else
    raises `ValueError`.
    """
    root = JSONRPCResponse.model_validate_json(response_str).root

    # success: hand back the payload untouched
    if isinstance(root, JSONRPCSuccessResponse):
        return root.result

    # error: translate the JSON-RPC error code into an Inspect exception
    if isinstance(root, JSONRPCErrorResponse) and isinstance(
        root.error, JSONRPCError
    ):
        rpc_error = root.error
        raise exception_for_rpc_response_error(
            rpc_error.code, rpc_error.message, method, params
        )

    raise ValueError(
        f"Unexpected JSON RPC response to request {_rpc_call_description(method, params)}: {response_str}"
    )
194
+
195
+
196
def exception_for_rpc_response_error(
    code: int,
    message: str,
    method: str,
    params: JSONRPCParamsType,
    server_error_mapper: JSONRPCServerErrorMapper | None = None,
) -> Exception:
    """Build (without raising) the exception that represents a JSON-RPC error.

    Args:
      code (int): JSON-RPC error code from the response.
      message (str): JSON-RPC error message from the response.
      method (str): Method that was called (used in the RuntimeError text).
      params (JSONRPCParamsType): Params that were sent (used in the RuntimeError text).
      server_error_mapper (JSONRPCServerErrorMapper | None): Optional callable
        consulted for codes in the server-defined range (-32099..-32000).

    Returns:
      Exception: The mapper's result or a `ToolError` for server-defined codes,
        a `ToolError` for -32603, and a `RuntimeError` for every other code.
    """
    # code    message           meaning
    # -32000
    #    |    Server error      Reserved for implementation-defined server-errors.
    # -32099
    # -32600  Invalid Request   The JSON sent is not a valid Request object.
    # -32601  Method not found  The method does not exist / is not available.
    # -32602  Invalid params    Invalid method parameter(s).
    # -32603  Internal error    Internal JSON-RPC error.
    # -32700  Parse error       Invalid JSON was received by the server.

    # Server-defined range: this layer does not know which server was called,
    # so any special mapping has to be supplied by the caller.
    if -32099 <= code <= -32000:
        if server_error_mapper:
            return server_error_mapper(code, message, method, params)
        return ToolError(message)

    # Internal error: surfaced as a ToolError.
    if code == -32603:
        return ToolError(message)

    # Everything else is reported as a RuntimeError:
    #   -32600 (Invalid Request)  a bogus request is a code bug on our side.
    #   -32601 (Method not found) / -32602 (Invalid params)  should be impossible
    #       since Inspect validated before making the tool call, so these should
    #       not make it back to the model.
    #   -32700 (Parse error)  request-oriented; not expected while processing
    #       responses.
    return RuntimeError(
        f"Error executing tool command{f' {_rpc_call_description(method, params)}' if method and params else ''}: {code=} {message}"
    )
240
+
241
+
242
def _rpc_call_description(method: str, params: JSONRPCParamsType) -> str:
    """
    Generate a string description of an RPC call.

    Positional parameters (a list or tuple) are rendered as a comma separated
    list of values; named parameters (a mapping) are rendered as `key: value`
    pairs; `None` renders as an empty parameter list.

    Args:
      method (str): The name of the RPC method.
      params (JSONRPCParamsType): The parameters for the RPC method.

    Returns:
      str: A string description of the RPC call.

    Examples:
      >>> _rpc_call_description("subtract", {"minuend": 42, "subtrahend": 23})
      'subtract(minuend: 42, subtrahend: 23)'

      >>> _rpc_call_description("subtract", (42, 23))
      'subtract(42, 23)'
    """
    if params is None:
        rendered: list[str] = []
    elif isinstance(params, (list, tuple)):
        # positional parameters (tuples are accepted as well as lists so the
        # documented tuple example works instead of failing on `.items()`)
        rendered = [str(value) for value in params]
    else:
        # named parameters
        rendered = [f"{k}: {v}" for k, v in params.items()]
    return f"{method}({', '.join(rendered)})"
268
+
269
+
270
# Module-wide source of ids for outgoing (non-notification) requests.
id_generator = count(666)


def create_json_rpc_request(
    method: str,
    params: JSONRPCParamsType,
    is_notification: bool,
) -> str:
    """Serialize a JSON-RPC 2.0 request (or notification) to a JSON string.

    `params` is only included when it is truthy, and an `id` (drawn from
    `id_generator`) is only included when the message is not a notification.
    """
    request: dict[str, object] = {"jsonrpc": "2.0", "method": method}
    if params:
        request["params"] = params
    if not is_notification:
        request["id"] = next(id_generator)
    return json.dumps(request)
@@ -0,0 +1,13 @@
1
+ from ._types import MCPServer
2
+ from .connection import mcp_connection
3
+ from .server import mcp_server_sandbox, mcp_server_sse, mcp_server_stdio
4
+ from .tools import mcp_tools
5
+
6
+ __all__ = [
7
+ "mcp_tools",
8
+ "mcp_server_stdio",
9
+ "mcp_server_sse",
10
+ "mcp_server_sandbox",
11
+ "mcp_connection",
12
+ "MCPServer",
13
+ ]
@@ -0,0 +1,14 @@
1
+ from contextlib import _AsyncGeneratorContextManager
2
+ from typing import TypeAlias
3
+
4
+ from anyio.streams.memory import MemoryObjectReceiveStream, MemoryObjectSendStream
5
+ from mcp.types import (
6
+ JSONRPCMessage,
7
+ )
8
+
9
# Async context manager type used for MCP client connections. Entering it
# yields a (receive, send) pair of anyio in-memory object streams: the receive
# stream carries JSONRPCMessage items (or Exception instances), and the send
# stream accepts JSONRPCMessage items.
MCPServerContext: TypeAlias = _AsyncGeneratorContextManager[
    tuple[
        MemoryObjectReceiveStream[JSONRPCMessage | Exception],
        MemoryObjectSendStream[JSONRPCMessage],
    ],
]
@@ -0,0 +1,293 @@
1
+ import contextlib
2
+ import sys
3
+ from contextlib import AsyncExitStack
4
+ from fnmatch import fnmatch
5
+ from logging import getLogger
6
+ from pathlib import Path
7
+ from typing import Any, AsyncIterator, Callable, Literal
8
+
9
+ import anyio
10
+ from mcp import McpError
11
+ from mcp.client.session import ClientSession, SamplingFnT
12
+ from mcp.client.sse import sse_client
13
+ from mcp.client.stdio import StdioServerParameters, stdio_client
14
+ from mcp.types import (
15
+ EmbeddedResource,
16
+ ImageContent,
17
+ TextContent,
18
+ TextResourceContents,
19
+ )
20
+ from mcp.types import Tool as MCPTool
21
+ from typing_extensions import override
22
+
23
+ from inspect_ai._util.format import format_function_call
24
+ from inspect_ai._util.trace import trace_action
25
+ from inspect_ai.tool._json_rpc_helpers import exception_for_rpc_response_error
26
+ from inspect_ai.tool._tool import Tool, ToolError, ToolResult
27
+ from inspect_ai.tool._tool_def import ToolDef
28
+ from inspect_ai.tool._tool_params import ToolParams
29
+
30
+ from ._context import MCPServerContext
31
+ from ._sandbox import sandbox_client
32
+ from ._types import MCPServer
33
+ from .sampling import as_inspect_content, sampling_fn
34
+
35
+ # https://github.com/modelcontextprotocol/python-sdk/pull/401
36
+ # https://github.com/modelcontextprotocol/python-sdk/pull/361
37
+ # https://github.com/modelcontextprotocol/python-sdk/pull/289
38
+
39
+ logger = getLogger(__name__)
40
+
41
+
42
class MCPServerImpl(MCPServer):
    """MCP server front-end that delegates every call to a per-task session.

    Each (async task, server name) pair gets its own `MCPServerSession`, created
    lazily the first time the server is used from that task.
    """

    def __init__(
        self, client: Callable[[], MCPServerContext], *, name: str, events: bool
    ) -> None:
        """Store the client factory and settings used to create sessions.

        Args:
          client: Zero-argument factory returning the client context manager.
          name: Name of the server (used for tracing and session lookup).
          events: Passed through to each `MCPServerSession`.
        """
        super().__init__()
        self._client = client
        self._name = name
        self._events = events

    @override
    async def _connect(self) -> None:
        await self._task_session()._connect()

    @override
    async def _close(self) -> None:
        await self._task_session()._close()

    async def _list_tools(
        self, tools: Literal["all"] | list[str] = "all"
    ) -> list[Tool]:
        return await self._task_session()._list_tools(tools)

    # Registry of sessions shared by all instances. Keyed by async task id AND
    # server name so that two different servers used from the same task never
    # end up sharing (and talking through) one another's session.
    _task_sessions: dict[tuple[int, str], "MCPServerSession"] = {}

    def _task_session(self) -> "MCPServerSession":
        """Return the session for the current async task, creating it on first use."""
        session_key = (anyio.get_current_task().id, self._name)
        sessions = MCPServerImpl._task_sessions
        if session_key not in sessions:
            sessions[session_key] = MCPServerSession(
                self._client, name=self._name, events=self._events
            )
        return sessions[session_key]
74
+
75
+
76
class MCPServerSession(MCPServer):
    """Reference-counted connection to an MCP server.

    `_connect` / `_close` open and release a long-lived `ClientSession`; when no
    such session is open, operations transparently create a short-lived one.
    """

    def __init__(
        self, client: Callable[[], MCPServerContext], *, name: str, events: bool
    ) -> None:
        """Store the client factory and settings; no connection is made here."""
        super().__init__()
        self._refcount = 0
        self._client = client
        self._name = name
        self._events = events
        self._session: ClientSession | None = None
        self._exit_stack: AsyncExitStack | None = None
        self._cached_tool_list: list[MCPTool] | None = None

    @override
    async def _connect(self) -> None:
        """Open the long-lived session, or take another reference to it."""
        # already connected: just bump the reference count
        if self._session is not None:
            assert self._refcount > 0
            self._refcount = self._refcount + 1
            return

        assert self._refcount == 0
        # Build the connection inside a temporary exit stack so that a failure
        # at any step unwinds whatever was already entered. State on `self` is
        # only committed once the session is fully initialized, so a failed
        # connect never leaves a half-open session behind.
        async with AsyncExitStack() as exit_stack:
            with trace_action(logger, "MCPServer", f"create client ({self._name})"):
                read, write = await exit_stack.enter_async_context(self._client())
            with trace_action(logger, "MCPServer", f"create session ({self._name})"):
                session = await exit_stack.enter_async_context(
                    ClientSession(read, write, sampling_callback=self._sampling_fn())
                )
            with trace_action(
                logger, "MCPServer", f"initialize session ({self._name})"
            ):
                await session.initialize()

            # success: transfer ownership of the entered contexts to a stack
            # that outlives this block (it is closed later by `_close`)
            self._exit_stack = exit_stack.pop_all()
            self._session = session
            self._refcount = 1

    @override
    async def _close(self) -> None:
        """Drop one reference; disconnect when the last reference is released."""
        assert self._refcount > 0
        self._refcount = self._refcount - 1
        if self._refcount == 0:
            with trace_action(logger, "MCPServer", f"disconnect ({self._name})"):
                assert self._session is not None
                assert self._exit_stack is not None
                try:
                    await self._exit_stack.aclose()
                finally:
                    # clear state even if closing raised
                    self._session = None
                    self._exit_stack = None

    async def _list_tools(
        self, tools: Literal["all"] | list[str] = "all"
    ) -> list[Tool]:
        """List the server's tools as Inspect tools.

        Args:
          tools: "all", or a list of names / fnmatch-style patterns selecting
            which server tools to include.

        Returns:
          list[Tool]: One dynamically created tool per selected server tool.
        """
        # `is not None` (rather than truthiness) so that a server exposing no
        # tools is also served from the cache instead of being re-queried
        if self._cached_tool_list is not None:
            mcp_tools = self._cached_tool_list
        else:
            async with self._client_session() as session:
                # get the underlying tools on the server
                with trace_action(logger, "MCPServer", f"list_tools {self._name}"):
                    mcp_tools = (await session.list_tools()).tools
                    self._cached_tool_list = mcp_tools

        # filter them
        def include_tool(tool: MCPTool) -> bool:
            if tools == "all":
                return True
            else:
                return any(fnmatch(tool.name, t) for t in tools)

        mcp_tools = [mcp_tool for mcp_tool in mcp_tools if include_tool(mcp_tool)]

        # dynamically create tools
        return [
            self._tool_def_from_mcp_tool(mcp_tool).as_tool() for mcp_tool in mcp_tools
        ]

    def _tool_def_from_mcp_tool(self, mcp_tool: MCPTool) -> ToolDef:
        """Wrap an MCP tool description in a `ToolDef` that calls the server."""

        async def execute(**kwargs: Any) -> ToolResult:
            async with self._client_session() as tool_session:
                mcp_call = format_function_call(
                    mcp_tool.name, kwargs, width=sys.maxsize
                )
                with trace_action(
                    logger, "MCPServer", f"call_tool ({self._name}): {mcp_call}"
                ):
                    try:
                        result = await tool_session.call_tool(mcp_tool.name, kwargs)
                        if result.isError:
                            raise ToolError(tool_result_as_text(result.content))
                    except McpError as e:
                        # Some errors that are raised via McpError (e.g. -32603)
                        # need to be converted to ToolError so that they make it
                        # back to the model.
                        raise exception_for_rpc_response_error(
                            e.error.code, e.error.message, mcp_tool.name, kwargs
                        ) from e

            return [as_inspect_content(c) for c in result.content]

        # get parameters (fill in missing descriptions with the parameter name)
        parameters = ToolParams.model_validate(mcp_tool.inputSchema)
        for name, param in parameters.properties.items():
            param.description = param.description or name

        return ToolDef(
            execute,
            name=mcp_tool.name,
            description=mcp_tool.description,
            parameters=parameters,
        )

    # if we have been entered as a context manager then return that session,
    # otherwise, create a brand new session from the client
    @contextlib.asynccontextmanager
    async def _client_session(self) -> AsyncIterator[ClientSession]:
        """Yield the connected session, or a temporary one if not connected."""
        # if _connect has been previously called and we still have the connection
        # to the session, we can just return it
        if self._session is not None:
            yield self._session

        # otherwise, create a new session and yield it (it will be cleaned up
        # when the context manager exits)
        else:
            async with AsyncExitStack() as exit_stack:
                with trace_action(logger, "MCPServer", f"create client ({self._name})"):
                    read, write = await exit_stack.enter_async_context(self._client())
                with trace_action(
                    logger, "MCPServer", f"create session ({self._name})"
                ):
                    session = await exit_stack.enter_async_context(
                        ClientSession(
                            read, write, sampling_callback=self._sampling_fn()
                        )
                    )
                with trace_action(
                    logger, "MCPServer", f"initialize session ({self._name})"
                ):
                    await session.initialize()
                yield session

    def _sampling_fn(self) -> SamplingFnT | None:
        """Return the sampling callback, or None when it should not be used.

        The callback is only provided when `events` is enabled for this session
        and there is an active model.
        """
        from inspect_ai.model._model import active_model

        if self._events and active_model() is not None:
            return sampling_fn
        else:
            return None
221
+
222
+
223
def create_server_sse(
    url: str,
    headers: dict[str, Any] | None = None,
    timeout: float = 5,
    sse_read_timeout: float = 60 * 5,
) -> MCPServer:
    """Create an MCP server that connects to `url` using the SSE client.

    The server is named after the URL; `headers`, `timeout` and
    `sse_read_timeout` are forwarded to `sse_client` each time a connection
    is opened.
    """

    def open_client() -> MCPServerContext:
        return sse_client(url, headers, timeout, sse_read_timeout)

    return MCPServerImpl(open_client, name=url, events=True)
234
+
235
+
236
def create_server_stdio(
    command: str,
    args: list[str] | None = None,
    cwd: str | Path | None = None,
    env: dict[str, str] | None = None,
) -> MCPServer:
    """Create an MCP server that is launched as a local process over stdio.

    Args:
      command: Executable to run.
      args: Command line arguments (defaults to none).
      cwd: Working directory for the process.
      env: Environment variables for the process.

    Returns:
      MCPServer: Server named after the full command line.
    """
    # Avoid a shared mutable default, and snapshot the list so that later
    # changes by the caller cannot alter what the (lazily invoked) client
    # factory launches.
    args = list(args) if args is not None else []
    return MCPServerImpl(
        lambda: stdio_client(
            StdioServerParameters(
                command=command,
                args=args,
                cwd=cwd,
                env=env,
            )
        ),
        name=" ".join([command] + args),
        events=True,
    )
254
+
255
+
256
def create_server_sandbox(
    command: str,
    args: list[str] | None = None,
    cwd: str | Path | None = None,
    env: dict[str, str] | None = None,
    sandbox: str | None = None,
) -> MCPServer:
    """Create an MCP server whose client is created via `sandbox_client`.

    Args:
      command: Executable to run.
      args: Command line arguments (defaults to none).
      cwd: Working directory for the process.
      env: Environment variables for the process.
      sandbox: Passed to `sandbox_client` as `sandbox_name`.

    Returns:
      MCPServer: Server named after the full command line (created with
        `events=False`).
    """
    # Avoid a shared mutable default, and snapshot the list so that later
    # changes by the caller cannot alter what the (lazily invoked) client
    # factory launches.
    args = list(args) if args is not None else []

    # TODO: Confirm the lifetime concepts. By the time a request makes it to the
    # sandbox, it's going to need both a session id and a server "name".
    name = " ".join([command] + args)
    return MCPServerImpl(
        lambda: sandbox_client(
            StdioServerParameters(
                command=command,
                args=args,
                cwd=cwd,
                env=env,
            ),
            sandbox_name=sandbox,
        ),
        name=name,
        events=False,
    )
279
+
280
+
281
def tool_result_as_text(
    content: list[TextContent | ImageContent | EmbeddedResource],
) -> str:
    """Flatten MCP tool result content into a single text string.

    Text items contribute their text, images contribute a fixed placeholder,
    and embedded resources contribute their text only when the resource is a
    `TextResourceContents`. The pieces are joined with blank lines.
    """

    def as_text(item: TextContent | ImageContent | EmbeddedResource) -> str | None:
        # None means "contributes nothing" (e.g. a non-text embedded resource)
        if isinstance(item, TextContent):
            return item.text
        if isinstance(item, ImageContent):
            return "(base64 encoded image ommitted)"
        if isinstance(item.resource, TextResourceContents):
            return item.resource.text
        return None

    pieces = [text for text in map(as_text, content) if text is not None]
    return "\n\n".join(pieces)