inspect-ai 0.3.89__py3-none-any.whl → 0.3.91__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (370) hide show
  1. inspect_ai/_cli/common.py +13 -0
  2. inspect_ai/_cli/eval.py +40 -0
  3. inspect_ai/_display/textual/widgets/samples.py +49 -4
  4. inspect_ai/_display/textual/widgets/vscode.py +4 -2
  5. inspect_ai/_eval/eval.py +41 -28
  6. inspect_ai/_eval/evalset.py +4 -0
  7. inspect_ai/_eval/loader.py +4 -5
  8. inspect_ai/_eval/registry.py +1 -1
  9. inspect_ai/_eval/run.py +6 -3
  10. inspect_ai/_eval/task/log.py +6 -0
  11. inspect_ai/_eval/task/run.py +108 -53
  12. inspect_ai/_eval/task/sandbox.py +19 -5
  13. inspect_ai/_util/_async.py +1 -1
  14. inspect_ai/_util/constants.py +1 -0
  15. inspect_ai/_util/environ.py +32 -0
  16. inspect_ai/_util/file.py +8 -1
  17. inspect_ai/_util/httpx.py +105 -22
  18. inspect_ai/_util/registry.py +83 -9
  19. inspect_ai/_util/text.py +81 -17
  20. inspect_ai/_util/transcript.py +9 -6
  21. inspect_ai/_util/vscode.py +7 -2
  22. inspect_ai/_view/schema.py +1 -1
  23. inspect_ai/_view/www/babel.config.js +11 -0
  24. inspect_ai/_view/www/dist/assets/index.css +3583 -3508
  25. inspect_ai/_view/www/dist/assets/index.js +59212 -52521
  26. inspect_ai/_view/www/eslint.config.mjs +10 -1
  27. inspect_ai/_view/www/jest.config.mjs +21 -0
  28. inspect_ai/_view/www/log-schema.json +111 -2
  29. inspect_ai/_view/www/package.json +19 -5
  30. inspect_ai/_view/www/src/{types → @types}/log.d.ts +95 -32
  31. inspect_ai/_view/www/{App.css → src/app/App.css} +22 -14
  32. inspect_ai/_view/www/src/app/App.tsx +168 -0
  33. inspect_ai/_view/www/src/{AppErrorBoundary.tsx → app/AppErrorBoundary.tsx} +1 -1
  34. inspect_ai/_view/www/src/{appearance → app/appearance}/icons.ts +1 -0
  35. inspect_ai/_view/www/src/{metadata → app/content}/RenderedContent.tsx +5 -5
  36. inspect_ai/_view/www/src/{workspace/WorkSpaceView.tsx → app/log-view/LogView.tsx} +59 -40
  37. inspect_ai/_view/www/src/app/log-view/LogViewContainer.tsx +159 -0
  38. inspect_ai/_view/www/src/app/log-view/LogViewLayout.tsx +109 -0
  39. inspect_ai/_view/www/src/{workspace → app/log-view}/error/TaskErrorPanel.tsx +3 -3
  40. inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ModelRolesView.tsx +1 -1
  41. inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/Navbar.tsx +4 -4
  42. inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/PrimaryBar.tsx +8 -8
  43. inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ResultsPanel.tsx +6 -6
  44. inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/RunningStatusPanel.tsx +1 -1
  45. inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ScoreGrid.tsx +1 -1
  46. inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/SecondaryBar.tsx +8 -8
  47. inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/InfoTab.tsx +35 -6
  48. inspect_ai/_view/www/src/app/log-view/tabs/JsonTab.tsx +136 -0
  49. inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/SamplesTab.tsx +82 -73
  50. inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/grouping.ts +3 -3
  51. inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/types.ts +1 -1
  52. inspect_ai/_view/www/src/{plan → app/plan}/DatasetDetailView.tsx +2 -2
  53. inspect_ai/_view/www/src/{plan → app/plan}/DetailStep.tsx +1 -1
  54. inspect_ai/_view/www/src/{plan → app/plan}/ModelCard.tsx +4 -4
  55. inspect_ai/_view/www/src/{plan → app/plan}/PlanCard.tsx +2 -2
  56. inspect_ai/_view/www/src/{plan → app/plan}/PlanDetailView.tsx +5 -5
  57. inspect_ai/_view/www/src/{plan → app/plan}/SolverDetailView.tsx +1 -1
  58. inspect_ai/_view/www/src/app/routing/AppRouter.tsx +58 -0
  59. inspect_ai/_view/www/src/app/routing/navigationHooks.ts +182 -0
  60. inspect_ai/_view/www/src/app/routing/url.ts +43 -0
  61. inspect_ai/_view/www/src/{samples → app/samples}/InlineSampleDisplay.tsx +11 -27
  62. inspect_ai/_view/www/src/{samples → app/samples}/SampleDialog.tsx +36 -40
  63. inspect_ai/_view/www/src/{samples → app/samples}/SampleDisplay.module.css +4 -0
  64. inspect_ai/_view/www/src/{samples → app/samples}/SampleDisplay.tsx +116 -49
  65. inspect_ai/_view/www/src/{samples → app/samples}/SampleSummaryView.module.css +1 -1
  66. inspect_ai/_view/www/src/{samples → app/samples}/SampleSummaryView.tsx +29 -26
  67. inspect_ai/_view/www/src/{samples → app/samples}/SamplesTools.tsx +3 -3
  68. inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessage.module.css +5 -2
  69. inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessage.tsx +13 -5
  70. inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessageRenderer.tsx +3 -3
  71. inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessageRow.tsx +6 -1
  72. inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatView.tsx +4 -2
  73. inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatViewVirtualList.tsx +5 -3
  74. inspect_ai/_view/www/src/app/samples/chat/MessageContent.module.css +12 -0
  75. inspect_ai/_view/www/src/{samples → app/samples}/chat/MessageContent.tsx +11 -10
  76. inspect_ai/_view/www/src/app/samples/chat/MessageContents.module.css +7 -0
  77. inspect_ai/_view/www/src/{samples → app/samples}/chat/MessageContents.tsx +14 -8
  78. inspect_ai/_view/www/src/{samples → app/samples}/chat/messages.ts +2 -2
  79. inspect_ai/_view/www/src/app/samples/chat/tools/ToolCallView.module.css +7 -0
  80. inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolCallView.tsx +26 -27
  81. inspect_ai/_view/www/src/app/samples/chat/tools/ToolInput.module.css +19 -0
  82. inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolInput.tsx +3 -3
  83. inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolOutput.module.css +1 -0
  84. inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolOutput.tsx +1 -1
  85. inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolTitle.module.css +4 -0
  86. inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolTitle.tsx +2 -2
  87. inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/tool.ts +1 -1
  88. inspect_ai/_view/www/src/app/samples/chat/types.ts +1 -0
  89. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/samplesDescriptor.tsx +38 -15
  90. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/BooleanScoreDescriptor.tsx +1 -1
  91. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/CategoricalScoreDescriptor.tsx +2 -2
  92. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/NumericScoreDescriptor.tsx +3 -3
  93. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/ObjectScoreDescriptor.tsx +4 -4
  94. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/OtherScoreDescriptor.tsx +2 -2
  95. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/PassFailScoreDescriptor.tsx +2 -2
  96. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/ScoreDescriptor.tsx +1 -1
  97. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/types.ts +4 -3
  98. inspect_ai/_view/www/src/{samples → app/samples}/error/SampleErrorView.module.css +2 -1
  99. inspect_ai/_view/www/src/{samples → app/samples}/list/SampleHeader.tsx +3 -0
  100. inspect_ai/_view/www/src/{samples → app/samples}/list/SampleList.tsx +47 -33
  101. inspect_ai/_view/www/src/{samples → app/samples}/list/SampleRow.module.css +16 -0
  102. inspect_ai/_view/www/src/{samples → app/samples}/list/SampleRow.tsx +47 -20
  103. inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/SelectScorer.tsx +1 -1
  104. inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/SortFilter.tsx +4 -4
  105. inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/filters.ts +8 -6
  106. inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/SampleFilter.tsx +4 -3
  107. inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/completions.ts +1 -1
  108. inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/language.ts +1 -0
  109. inspect_ai/_view/www/src/{samples → app/samples}/sampleDataAdapter.ts +3 -3
  110. inspect_ai/_view/www/src/{samples → app/samples}/sampleLimit.ts +1 -1
  111. inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScores.tsx +1 -1
  112. inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScoresGrid.tsx +12 -11
  113. inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScoresView.tsx +6 -6
  114. inspect_ai/_view/www/src/{samples → app/samples}/transcript/ApprovalEventView.tsx +1 -1
  115. inspect_ai/_view/www/src/{samples → app/samples}/transcript/ErrorEventView.tsx +3 -3
  116. inspect_ai/_view/www/src/{samples → app/samples}/transcript/InfoEventView.tsx +4 -4
  117. inspect_ai/_view/www/src/{samples → app/samples}/transcript/InputEventView.tsx +3 -3
  118. inspect_ai/_view/www/src/{samples → app/samples}/transcript/LoggerEventView.tsx +3 -3
  119. inspect_ai/_view/www/src/{samples → app/samples}/transcript/ModelEventView.module.css +13 -7
  120. inspect_ai/_view/www/src/{samples → app/samples}/transcript/ModelEventView.tsx +49 -21
  121. inspect_ai/_view/www/src/{samples → app/samples}/transcript/SampleInitEventView.tsx +11 -9
  122. inspect_ai/_view/www/src/{samples → app/samples}/transcript/SampleLimitEventView.tsx +1 -1
  123. inspect_ai/_view/www/src/{samples → app/samples}/transcript/SandboxEventView.tsx +8 -6
  124. inspect_ai/_view/www/src/{samples → app/samples}/transcript/ScoreEventView.tsx +4 -4
  125. inspect_ai/_view/www/src/{samples → app/samples}/transcript/StepEventView.tsx +22 -8
  126. inspect_ai/_view/www/src/{samples → app/samples}/transcript/SubtaskEventView.tsx +2 -2
  127. inspect_ai/_view/www/src/{samples → app/samples}/transcript/ToolEventView.tsx +2 -2
  128. inspect_ai/_view/www/src/{samples → app/samples}/transcript/TranscriptView.module.css +8 -9
  129. inspect_ai/_view/www/src/{samples → app/samples}/transcript/TranscriptView.tsx +32 -114
  130. inspect_ai/_view/www/src/{samples → app/samples}/transcript/TranscriptVirtualListComponent.module.css +6 -5
  131. inspect_ai/_view/www/src/{samples → app/samples}/transcript/TranscriptVirtualListComponent.tsx +14 -2
  132. inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventPanel.tsx +2 -2
  133. inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventTimingPanel.tsx +1 -1
  134. inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/utils.ts +1 -1
  135. inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateEventRenderers.tsx +23 -21
  136. inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateEventRenders.module.css +7 -0
  137. inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateEventView.tsx +2 -2
  138. inspect_ai/_view/www/src/app/samples/transcript/transform/fixups.ts +142 -0
  139. inspect_ai/_view/www/src/app/samples/transcript/transform/treeify.ts +39 -0
  140. inspect_ai/_view/www/src/{samples → app/samples}/transcript/types.ts +1 -1
  141. inspect_ai/_view/www/src/{workspace → app}/sidebar/EvalStatus.tsx +1 -1
  142. inspect_ai/_view/www/src/app/sidebar/LogDirectoryTitleView.module.css +16 -0
  143. inspect_ai/_view/www/src/app/sidebar/LogDirectoryTitleView.tsx +70 -0
  144. inspect_ai/_view/www/src/{workspace → app}/sidebar/Sidebar.module.css +8 -0
  145. inspect_ai/_view/www/src/{workspace → app}/sidebar/Sidebar.tsx +35 -17
  146. inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarLogEntry.tsx +1 -1
  147. inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarScoreView.tsx +2 -2
  148. inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarScoresView.tsx +2 -2
  149. inspect_ai/_view/www/src/{types.ts → app/types.ts} +18 -11
  150. inspect_ai/_view/www/src/{usage → app/usage}/ModelTokenTable.tsx +1 -1
  151. inspect_ai/_view/www/src/{usage → app/usage}/ModelUsagePanel.tsx +2 -2
  152. inspect_ai/_view/www/src/{usage → app/usage}/TokenTable.tsx +1 -1
  153. inspect_ai/_view/www/src/{usage → app/usage}/UsageCard.tsx +6 -6
  154. inspect_ai/_view/www/src/{api → client/api}/api-browser.ts +2 -2
  155. inspect_ai/_view/www/src/{api → client/api}/api-http.ts +3 -3
  156. inspect_ai/_view/www/src/{api → client/api}/api-vscode.ts +2 -2
  157. inspect_ai/_view/www/src/{api → client/api}/client-api.ts +6 -5
  158. inspect_ai/_view/www/src/{api → client/api}/index.ts +2 -2
  159. inspect_ai/_view/www/src/{api → client/api}/types.ts +4 -1
  160. inspect_ai/_view/www/src/{logfile → client/remote}/remoteLogFile.ts +3 -3
  161. inspect_ai/_view/www/src/{storage → client/storage}/index.ts +11 -5
  162. inspect_ai/_view/www/src/components/Card.tsx +1 -1
  163. inspect_ai/_view/www/src/components/CopyButton.tsx +1 -1
  164. inspect_ai/_view/www/src/components/DownloadButton.tsx +1 -1
  165. inspect_ai/_view/www/src/components/ErrorPanel.tsx +1 -1
  166. inspect_ai/_view/www/src/components/{ExpandablePanel.css → ExpandablePanel.module.css} +14 -11
  167. inspect_ai/_view/www/src/components/ExpandablePanel.tsx +16 -10
  168. inspect_ai/_view/www/src/components/FindBand.tsx +1 -1
  169. inspect_ai/_view/www/src/components/JsonPanel.css +2 -2
  170. inspect_ai/_view/www/src/components/LargeModal.tsx +12 -1
  171. inspect_ai/_view/www/src/components/LightboxCarousel.tsx +1 -1
  172. inspect_ai/_view/www/src/components/MarkdownDiv.tsx +3 -1
  173. inspect_ai/_view/www/src/components/MessageBand.tsx +1 -1
  174. inspect_ai/_view/www/src/components/NoContentsPanel.tsx +1 -1
  175. inspect_ai/_view/www/src/constants.ts +10 -9
  176. inspect_ai/_view/www/src/index.tsx +27 -11
  177. inspect_ai/_view/www/src/state/appSlice.ts +44 -5
  178. inspect_ai/_view/www/src/state/hooks.ts +30 -7
  179. inspect_ai/_view/www/src/state/logSlice.ts +7 -5
  180. inspect_ai/_view/www/src/state/logsPolling.ts +1 -1
  181. inspect_ai/_view/www/src/state/logsSlice.ts +18 -13
  182. inspect_ai/_view/www/src/state/samplePolling.ts +12 -12
  183. inspect_ai/_view/www/src/state/sampleSlice.ts +3 -5
  184. inspect_ai/_view/www/src/state/sampleUtils.ts +1 -1
  185. inspect_ai/_view/www/src/{scoring/utils.ts → state/scoring.ts} +2 -2
  186. inspect_ai/_view/www/src/state/store.ts +9 -7
  187. inspect_ai/_view/www/src/state/utils.ts +1 -1
  188. inspect_ai/_view/www/src/tests/README.md +49 -0
  189. inspect_ai/_view/www/src/tests/__mocks__/fileMock.js +1 -0
  190. inspect_ai/_view/www/src/tests/__mocks__/styleMock.js +1 -0
  191. inspect_ai/_view/www/src/tests/setupTests.mjs +1 -0
  192. inspect_ai/_view/www/src/tests/utils/base64.test.ts +23 -0
  193. inspect_ai/_view/www/src/tests/utils/format.test.ts +127 -0
  194. inspect_ai/_view/www/src/tests/utils/path.test.ts +54 -0
  195. inspect_ai/_view/www/src/utils/format.ts +8 -2
  196. inspect_ai/_view/www/src/utils/path.ts +14 -2
  197. inspect_ai/_view/www/src/utils/polling.ts +1 -2
  198. inspect_ai/_view/www/src/utils/uri.ts +32 -0
  199. inspect_ai/_view/www/yarn.lock +3310 -382
  200. inspect_ai/agent/_handoff.py +6 -3
  201. inspect_ai/agent/_human/agent.py +5 -3
  202. inspect_ai/agent/_human/install.py +16 -7
  203. inspect_ai/agent/_human/panel.py +14 -1
  204. inspect_ai/agent/_human/service.py +5 -1
  205. inspect_ai/agent/_react.py +161 -128
  206. inspect_ai/agent/_types.py +15 -4
  207. inspect_ai/approval/_policy.py +2 -2
  208. inspect_ai/log/_file.py +30 -11
  209. inspect_ai/log/_log.py +7 -1
  210. inspect_ai/log/_recorders/eval.py +3 -0
  211. inspect_ai/log/_recorders/types.py +1 -0
  212. inspect_ai/log/_samples.py +4 -0
  213. inspect_ai/model/_call_tools.py +33 -17
  214. inspect_ai/model/_generate_config.py +10 -2
  215. inspect_ai/model/_model.py +41 -21
  216. inspect_ai/model/_model_output.py +2 -1
  217. inspect_ai/model/_openai.py +10 -8
  218. inspect_ai/model/_openai_responses.py +83 -42
  219. inspect_ai/model/_providers/anthropic.py +14 -12
  220. inspect_ai/model/_providers/google.py +191 -95
  221. inspect_ai/model/_providers/hf.py +1 -1
  222. inspect_ai/model/_providers/mistral.py +2 -3
  223. inspect_ai/model/_providers/openai.py +54 -17
  224. inspect_ai/model/_providers/openai_o1.py +1 -1
  225. inspect_ai/model/_providers/openai_responses.py +28 -16
  226. inspect_ai/model/_providers/openrouter.py +14 -0
  227. inspect_ai/model/_providers/providers.py +2 -2
  228. inspect_ai/model/_providers/util/chatapi.py +17 -7
  229. inspect_ai/model/_providers/vllm.py +1 -1
  230. inspect_ai/scorer/_metric.py +17 -1
  231. inspect_ai/scorer/_model.py +51 -6
  232. inspect_ai/scorer/_scorer.py +1 -1
  233. inspect_ai/solver/_human_agent.py +3 -0
  234. inspect_ai/solver/_plan.py +1 -1
  235. inspect_ai/solver/_solver.py +1 -1
  236. inspect_ai/solver/_use_tools.py +14 -8
  237. inspect_ai/tool/__init__.py +16 -1
  238. inspect_ai/tool/_json_rpc_helpers.py +285 -0
  239. inspect_ai/tool/_mcp/__init__.py +13 -0
  240. inspect_ai/tool/_mcp/_context.py +14 -0
  241. inspect_ai/tool/_mcp/_mcp.py +293 -0
  242. inspect_ai/tool/_mcp/_sandbox.py +104 -0
  243. inspect_ai/tool/_mcp/_types.py +31 -0
  244. inspect_ai/tool/_mcp/connection.py +60 -0
  245. inspect_ai/tool/_mcp/sampling.py +118 -0
  246. inspect_ai/tool/_mcp/server.py +112 -0
  247. inspect_ai/tool/_mcp/tools.py +34 -0
  248. inspect_ai/tool/_tool.py +13 -0
  249. inspect_ai/tool/_tool_def.py +24 -7
  250. inspect_ai/tool/_tool_support_helpers.py +129 -153
  251. inspect_ai/tool/_tools/_bash_session.py +11 -11
  252. inspect_ai/tool/_tools/_text_editor.py +6 -6
  253. inspect_ai/tool/_tools/_web_browser/_web_browser.py +8 -8
  254. inspect_ai/util/_anyio.py +31 -20
  255. inspect_ai/util/_json.py +20 -2
  256. inspect_ai/util/_sandbox/context.py +18 -7
  257. inspect_ai/util/_sandbox/docker/compose.py +1 -1
  258. inspect_ai/util/_sandbox/docker/docker.py +92 -21
  259. inspect_ai/util/_sandbox/environment.py +33 -2
  260. inspect_ai/util/_sandbox/events.py +2 -2
  261. inspect_ai/util/_sandbox/service.py +13 -3
  262. {inspect_ai-0.3.89.dist-info → inspect_ai-0.3.91.dist-info}/METADATA +6 -2
  263. inspect_ai-0.3.91.dist-info/RECORD +732 -0
  264. {inspect_ai-0.3.89.dist-info → inspect_ai-0.3.91.dist-info}/WHEEL +1 -1
  265. inspect_ai/_view/www/src/App.tsx +0 -316
  266. inspect_ai/_view/www/src/samples/chat/MessageContent.module.css +0 -4
  267. inspect_ai/_view/www/src/samples/chat/MessageContents.module.css +0 -3
  268. inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.module.css +0 -3
  269. inspect_ai/_view/www/src/samples/chat/tools/ToolInput.module.css +0 -14
  270. inspect_ai/_view/www/src/workspace/WorkSpace.tsx +0 -292
  271. inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.module.css +0 -5
  272. inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +0 -57
  273. inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +0 -43
  274. inspect_ai-0.3.89.dist-info/RECORD +0 -705
  275. /inspect_ai/_view/www/src/{types → @types}/asciicinema-player.d.ts +0 -0
  276. /inspect_ai/_view/www/src/{types → @types}/jsondiffpatch.d.ts +0 -0
  277. /inspect_ai/_view/www/src/{types → @types}/markdown-it-katex.d.ts +0 -0
  278. /inspect_ai/_view/www/src/{types → @types}/prism.d.ts +0 -0
  279. /inspect_ai/_view/www/src/{appearance → app/appearance}/colors.ts +0 -0
  280. /inspect_ai/_view/www/src/{appearance → app/appearance}/fonts.ts +0 -0
  281. /inspect_ai/_view/www/src/{appearance → app/appearance}/styles.ts +0 -0
  282. /inspect_ai/_view/www/src/{metadata → app/content}/MetaDataGrid.tsx +0 -0
  283. /inspect_ai/_view/www/src/{metadata → app/content}/MetaDataView.module.css +0 -0
  284. /inspect_ai/_view/www/src/{metadata → app/content}/MetaDataView.tsx +0 -0
  285. /inspect_ai/_view/www/src/{metadata → app/content}/MetadataGrid.module.css +0 -0
  286. /inspect_ai/_view/www/src/{metadata → app/content}/RenderedContent.module.css +0 -0
  287. /inspect_ai/_view/www/src/{metadata → app/content}/types.ts +0 -0
  288. /inspect_ai/_view/www/src/{workspace/WorkSpaceView.module.css → app/log-view/LogView.module.css} +0 -0
  289. /inspect_ai/_view/www/src/{workspace → app/log-view}/error/TaskErrorPanel.module.css +0 -0
  290. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ModelRolesView.module.css +0 -0
  291. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/Navbar.module.css +0 -0
  292. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/PrimaryBar.module.css +0 -0
  293. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ResultsPanel.module.css +0 -0
  294. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/RunningStatusPanel.module.css +0 -0
  295. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ScoreGrid.module.css +0 -0
  296. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/SecondaryBar.module.css +0 -0
  297. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/StatusPanel.module.css +0 -0
  298. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/StatusPanel.tsx +0 -0
  299. /inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/InfoTab.module.css +0 -0
  300. /inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/JsonTab.module.css +0 -0
  301. /inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/RunningNoSamples.module.css +0 -0
  302. /inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/RunningNoSamples.tsx +0 -0
  303. /inspect_ai/_view/www/src/{workspace → app/log-view}/types.ts +0 -0
  304. /inspect_ai/_view/www/src/{workspace → app/log-view}/utils.ts +0 -0
  305. /inspect_ai/_view/www/src/{plan → app/plan}/DatasetDetailView.module.css +0 -0
  306. /inspect_ai/_view/www/src/{plan → app/plan}/DetailStep.module.css +0 -0
  307. /inspect_ai/_view/www/src/{plan → app/plan}/ModelCard.module.css +0 -0
  308. /inspect_ai/_view/www/src/{plan → app/plan}/PlanDetailView.module.css +0 -0
  309. /inspect_ai/_view/www/src/{plan → app/plan}/ScorerDetailView.module.css +0 -0
  310. /inspect_ai/_view/www/src/{plan → app/plan}/ScorerDetailView.tsx +0 -0
  311. /inspect_ai/_view/www/src/{plan → app/plan}/SolverDetailView.module.css +0 -0
  312. /inspect_ai/_view/www/src/{samples → app/samples}/InlineSampleDisplay.module.css +0 -0
  313. /inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessageRow.module.css +0 -0
  314. /inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatViewVirtualList.module.css +0 -0
  315. /inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/BooleanScoreDescriptor.module.css +0 -0
  316. /inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/ObjectScoreDescriptor.module.css +0 -0
  317. /inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/PassFailScoreDescriptor.module.css +0 -0
  318. /inspect_ai/_view/www/src/{samples → app/samples}/error/FlatSampleErrorView.module.css +0 -0
  319. /inspect_ai/_view/www/src/{samples → app/samples}/error/FlatSampleErrorView.tsx +0 -0
  320. /inspect_ai/_view/www/src/{samples → app/samples}/error/SampleErrorView.tsx +0 -0
  321. /inspect_ai/_view/www/src/{samples → app/samples}/error/error.ts +0 -0
  322. /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleFooter.module.css +0 -0
  323. /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleFooter.tsx +0 -0
  324. /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleHeader.module.css +0 -0
  325. /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleList.module.css +0 -0
  326. /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleSeparator.module.css +0 -0
  327. /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleSeparator.tsx +0 -0
  328. /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/EpochFilter.module.css +0 -0
  329. /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/EpochFilter.tsx +0 -0
  330. /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/SelectScorer.module.css +0 -0
  331. /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/SortFilter.module.css +0 -0
  332. /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/SampleFilter.module.css +0 -0
  333. /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/tokenize.ts +0 -0
  334. /inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScores.module.css +0 -0
  335. /inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScoresGrid.module.css +0 -0
  336. /inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScoresView.module.css +0 -0
  337. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/InfoEventView.module.css +0 -0
  338. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/LoggerEventView.module.css +0 -0
  339. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/SampleInitEventView.module.css +0 -0
  340. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/SandboxEventView.module.css +0 -0
  341. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/ScoreEventView.module.css +0 -0
  342. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/SubtaskEventView.module.css +0 -0
  343. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/ToolEventView.module.css +0 -0
  344. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventNav.module.css +0 -0
  345. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventNav.tsx +0 -0
  346. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventNavs.module.css +0 -0
  347. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventNavs.tsx +0 -0
  348. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventPanel.module.css +0 -0
  349. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventProgressPanel.module.css +0 -0
  350. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventProgressPanel.tsx +0 -0
  351. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventRow.module.css +0 -0
  352. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventRow.tsx +0 -0
  353. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventSection.module.css +0 -0
  354. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventSection.tsx +0 -0
  355. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventTimingPanel.module.css +0 -0
  356. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateDiffView.tsx +0 -0
  357. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateEventView.module.css +0 -0
  358. /inspect_ai/_view/www/src/{workspace → app}/sidebar/EvalStatus.module.css +0 -0
  359. /inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarLogEntry.module.css +0 -0
  360. /inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarScoreView.module.css +0 -0
  361. /inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarScoresView.module.css +0 -0
  362. /inspect_ai/_view/www/src/{usage → app/usage}/ModelUsagePanel.module.css +0 -0
  363. /inspect_ai/_view/www/src/{usage → app/usage}/TokenTable.module.css +0 -0
  364. /inspect_ai/_view/www/src/{usage → app/usage}/UsageCard.module.css +0 -0
  365. /inspect_ai/_view/www/src/{api → client/api}/api-shared.ts +0 -0
  366. /inspect_ai/_view/www/src/{api → client/api}/jsonrpc.ts +0 -0
  367. /inspect_ai/_view/www/src/{logfile → client/remote}/remoteZipFile.ts +0 -0
  368. {inspect_ai-0.3.89.dist-info → inspect_ai-0.3.91.dist-info}/entry_points.txt +0 -0
  369. {inspect_ai-0.3.89.dist-info → inspect_ai-0.3.91.dist-info}/licenses/LICENSE +0 -0
  370. {inspect_ai-0.3.89.dist-info → inspect_ai-0.3.91.dist-info}/top_level.txt +0 -0
inspect_ai/log/_file.py CHANGED
@@ -1,6 +1,7 @@
1
1
  import os
2
2
  import re
3
3
  from logging import getLogger
4
+ from pathlib import Path
4
5
  from typing import Any, Callable, Generator, Literal
5
6
 
6
7
  from pydantic import BaseModel
@@ -97,7 +98,7 @@ def list_eval_logs(
97
98
 
98
99
  def write_eval_log(
99
100
  log: EvalLog,
100
- location: str | FileInfo | None = None,
101
+ location: str | Path | FileInfo | None = None,
101
102
  format: Literal["eval", "json", "auto"] = "auto",
102
103
  ) -> None:
103
104
  """Write an evaluation log.
@@ -121,7 +122,7 @@ def write_eval_log(
121
122
 
122
123
  async def write_eval_log_async(
123
124
  log: EvalLog,
124
- location: str | FileInfo | None = None,
125
+ location: str | Path | FileInfo | None = None,
125
126
  format: Literal["eval", "json", "auto"] = "auto",
126
127
  ) -> None:
127
128
  """Write an evaluation log.
@@ -140,7 +141,13 @@ async def write_eval_log_async(
140
141
  raise ValueError(
141
142
  "EvalLog passe to write_eval_log does not have a location, so you must pass an explicit location"
142
143
  )
143
- location = location if isinstance(location, str) else location.name
144
+ location = (
145
+ location
146
+ if isinstance(location, str)
147
+ else location.as_posix()
148
+ if isinstance(location, Path)
149
+ else location.name
150
+ )
144
151
 
145
152
  logger.debug(f"Writing eval log to {location}")
146
153
 
@@ -197,7 +204,7 @@ def write_log_dir_manifest(
197
204
 
198
205
 
199
206
  def read_eval_log(
200
- log_file: str | EvalLogInfo,
207
+ log_file: str | Path | EvalLogInfo,
201
208
  header_only: bool = False,
202
209
  resolve_attachments: bool = False,
203
210
  format: Literal["eval", "json", "auto"] = "auto",
@@ -235,7 +242,7 @@ def read_eval_log(
235
242
 
236
243
 
237
244
  async def read_eval_log_async(
238
- log_file: str | EvalLogInfo,
245
+ log_file: str | Path | EvalLogInfo,
239
246
  header_only: bool = False,
240
247
  resolve_attachments: bool = False,
241
248
  format: Literal["eval", "json", "auto"] = "auto",
@@ -255,7 +262,13 @@ async def read_eval_log_async(
255
262
  EvalLog object read from file.
256
263
  """
257
264
  # resolve to file path
258
- log_file = log_file if isinstance(log_file, str) else log_file.name
265
+ log_file = (
266
+ log_file
267
+ if isinstance(log_file, str)
268
+ else log_file.as_posix()
269
+ if isinstance(log_file, Path)
270
+ else log_file.name
271
+ )
259
272
  logger.debug(f"Reading eval log from {log_file}")
260
273
 
261
274
  # get recorder type
@@ -291,7 +304,7 @@ def read_eval_log_headers(
291
304
 
292
305
 
293
306
  async def read_eval_log_headers_async(
294
- log_files: list[str] | list[EvalLogInfo],
307
+ log_files: list[str] | list[Path] | list[EvalLogInfo],
295
308
  ) -> list[EvalLog]:
296
309
  return [
297
310
  await read_eval_log_async(log_file, header_only=True) for log_file in log_files
@@ -299,7 +312,7 @@ async def read_eval_log_headers_async(
299
312
 
300
313
 
301
314
  def read_eval_log_sample(
302
- log_file: str | EvalLogInfo,
315
+ log_file: str | Path | EvalLogInfo,
303
316
  id: int | str,
304
317
  epoch: int = 1,
305
318
  resolve_attachments: bool = False,
@@ -336,7 +349,7 @@ def read_eval_log_sample(
336
349
 
337
350
 
338
351
  async def read_eval_log_sample_async(
339
- log_file: str | EvalLogInfo,
352
+ log_file: str | Path | EvalLogInfo,
340
353
  id: int | str,
341
354
  epoch: int = 1,
342
355
  resolve_attachments: bool = False,
@@ -360,7 +373,13 @@ async def read_eval_log_sample_async(
360
373
  IndexError: If the passed id and epoch are not found.
361
374
  """
362
375
  # resolve to file path
363
- log_file = log_file if isinstance(log_file, str) else log_file.name
376
+ log_file = (
377
+ log_file
378
+ if isinstance(log_file, str)
379
+ else log_file.as_posix()
380
+ if isinstance(log_file, Path)
381
+ else log_file.name
382
+ )
364
383
 
365
384
  if format == "auto":
366
385
  recorder_type = recorder_type_for_location(log_file)
@@ -375,7 +394,7 @@ async def read_eval_log_sample_async(
375
394
 
376
395
 
377
396
  def read_eval_log_samples(
378
- log_file: str | EvalLogInfo,
397
+ log_file: str | Path | EvalLogInfo,
379
398
  all_samples_required: bool = True,
380
399
  resolve_attachments: bool = False,
381
400
  format: Literal["eval", "json", "auto"] = "auto",
inspect_ai/log/_log.py CHANGED
@@ -87,6 +87,9 @@ class EvalConfig(BaseModel):
87
87
  of samples fails.
88
88
  """
89
89
 
90
+ retry_on_error: int | None = Field(default=None)
91
+ """Number of times to retry samples if they encounter errors."""
92
+
90
93
  message_limit: int | None = Field(default=None)
91
94
  """Maximum messages to allow per sample."""
92
95
 
@@ -255,6 +258,9 @@ class EvalSample(BaseModel):
255
258
  error: EvalError | None = Field(default=None)
256
259
  """Error that halted sample."""
257
260
 
261
+ error_retries: list[EvalError] | None = Field(default=None)
262
+ """Errors that were retried for this sample."""
263
+
258
264
  attachments: dict[str, str] = Field(default_factory=dict)
259
265
  """Attachments referenced from messages and events.
260
266
 
@@ -703,7 +709,7 @@ def rich_traceback(
703
709
  exc_value=exc_value,
704
710
  traceback=exc_traceback,
705
711
  suppress=[click, asyncio, tenacity, sys.modules[PKG_NAME]],
706
- show_locals=False,
712
+ show_locals=os.environ.get("INSPECT_TRACEBACK_LOCALS", None) == "1",
707
713
  width=CONSOLE_DISPLAY_WIDTH,
708
714
  )
709
715
  return rich_tb
@@ -329,6 +329,9 @@ class ZipLogFile:
329
329
  limit=f"{sample.limit.type}"
330
330
  if sample.limit is not None
331
331
  else None,
332
+ retries=len(sample.error_retries)
333
+ if sample.error_retries is not None
334
+ else None,
332
335
  )
333
336
  )
334
337
  self._samples.clear()
@@ -20,6 +20,7 @@ class SampleSummary(BaseModel):
20
20
  scores: dict[str, Score] | None = Field(default=None)
21
21
  error: str | None = Field(default=None)
22
22
  limit: str | None = Field(default=None)
23
+ retries: int | None = Field(default=None)
23
24
 
24
25
  @model_validator(mode="after")
25
26
  def thin_scores(self) -> "SampleSummary":
@@ -18,6 +18,7 @@ class ActiveSample:
18
18
  self,
19
19
  *,
20
20
  task: str,
21
+ log_location: str,
21
22
  model: str,
22
23
  sample: Sample,
23
24
  epoch: int,
@@ -33,6 +34,7 @@ class ActiveSample:
33
34
  self.started: float | None = None
34
35
  self.completed: float | None = None
35
36
  self.task = task
37
+ self.log_location = log_location
36
38
  self.model = model
37
39
  self.sample = sample
38
40
  self.epoch = epoch
@@ -76,6 +78,7 @@ def init_active_samples() -> None:
76
78
  async def active_sample(
77
79
  *,
78
80
  task: str,
81
+ log_location: str,
79
82
  model: str,
80
83
  sample: Sample,
81
84
  epoch: int,
@@ -89,6 +92,7 @@ async def active_sample(
89
92
  # create the sample
90
93
  active = ActiveSample(
91
94
  task=task,
95
+ log_location=log_location,
92
96
  model=model,
93
97
  sample=sample,
94
98
  epoch=epoch,
@@ -3,6 +3,7 @@ import json
3
3
  import types
4
4
  from copy import copy
5
5
  from dataclasses import is_dataclass
6
+ from datetime import date, datetime, time
6
7
  from logging import getLogger
7
8
  from textwrap import dedent
8
9
  from types import UnionType
@@ -13,6 +14,8 @@ from typing import (
13
14
  List,
14
15
  NamedTuple,
15
16
  Optional,
17
+ Sequence,
18
+ Set,
16
19
  Tuple,
17
20
  Type,
18
21
  Union,
@@ -45,7 +48,12 @@ from inspect_ai._util.working import sample_waiting_time
45
48
  from inspect_ai.model._display import display_conversation_message
46
49
  from inspect_ai.model._model_output import ModelOutput
47
50
  from inspect_ai.tool import Tool, ToolCall, ToolError, ToolInfo
48
- from inspect_ai.tool._tool import ToolApprovalError, ToolParsingError, ToolResult
51
+ from inspect_ai.tool._tool import (
52
+ ToolApprovalError,
53
+ ToolParsingError,
54
+ ToolResult,
55
+ ToolSource,
56
+ )
49
57
  from inspect_ai.tool._tool_call import ToolCallContent, ToolCallError
50
58
  from inspect_ai.tool._tool_def import ToolDef, tool_defs
51
59
  from inspect_ai.tool._tool_info import parse_docstring
@@ -83,7 +91,7 @@ class ExecuteToolsResult(NamedTuple):
83
91
 
84
92
  async def execute_tools(
85
93
  messages: list[ChatMessage],
86
- tools: list[Tool] | list[ToolDef] | list[Tool | ToolDef],
94
+ tools: Sequence[Tool | ToolDef | ToolSource] | ToolSource,
87
95
  max_output: int | None = None,
88
96
  ) -> ExecuteToolsResult:
89
97
  """Perform tool calls in the last assistant message.
@@ -108,7 +116,7 @@ async def execute_tools(
108
116
  transcript,
109
117
  )
110
118
 
111
- tdefs = tool_defs(tools)
119
+ tdefs = await tool_defs(tools)
112
120
 
113
121
  async def call_tool_task(
114
122
  call: ToolCall,
@@ -385,7 +393,6 @@ async def call_tool(
385
393
 
386
394
  # normal tool call
387
395
  else:
388
- arguments = tool_params(call.arguments, tool_def.tool)
389
396
  result: ToolResult = await tool_def.tool(**arguments)
390
397
  return result, [], None, None
391
398
 
@@ -498,10 +505,7 @@ def prepend_agent_name(
498
505
 
499
506
 
500
507
  def tools_info(
501
- tools: list[Tool]
502
- | list[ToolDef]
503
- | list[ToolInfo]
504
- | list[Tool | ToolDef | ToolInfo],
508
+ tools: Sequence[Tool | ToolDef | ToolInfo],
505
509
  ) -> list[ToolInfo]:
506
510
  tools_info: list[ToolInfo] = []
507
511
  for tool in tools:
@@ -521,16 +525,14 @@ def tools_info(
521
525
 
522
526
 
523
527
  def disable_parallel_tools(
524
- tools: list[Tool]
525
- | list[ToolDef]
526
- | list[ToolInfo]
527
- | list[Tool | ToolDef | ToolInfo],
528
+ tools: Sequence[Tool | ToolDef | ToolInfo | ToolSource] | ToolSource,
528
529
  ) -> bool:
529
- for tool in tools:
530
- if isinstance(tool, Tool):
531
- tool = ToolDef(tool)
532
- if isinstance(tool, ToolDef) and not tool.parallel:
533
- return True
530
+ if not isinstance(tools, ToolSource):
531
+ for tool in tools:
532
+ if isinstance(tool, Tool):
533
+ tool = ToolDef(tool)
534
+ if isinstance(tool, ToolDef) and not tool.parallel:
535
+ return True
534
536
  return False
535
537
 
536
538
 
@@ -598,6 +600,15 @@ def tool_param(type_hint: Type[Any], input: Any) -> Any:
598
600
  raise ToolParsingError(
599
601
  f"Unable to convert '{input}' to {type_hint.__name__}"
600
602
  )
603
+ elif type_hint == datetime:
604
+ if input.endswith("Z"):
605
+ # convert trailing Z to +00:00
606
+ input = input[:-1] + "+00:00"
607
+ return datetime.fromisoformat(input)
608
+ elif type_hint == date:
609
+ return date.fromisoformat(input)
610
+ elif type_hint == time:
611
+ return time.fromisoformat(input)
601
612
  elif is_typeddict(type_hint):
602
613
  typeddict_data: dict[str, Any] = {}
603
614
  annotations = get_type_hints(type_hint)
@@ -619,6 +630,11 @@ def tool_param(type_hint: Type[Any], input: Any) -> Any:
619
630
  return [tool_param(args[0], x) for x in input]
620
631
  else:
621
632
  return input
633
+ elif origin is set or origin is Set:
634
+ if args:
635
+ return {tool_param(args[0], x) for x in input}
636
+ else:
637
+ return set(input)
622
638
  elif origin is tuple or origin is Tuple:
623
639
  if args:
624
640
  return tuple([tool_param(args[0], x) for x in input])
@@ -29,7 +29,7 @@ class GenerateConfigArgs(TypedDict, total=False):
29
29
  """Type for kwargs that selectively override GenerateConfig."""
30
30
 
31
31
  max_retries: int | None
32
- """Maximum number of times to retry request (defaults to 5)."""
32
+ """Maximum number of times to retry request (defaults to unlimited)."""
33
33
 
34
34
  timeout: int | None
35
35
  """Request timeout (in seconds)."""
@@ -97,6 +97,9 @@ class GenerateConfigArgs(TypedDict, total=False):
97
97
  reasoning_tokens: int | None
98
98
  """Maximum number of tokens to use for reasoning. Anthropic Claude models only."""
99
99
 
100
+ reasoning_summary: Literal["concise", "detailed", "auto"] | None
101
+ """Provide summary of reasoning steps (defaults to no summary). Use 'auto' to access the most detailed summarizer available for the current model. OpenAI reasoning models only."""
102
+
100
103
  reasoning_history: Literal["none", "all", "last", "auto"] | None
101
104
  """Include reasoning in chat message history sent to generate."""
102
105
 
@@ -108,7 +111,7 @@ class GenerateConfig(BaseModel):
108
111
  """Model generation options."""
109
112
 
110
113
  max_retries: int | None = Field(default=None)
111
- """Maximum number of times to retry request (defaults to 5)."""
114
+ """Maximum number of times to retry request (defaults to unlimited)."""
112
115
 
113
116
  timeout: int | None = Field(default=None)
114
117
  """Request timeout (in seconds)."""
@@ -176,6 +179,11 @@ class GenerateConfig(BaseModel):
176
179
  reasoning_tokens: int | None = Field(default=None)
177
180
  """Maximum number of tokens to use for reasoning. Anthropic Claude models only."""
178
181
 
182
+ reasoning_summary: Literal["concise", "detailed", "auto"] | None = Field(
183
+ default=None
184
+ )
185
+ """Provide summary of reasoning steps (defaults to no summary). Use 'auto' to access the most detailed summarizer available for the current model. OpenAI reasoning models only."""
186
+
179
187
  reasoning_history: Literal["none", "all", "last", "auto"] | None = Field(
180
188
  default=None
181
189
  )
@@ -9,7 +9,15 @@ from contextvars import ContextVar
9
9
  from copy import copy, deepcopy
10
10
  from datetime import datetime
11
11
  from types import TracebackType
12
- from typing import Any, AsyncIterator, Callable, Literal, Type, cast
12
+ from typing import (
13
+ Any,
14
+ AsyncIterator,
15
+ Callable,
16
+ Literal,
17
+ Sequence,
18
+ Type,
19
+ cast,
20
+ )
13
21
 
14
22
  from pydantic_core import to_jsonable_python
15
23
  from tenacity import (
@@ -45,6 +53,7 @@ from inspect_ai._util.retry import report_http_retry
45
53
  from inspect_ai._util.trace import trace_action
46
54
  from inspect_ai._util.working import report_sample_waiting_time, sample_working_time
47
55
  from inspect_ai.tool import Tool, ToolChoice, ToolFunction, ToolInfo
56
+ from inspect_ai.tool._tool import ToolSource
48
57
  from inspect_ai.tool._tool_call import ToolCallModelInputHints
49
58
  from inspect_ai.tool._tool_def import ToolDef, tool_defs
50
59
  from inspect_ai.util import concurrency
@@ -54,7 +63,9 @@ from ._call_tools import (
54
63
  disable_parallel_tools,
55
64
  execute_tools,
56
65
  tool_call_view,
57
- tools_info,
66
+ )
67
+ from ._call_tools import (
68
+ tools_info as get_tools_info,
58
69
  )
59
70
  from ._chat_message import (
60
71
  ChatMessage,
@@ -326,10 +337,7 @@ class Model:
326
337
  async def generate(
327
338
  self,
328
339
  input: str | list[ChatMessage],
329
- tools: list[Tool]
330
- | list[ToolDef]
331
- | list[ToolInfo]
332
- | list[Tool | ToolDef | ToolInfo] = [],
340
+ tools: Sequence[Tool | ToolDef | ToolInfo | ToolSource] | ToolSource = [],
333
341
  tool_choice: ToolChoice | None = None,
334
342
  config: GenerateConfig = GenerateConfig(),
335
343
  cache: bool | CachePolicy = False,
@@ -422,7 +430,7 @@ class Model:
422
430
  async def generate_loop(
423
431
  self,
424
432
  input: str | list[ChatMessage],
425
- tools: list[Tool] | list[ToolDef] | list[Tool | ToolDef] = [],
433
+ tools: Sequence[Tool | ToolDef | ToolSource] | ToolSource = [],
426
434
  config: GenerateConfig = GenerateConfig(),
427
435
  cache: bool | CachePolicy = False,
428
436
  ) -> tuple[list[ChatMessage], ModelOutput]:
@@ -471,10 +479,7 @@ class Model:
471
479
  async def _generate(
472
480
  self,
473
481
  input: list[ChatMessage],
474
- tools: list[Tool]
475
- | list[ToolDef]
476
- | list[ToolInfo]
477
- | list[Tool | ToolDef | ToolInfo],
482
+ tools: Sequence[Tool | ToolDef | ToolInfo | ToolSource] | ToolSource,
478
483
  tool_choice: ToolChoice | None,
479
484
  config: GenerateConfig,
480
485
  cache: bool | CachePolicy = False,
@@ -482,15 +487,30 @@ class Model:
482
487
  # default to 'auto' for tool_choice (same as underlying model apis)
483
488
  tool_choice = tool_choice if tool_choice else "auto"
484
489
 
490
+ # resolve top level tool source
491
+ if isinstance(tools, ToolSource):
492
+ tools = await tools.tools()
493
+
494
+ # resolve tool sources
495
+ resolved_tools: list[Tool | ToolDef | ToolInfo] = []
496
+ for tool in tools:
497
+ if isinstance(tool, ToolSource):
498
+ source_tools = await tool.tools()
499
+ resolved_tools.extend(source_tools)
500
+ else:
501
+ resolved_tools.append(tool)
502
+
485
503
  # extract tool defs if we can
486
- tdefs = tool_defs([tool for tool in tools if not isinstance(tool, ToolInfo)])
504
+ tdefs = await tool_defs(
505
+ [tool for tool in resolved_tools if not isinstance(tool, ToolInfo)]
506
+ )
487
507
 
488
508
  # resolve all tools into tool_info
489
- tools = tools_info(tools)
509
+ tools_info = get_tools_info(resolved_tools)
490
510
 
491
511
  # if we have a specific tool selected then filter out the others
492
512
  if isinstance(tool_choice, ToolFunction):
493
- tools = [tool for tool in tools if tool.name == tool_choice.name]
513
+ tools_info = [tool for tool in tools_info if tool.name == tool_choice.name]
494
514
 
495
515
  # if tool_choice is "none" or if there are no tools then fully purge
496
516
  # the tools (as some models (e.g. openai and mistral) get confused
@@ -498,11 +518,11 @@ class Model:
498
518
  # (they both 'semi' use the tool by placing the arguments in JSON
499
519
  # in their output!). on the other hand, anthropic actually errors if
500
520
  # there are tools anywhere in the message stream and no tools defined.
501
- if tool_choice == "none" or len(tools) == 0:
521
+ if tool_choice == "none" or len(tools_info) == 0:
502
522
  # allow model providers to implement a tools_required() method to
503
523
  # force tools to be passed (we need this for anthropic)
504
524
  if not self.api.tools_required():
505
- tools = []
525
+ tools_info = []
506
526
  tool_choice = "none"
507
527
 
508
528
  # handle reasoning history
@@ -569,13 +589,13 @@ class Model:
569
589
  model=str(self),
570
590
  policy=policy,
571
591
  tool_choice=tool_choice,
572
- tools=tools,
592
+ tools=tools_info,
573
593
  )
574
594
  existing = cache_fetch(cache_entry)
575
595
  if isinstance(existing, ModelOutput):
576
596
  self._record_model_interaction(
577
597
  input=input,
578
- tools=tools,
598
+ tools=tools_info,
579
599
  tool_choice=tool_choice,
580
600
  config=config,
581
601
  cache="read",
@@ -593,7 +613,7 @@ class Model:
593
613
  # (we'll update it with the results once we have them)
594
614
  complete = self._record_model_interaction(
595
615
  input=input,
596
- tools=tools,
616
+ tools=tools_info,
597
617
  tool_choice=tool_choice,
598
618
  config=config,
599
619
  cache="write" if cache else None,
@@ -604,7 +624,7 @@ class Model:
604
624
  try:
605
625
  result = await self.api.generate(
606
626
  input=input,
607
- tools=tools,
627
+ tools=tools_info,
608
628
  tool_choice=tool_choice,
609
629
  config=config,
610
630
  )
@@ -1371,7 +1391,7 @@ def combine_messages(
1371
1391
  def log_model_retry(model_name: str, retry_state: RetryCallState) -> None:
1372
1392
  logger.log(
1373
1393
  HTTP,
1374
- f"-> {model_name} retry {retry_state.attempt_number} after waiting for {retry_state.idle_for}",
1394
+ f"-> {model_name} retry {retry_state.attempt_number} (retrying in {retry_state.upcoming_sleep:,.0f} seconds)",
1375
1395
  )
1376
1396
 
1377
1397
 
@@ -3,6 +3,7 @@ from typing import Any, Literal, Type
3
3
 
4
4
  from pydantic import BaseModel, Field, JsonValue, model_validator
5
5
 
6
+ from inspect_ai._util.content import Content
6
7
  from inspect_ai.tool._tool_call import ToolCall
7
8
 
8
9
  from ._chat_message import ChatMessageAssistant
@@ -165,7 +166,7 @@ class ModelOutput(BaseModel):
165
166
  @staticmethod
166
167
  def from_content(
167
168
  model: str,
168
- content: str,
169
+ content: str | list[Content],
169
170
  stop_reason: StopReason = "stop",
170
171
  error: str | None = None,
171
172
  ) -> "ModelOutput":
@@ -82,16 +82,16 @@ def is_o_series(name: str) -> bool:
82
82
  return not is_gpt(name) and bool(re.search(r"o\d+", name))
83
83
 
84
84
 
85
- def is_o1_pro(name: str) -> bool:
86
- return "o1-pro" in name
85
+ def is_o1(name: str) -> bool:
86
+ return "o1" in name and not is_o1_early(name)
87
87
 
88
88
 
89
- def is_o1_mini(name: str) -> bool:
90
- return "o1-mini" in name
89
+ def is_o1_early(name: str) -> bool:
90
+ return "o1-mini" in name or "o1-preview" in name
91
91
 
92
92
 
93
- def is_o1_preview(name: str) -> bool:
94
- return "o1-preview" in name
93
+ def is_o3_mini(name: str) -> bool:
94
+ return "o3-mini" in name
95
95
 
96
96
 
97
97
  def is_computer_use_preview(name: str) -> bool:
@@ -423,10 +423,12 @@ def chat_messages_from_openai(
423
423
  "reasoning", None
424
424
  )
425
425
  if reasoning is not None:
426
+ # normalize content to an array
426
427
  if isinstance(content, str):
427
428
  content = [ContentText(text=content, refusal=refusal)]
428
- else:
429
- content.insert(0, ContentReasoning(reasoning=str(reasoning)))
429
+
430
+ # insert reasoning
431
+ content.insert(0, ContentReasoning(reasoning=str(reasoning)))
430
432
 
431
433
  # return message
432
434
  if "tool_calls" in message: