inspect-ai 0.3.90__py3-none-any.whl → 0.3.92__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (370)
  1. inspect_ai/_cli/common.py +13 -0
  2. inspect_ai/_cli/eval.py +44 -0
  3. inspect_ai/_display/textual/widgets/samples.py +49 -4
  4. inspect_ai/_display/textual/widgets/vscode.py +4 -2
  5. inspect_ai/_eval/eval.py +41 -28
  6. inspect_ai/_eval/evalset.py +4 -0
  7. inspect_ai/_eval/loader.py +4 -5
  8. inspect_ai/_eval/registry.py +1 -1
  9. inspect_ai/_eval/run.py +6 -3
  10. inspect_ai/_eval/task/log.py +6 -0
  11. inspect_ai/_eval/task/run.py +108 -41
  12. inspect_ai/_eval/task/sandbox.py +19 -5
  13. inspect_ai/_util/_async.py +1 -1
  14. inspect_ai/_util/constants.py +1 -0
  15. inspect_ai/_util/environ.py +32 -0
  16. inspect_ai/_util/file.py +8 -1
  17. inspect_ai/_util/httpx.py +105 -22
  18. inspect_ai/_util/registry.py +83 -9
  19. inspect_ai/_util/text.py +81 -17
  20. inspect_ai/_util/transcript.py +9 -6
  21. inspect_ai/_util/vscode.py +7 -2
  22. inspect_ai/_view/schema.py +1 -1
  23. inspect_ai/_view/www/babel.config.js +11 -0
  24. inspect_ai/_view/www/dist/assets/index.css +3640 -3563
  25. inspect_ai/_view/www/dist/assets/index.js +59204 -52519
  26. inspect_ai/_view/www/eslint.config.mjs +10 -1
  27. inspect_ai/_view/www/jest.config.mjs +21 -0
  28. inspect_ai/_view/www/log-schema.json +111 -2
  29. inspect_ai/_view/www/package.json +19 -5
  30. inspect_ai/_view/www/src/{types → @types}/log.d.ts +95 -32
  31. inspect_ai/_view/www/{App.css → src/app/App.css} +22 -14
  32. inspect_ai/_view/www/src/app/App.tsx +168 -0
  33. inspect_ai/_view/www/src/{AppErrorBoundary.tsx → app/AppErrorBoundary.tsx} +1 -1
  34. inspect_ai/_view/www/src/{appearance → app/appearance}/icons.ts +1 -0
  35. inspect_ai/_view/www/src/{metadata → app/content}/RenderedContent.tsx +5 -5
  36. inspect_ai/_view/www/src/{workspace/WorkSpaceView.tsx → app/log-view/LogView.tsx} +59 -40
  37. inspect_ai/_view/www/src/app/log-view/LogViewContainer.tsx +159 -0
  38. inspect_ai/_view/www/src/app/log-view/LogViewLayout.tsx +109 -0
  39. inspect_ai/_view/www/src/{workspace → app/log-view}/error/TaskErrorPanel.tsx +3 -3
  40. inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ModelRolesView.tsx +1 -1
  41. inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/Navbar.tsx +4 -4
  42. inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/PrimaryBar.tsx +8 -8
  43. inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ResultsPanel.tsx +6 -6
  44. inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/RunningStatusPanel.tsx +1 -1
  45. inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ScoreGrid.tsx +1 -1
  46. inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/SecondaryBar.tsx +8 -8
  47. inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/InfoTab.tsx +35 -6
  48. inspect_ai/_view/www/src/app/log-view/tabs/JsonTab.tsx +136 -0
  49. inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/SamplesTab.tsx +82 -73
  50. inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/grouping.ts +3 -3
  51. inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/types.ts +1 -1
  52. inspect_ai/_view/www/src/{plan → app/plan}/DatasetDetailView.tsx +2 -2
  53. inspect_ai/_view/www/src/{plan → app/plan}/DetailStep.tsx +1 -1
  54. inspect_ai/_view/www/src/{plan → app/plan}/ModelCard.tsx +4 -4
  55. inspect_ai/_view/www/src/{plan → app/plan}/PlanCard.tsx +2 -2
  56. inspect_ai/_view/www/src/{plan → app/plan}/PlanDetailView.tsx +5 -5
  57. inspect_ai/_view/www/src/{plan → app/plan}/SolverDetailView.tsx +1 -1
  58. inspect_ai/_view/www/src/app/routing/AppRouter.tsx +58 -0
  59. inspect_ai/_view/www/src/app/routing/navigationHooks.ts +182 -0
  60. inspect_ai/_view/www/src/app/routing/url.ts +43 -0
  61. inspect_ai/_view/www/src/{samples → app/samples}/InlineSampleDisplay.tsx +11 -27
  62. inspect_ai/_view/www/src/{samples → app/samples}/SampleDialog.tsx +36 -40
  63. inspect_ai/_view/www/src/{samples → app/samples}/SampleDisplay.module.css +4 -0
  64. inspect_ai/_view/www/src/{samples → app/samples}/SampleDisplay.tsx +116 -49
  65. inspect_ai/_view/www/src/{samples → app/samples}/SampleSummaryView.module.css +1 -1
  66. inspect_ai/_view/www/src/{samples → app/samples}/SampleSummaryView.tsx +29 -26
  67. inspect_ai/_view/www/src/{samples → app/samples}/SamplesTools.tsx +3 -3
  68. inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessage.module.css +5 -2
  69. inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessage.tsx +12 -4
  70. inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessageRenderer.tsx +3 -3
  71. inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessageRow.tsx +6 -1
  72. inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatView.tsx +4 -2
  73. inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatViewVirtualList.tsx +5 -3
  74. inspect_ai/_view/www/src/app/samples/chat/MessageContent.module.css +12 -0
  75. inspect_ai/_view/www/src/{samples → app/samples}/chat/MessageContent.tsx +11 -10
  76. inspect_ai/_view/www/src/app/samples/chat/MessageContents.module.css +7 -0
  77. inspect_ai/_view/www/src/{samples → app/samples}/chat/MessageContents.tsx +14 -8
  78. inspect_ai/_view/www/src/{samples → app/samples}/chat/messages.ts +2 -2
  79. inspect_ai/_view/www/src/app/samples/chat/tools/ToolCallView.module.css +7 -0
  80. inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolCallView.tsx +26 -27
  81. inspect_ai/_view/www/src/app/samples/chat/tools/ToolInput.module.css +19 -0
  82. inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolInput.tsx +3 -3
  83. inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolOutput.module.css +1 -0
  84. inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolOutput.tsx +1 -1
  85. inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolTitle.module.css +4 -0
  86. inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolTitle.tsx +2 -2
  87. inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/tool.ts +1 -1
  88. inspect_ai/_view/www/src/app/samples/chat/types.ts +1 -0
  89. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/samplesDescriptor.tsx +38 -15
  90. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/BooleanScoreDescriptor.tsx +1 -1
  91. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/CategoricalScoreDescriptor.tsx +2 -2
  92. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/NumericScoreDescriptor.tsx +3 -3
  93. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/ObjectScoreDescriptor.tsx +4 -4
  94. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/OtherScoreDescriptor.tsx +2 -2
  95. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/PassFailScoreDescriptor.tsx +2 -2
  96. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/ScoreDescriptor.tsx +1 -1
  97. inspect_ai/_view/www/src/{samples → app/samples}/descriptor/types.ts +4 -3
  98. inspect_ai/_view/www/src/{samples → app/samples}/error/SampleErrorView.module.css +2 -1
  99. inspect_ai/_view/www/src/{samples → app/samples}/list/SampleHeader.tsx +3 -0
  100. inspect_ai/_view/www/src/{samples → app/samples}/list/SampleList.tsx +47 -33
  101. inspect_ai/_view/www/src/{samples → app/samples}/list/SampleRow.module.css +16 -0
  102. inspect_ai/_view/www/src/{samples → app/samples}/list/SampleRow.tsx +47 -20
  103. inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/SelectScorer.tsx +1 -1
  104. inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/SortFilter.tsx +4 -4
  105. inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/filters.ts +8 -6
  106. inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/SampleFilter.tsx +4 -3
  107. inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/completions.ts +1 -1
  108. inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/language.ts +1 -0
  109. inspect_ai/_view/www/src/{samples → app/samples}/sampleDataAdapter.ts +3 -3
  110. inspect_ai/_view/www/src/{samples → app/samples}/sampleLimit.ts +1 -1
  111. inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScores.tsx +1 -1
  112. inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScoresGrid.tsx +12 -11
  113. inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScoresView.tsx +6 -6
  114. inspect_ai/_view/www/src/{samples → app/samples}/transcript/ApprovalEventView.tsx +1 -1
  115. inspect_ai/_view/www/src/{samples → app/samples}/transcript/ErrorEventView.tsx +3 -3
  116. inspect_ai/_view/www/src/{samples → app/samples}/transcript/InfoEventView.tsx +4 -4
  117. inspect_ai/_view/www/src/{samples → app/samples}/transcript/InputEventView.tsx +3 -3
  118. inspect_ai/_view/www/src/{samples → app/samples}/transcript/LoggerEventView.tsx +3 -3
  119. inspect_ai/_view/www/src/{samples → app/samples}/transcript/ModelEventView.module.css +13 -7
  120. inspect_ai/_view/www/src/{samples → app/samples}/transcript/ModelEventView.tsx +49 -21
  121. inspect_ai/_view/www/src/{samples → app/samples}/transcript/SampleInitEventView.tsx +11 -9
  122. inspect_ai/_view/www/src/{samples → app/samples}/transcript/SampleLimitEventView.tsx +1 -1
  123. inspect_ai/_view/www/src/{samples → app/samples}/transcript/SandboxEventView.tsx +8 -6
  124. inspect_ai/_view/www/src/{samples → app/samples}/transcript/ScoreEventView.tsx +4 -4
  125. inspect_ai/_view/www/src/{samples → app/samples}/transcript/StepEventView.tsx +11 -3
  126. inspect_ai/_view/www/src/{samples → app/samples}/transcript/SubtaskEventView.tsx +2 -2
  127. inspect_ai/_view/www/src/{samples → app/samples}/transcript/ToolEventView.tsx +2 -2
  128. inspect_ai/_view/www/src/{samples → app/samples}/transcript/TranscriptView.module.css +8 -7
  129. inspect_ai/_view/www/src/{samples → app/samples}/transcript/TranscriptView.tsx +32 -114
  130. inspect_ai/_view/www/src/{samples → app/samples}/transcript/TranscriptVirtualListComponent.module.css +6 -5
  131. inspect_ai/_view/www/src/{samples → app/samples}/transcript/TranscriptVirtualListComponent.tsx +14 -2
  132. inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventPanel.tsx +2 -2
  133. inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventTimingPanel.tsx +1 -1
  134. inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/utils.ts +1 -1
  135. inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateEventRenderers.tsx +23 -21
  136. inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateEventRenders.module.css +7 -0
  137. inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateEventView.tsx +2 -2
  138. inspect_ai/_view/www/src/app/samples/transcript/transform/fixups.ts +142 -0
  139. inspect_ai/_view/www/src/app/samples/transcript/transform/treeify.ts +39 -0
  140. inspect_ai/_view/www/src/{samples → app/samples}/transcript/types.ts +1 -1
  141. inspect_ai/_view/www/src/{workspace → app}/sidebar/EvalStatus.tsx +1 -1
  142. inspect_ai/_view/www/src/app/sidebar/LogDirectoryTitleView.module.css +16 -0
  143. inspect_ai/_view/www/src/app/sidebar/LogDirectoryTitleView.tsx +70 -0
  144. inspect_ai/_view/www/src/{workspace → app}/sidebar/Sidebar.module.css +8 -0
  145. inspect_ai/_view/www/src/{workspace → app}/sidebar/Sidebar.tsx +35 -17
  146. inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarLogEntry.tsx +1 -1
  147. inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarScoreView.tsx +2 -2
  148. inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarScoresView.tsx +2 -2
  149. inspect_ai/_view/www/src/{types.ts → app/types.ts} +18 -11
  150. inspect_ai/_view/www/src/{usage → app/usage}/ModelTokenTable.tsx +1 -1
  151. inspect_ai/_view/www/src/{usage → app/usage}/ModelUsagePanel.tsx +2 -2
  152. inspect_ai/_view/www/src/{usage → app/usage}/TokenTable.tsx +1 -1
  153. inspect_ai/_view/www/src/{usage → app/usage}/UsageCard.tsx +6 -6
  154. inspect_ai/_view/www/src/{api → client/api}/api-browser.ts +2 -2
  155. inspect_ai/_view/www/src/{api → client/api}/api-http.ts +3 -3
  156. inspect_ai/_view/www/src/{api → client/api}/api-vscode.ts +2 -2
  157. inspect_ai/_view/www/src/{api → client/api}/client-api.ts +6 -5
  158. inspect_ai/_view/www/src/{api → client/api}/index.ts +2 -2
  159. inspect_ai/_view/www/src/{api → client/api}/types.ts +4 -1
  160. inspect_ai/_view/www/src/{logfile → client/remote}/remoteLogFile.ts +3 -3
  161. inspect_ai/_view/www/src/{storage → client/storage}/index.ts +11 -5
  162. inspect_ai/_view/www/src/components/Card.tsx +1 -1
  163. inspect_ai/_view/www/src/components/CopyButton.tsx +1 -1
  164. inspect_ai/_view/www/src/components/DownloadButton.tsx +1 -1
  165. inspect_ai/_view/www/src/components/ErrorPanel.tsx +1 -1
  166. inspect_ai/_view/www/src/components/{ExpandablePanel.css → ExpandablePanel.module.css} +14 -11
  167. inspect_ai/_view/www/src/components/ExpandablePanel.tsx +16 -10
  168. inspect_ai/_view/www/src/components/FindBand.tsx +1 -1
  169. inspect_ai/_view/www/src/components/JsonPanel.css +2 -2
  170. inspect_ai/_view/www/src/components/LargeModal.tsx +12 -1
  171. inspect_ai/_view/www/src/components/LightboxCarousel.tsx +1 -1
  172. inspect_ai/_view/www/src/components/MarkdownDiv.tsx +3 -1
  173. inspect_ai/_view/www/src/components/MessageBand.tsx +1 -1
  174. inspect_ai/_view/www/src/components/NoContentsPanel.tsx +1 -1
  175. inspect_ai/_view/www/src/constants.ts +10 -9
  176. inspect_ai/_view/www/src/index.tsx +27 -11
  177. inspect_ai/_view/www/src/state/appSlice.ts +44 -5
  178. inspect_ai/_view/www/src/state/hooks.ts +30 -7
  179. inspect_ai/_view/www/src/state/logSlice.ts +7 -5
  180. inspect_ai/_view/www/src/state/logsPolling.ts +1 -1
  181. inspect_ai/_view/www/src/state/logsSlice.ts +18 -13
  182. inspect_ai/_view/www/src/state/samplePolling.ts +12 -12
  183. inspect_ai/_view/www/src/state/sampleSlice.ts +3 -5
  184. inspect_ai/_view/www/src/state/sampleUtils.ts +1 -1
  185. inspect_ai/_view/www/src/{scoring/utils.ts → state/scoring.ts} +2 -2
  186. inspect_ai/_view/www/src/state/store.ts +9 -7
  187. inspect_ai/_view/www/src/state/utils.ts +1 -1
  188. inspect_ai/_view/www/src/tests/README.md +49 -0
  189. inspect_ai/_view/www/src/tests/__mocks__/fileMock.js +1 -0
  190. inspect_ai/_view/www/src/tests/__mocks__/styleMock.js +1 -0
  191. inspect_ai/_view/www/src/tests/setupTests.mjs +1 -0
  192. inspect_ai/_view/www/src/tests/utils/base64.test.ts +23 -0
  193. inspect_ai/_view/www/src/tests/utils/format.test.ts +127 -0
  194. inspect_ai/_view/www/src/tests/utils/path.test.ts +54 -0
  195. inspect_ai/_view/www/src/utils/format.ts +8 -2
  196. inspect_ai/_view/www/src/utils/path.ts +14 -2
  197. inspect_ai/_view/www/src/utils/polling.ts +1 -2
  198. inspect_ai/_view/www/src/utils/uri.ts +32 -0
  199. inspect_ai/_view/www/yarn.lock +3310 -382
  200. inspect_ai/agent/_handoff.py +6 -3
  201. inspect_ai/agent/_human/agent.py +5 -3
  202. inspect_ai/agent/_human/install.py +16 -7
  203. inspect_ai/agent/_human/panel.py +14 -1
  204. inspect_ai/agent/_human/service.py +5 -1
  205. inspect_ai/agent/_react.py +161 -128
  206. inspect_ai/agent/_types.py +15 -4
  207. inspect_ai/approval/_policy.py +2 -2
  208. inspect_ai/log/_file.py +30 -11
  209. inspect_ai/log/_log.py +7 -1
  210. inspect_ai/log/_recorders/eval.py +3 -0
  211. inspect_ai/log/_recorders/types.py +1 -0
  212. inspect_ai/log/_samples.py +4 -0
  213. inspect_ai/model/_call_tools.py +33 -17
  214. inspect_ai/model/_generate_config.py +10 -2
  215. inspect_ai/model/_model.py +41 -21
  216. inspect_ai/model/_model_output.py +2 -1
  217. inspect_ai/model/_openai.py +10 -8
  218. inspect_ai/model/_openai_responses.py +95 -42
  219. inspect_ai/model/_providers/anthropic.py +14 -12
  220. inspect_ai/model/_providers/google.py +191 -95
  221. inspect_ai/model/_providers/hf.py +1 -1
  222. inspect_ai/model/_providers/mistral.py +2 -3
  223. inspect_ai/model/_providers/openai.py +54 -17
  224. inspect_ai/model/_providers/openai_o1.py +1 -1
  225. inspect_ai/model/_providers/openai_responses.py +28 -16
  226. inspect_ai/model/_providers/openrouter.py +14 -0
  227. inspect_ai/model/_providers/providers.py +2 -2
  228. inspect_ai/model/_providers/util/chatapi.py +17 -7
  229. inspect_ai/model/_providers/vllm.py +1 -1
  230. inspect_ai/scorer/_metric.py +17 -1
  231. inspect_ai/scorer/_model.py +51 -6
  232. inspect_ai/scorer/_scorer.py +1 -1
  233. inspect_ai/solver/_human_agent.py +3 -0
  234. inspect_ai/solver/_plan.py +1 -1
  235. inspect_ai/solver/_solver.py +1 -1
  236. inspect_ai/solver/_use_tools.py +14 -8
  237. inspect_ai/tool/__init__.py +16 -1
  238. inspect_ai/tool/_json_rpc_helpers.py +285 -0
  239. inspect_ai/tool/_mcp/__init__.py +13 -0
  240. inspect_ai/tool/_mcp/_context.py +14 -0
  241. inspect_ai/tool/_mcp/_mcp.py +293 -0
  242. inspect_ai/tool/_mcp/_sandbox.py +104 -0
  243. inspect_ai/tool/_mcp/_types.py +31 -0
  244. inspect_ai/tool/_mcp/connection.py +60 -0
  245. inspect_ai/tool/_mcp/sampling.py +118 -0
  246. inspect_ai/tool/_mcp/server.py +112 -0
  247. inspect_ai/tool/_mcp/tools.py +34 -0
  248. inspect_ai/tool/_tool.py +13 -0
  249. inspect_ai/tool/_tool_def.py +24 -7
  250. inspect_ai/tool/_tool_support_helpers.py +129 -153
  251. inspect_ai/tool/_tools/_bash_session.py +11 -11
  252. inspect_ai/tool/_tools/_text_editor.py +6 -6
  253. inspect_ai/tool/_tools/_web_browser/_web_browser.py +8 -8
  254. inspect_ai/util/_anyio.py +31 -20
  255. inspect_ai/util/_json.py +20 -2
  256. inspect_ai/util/_sandbox/context.py +18 -7
  257. inspect_ai/util/_sandbox/docker/compose.py +1 -1
  258. inspect_ai/util/_sandbox/docker/docker.py +92 -21
  259. inspect_ai/util/_sandbox/environment.py +33 -2
  260. inspect_ai/util/_sandbox/events.py +2 -2
  261. inspect_ai/util/_sandbox/service.py +13 -3
  262. {inspect_ai-0.3.90.dist-info → inspect_ai-0.3.92.dist-info}/METADATA +6 -2
  263. inspect_ai-0.3.92.dist-info/RECORD +732 -0
  264. {inspect_ai-0.3.90.dist-info → inspect_ai-0.3.92.dist-info}/WHEEL +1 -1
  265. inspect_ai/_view/www/src/App.tsx +0 -316
  266. inspect_ai/_view/www/src/samples/chat/MessageContent.module.css +0 -4
  267. inspect_ai/_view/www/src/samples/chat/MessageContents.module.css +0 -3
  268. inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.module.css +0 -3
  269. inspect_ai/_view/www/src/samples/chat/tools/ToolInput.module.css +0 -14
  270. inspect_ai/_view/www/src/workspace/WorkSpace.tsx +0 -292
  271. inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.module.css +0 -5
  272. inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +0 -57
  273. inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +0 -43
  274. inspect_ai-0.3.90.dist-info/RECORD +0 -705
  275. /inspect_ai/_view/www/src/{types → @types}/asciicinema-player.d.ts +0 -0
  276. /inspect_ai/_view/www/src/{types → @types}/jsondiffpatch.d.ts +0 -0
  277. /inspect_ai/_view/www/src/{types → @types}/markdown-it-katex.d.ts +0 -0
  278. /inspect_ai/_view/www/src/{types → @types}/prism.d.ts +0 -0
  279. /inspect_ai/_view/www/src/{appearance → app/appearance}/colors.ts +0 -0
  280. /inspect_ai/_view/www/src/{appearance → app/appearance}/fonts.ts +0 -0
  281. /inspect_ai/_view/www/src/{appearance → app/appearance}/styles.ts +0 -0
  282. /inspect_ai/_view/www/src/{metadata → app/content}/MetaDataGrid.tsx +0 -0
  283. /inspect_ai/_view/www/src/{metadata → app/content}/MetaDataView.module.css +0 -0
  284. /inspect_ai/_view/www/src/{metadata → app/content}/MetaDataView.tsx +0 -0
  285. /inspect_ai/_view/www/src/{metadata → app/content}/MetadataGrid.module.css +0 -0
  286. /inspect_ai/_view/www/src/{metadata → app/content}/RenderedContent.module.css +0 -0
  287. /inspect_ai/_view/www/src/{metadata → app/content}/types.ts +0 -0
  288. /inspect_ai/_view/www/src/{workspace/WorkSpaceView.module.css → app/log-view/LogView.module.css} +0 -0
  289. /inspect_ai/_view/www/src/{workspace → app/log-view}/error/TaskErrorPanel.module.css +0 -0
  290. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ModelRolesView.module.css +0 -0
  291. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/Navbar.module.css +0 -0
  292. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/PrimaryBar.module.css +0 -0
  293. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ResultsPanel.module.css +0 -0
  294. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/RunningStatusPanel.module.css +0 -0
  295. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ScoreGrid.module.css +0 -0
  296. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/SecondaryBar.module.css +0 -0
  297. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/StatusPanel.module.css +0 -0
  298. /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/StatusPanel.tsx +0 -0
  299. /inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/InfoTab.module.css +0 -0
  300. /inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/JsonTab.module.css +0 -0
  301. /inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/RunningNoSamples.module.css +0 -0
  302. /inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/RunningNoSamples.tsx +0 -0
  303. /inspect_ai/_view/www/src/{workspace → app/log-view}/types.ts +0 -0
  304. /inspect_ai/_view/www/src/{workspace → app/log-view}/utils.ts +0 -0
  305. /inspect_ai/_view/www/src/{plan → app/plan}/DatasetDetailView.module.css +0 -0
  306. /inspect_ai/_view/www/src/{plan → app/plan}/DetailStep.module.css +0 -0
  307. /inspect_ai/_view/www/src/{plan → app/plan}/ModelCard.module.css +0 -0
  308. /inspect_ai/_view/www/src/{plan → app/plan}/PlanDetailView.module.css +0 -0
  309. /inspect_ai/_view/www/src/{plan → app/plan}/ScorerDetailView.module.css +0 -0
  310. /inspect_ai/_view/www/src/{plan → app/plan}/ScorerDetailView.tsx +0 -0
  311. /inspect_ai/_view/www/src/{plan → app/plan}/SolverDetailView.module.css +0 -0
  312. /inspect_ai/_view/www/src/{samples → app/samples}/InlineSampleDisplay.module.css +0 -0
  313. /inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessageRow.module.css +0 -0
  314. /inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatViewVirtualList.module.css +0 -0
  315. /inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/BooleanScoreDescriptor.module.css +0 -0
  316. /inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/ObjectScoreDescriptor.module.css +0 -0
  317. /inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/PassFailScoreDescriptor.module.css +0 -0
  318. /inspect_ai/_view/www/src/{samples → app/samples}/error/FlatSampleErrorView.module.css +0 -0
  319. /inspect_ai/_view/www/src/{samples → app/samples}/error/FlatSampleErrorView.tsx +0 -0
  320. /inspect_ai/_view/www/src/{samples → app/samples}/error/SampleErrorView.tsx +0 -0
  321. /inspect_ai/_view/www/src/{samples → app/samples}/error/error.ts +0 -0
  322. /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleFooter.module.css +0 -0
  323. /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleFooter.tsx +0 -0
  324. /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleHeader.module.css +0 -0
  325. /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleList.module.css +0 -0
  326. /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleSeparator.module.css +0 -0
  327. /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleSeparator.tsx +0 -0
  328. /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/EpochFilter.module.css +0 -0
  329. /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/EpochFilter.tsx +0 -0
  330. /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/SelectScorer.module.css +0 -0
  331. /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/SortFilter.module.css +0 -0
  332. /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/SampleFilter.module.css +0 -0
  333. /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/tokenize.ts +0 -0
  334. /inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScores.module.css +0 -0
  335. /inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScoresGrid.module.css +0 -0
  336. /inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScoresView.module.css +0 -0
  337. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/InfoEventView.module.css +0 -0
  338. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/LoggerEventView.module.css +0 -0
  339. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/SampleInitEventView.module.css +0 -0
  340. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/SandboxEventView.module.css +0 -0
  341. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/ScoreEventView.module.css +0 -0
  342. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/SubtaskEventView.module.css +0 -0
  343. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/ToolEventView.module.css +0 -0
  344. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventNav.module.css +0 -0
  345. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventNav.tsx +0 -0
  346. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventNavs.module.css +0 -0
  347. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventNavs.tsx +0 -0
  348. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventPanel.module.css +0 -0
  349. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventProgressPanel.module.css +0 -0
  350. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventProgressPanel.tsx +0 -0
  351. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventRow.module.css +0 -0
  352. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventRow.tsx +0 -0
  353. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventSection.module.css +0 -0
  354. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventSection.tsx +0 -0
  355. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventTimingPanel.module.css +0 -0
  356. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateDiffView.tsx +0 -0
  357. /inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateEventView.module.css +0 -0
  358. /inspect_ai/_view/www/src/{workspace → app}/sidebar/EvalStatus.module.css +0 -0
  359. /inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarLogEntry.module.css +0 -0
  360. /inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarScoreView.module.css +0 -0
  361. /inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarScoresView.module.css +0 -0
  362. /inspect_ai/_view/www/src/{usage → app/usage}/ModelUsagePanel.module.css +0 -0
  363. /inspect_ai/_view/www/src/{usage → app/usage}/TokenTable.module.css +0 -0
  364. /inspect_ai/_view/www/src/{usage → app/usage}/UsageCard.module.css +0 -0
  365. /inspect_ai/_view/www/src/{api → client/api}/api-shared.ts +0 -0
  366. /inspect_ai/_view/www/src/{api → client/api}/jsonrpc.ts +0 -0
  367. /inspect_ai/_view/www/src/{logfile → client/remote}/remoteZipFile.ts +0 -0
  368. {inspect_ai-0.3.90.dist-info → inspect_ai-0.3.92.dist-info}/entry_points.txt +0 -0
  369. {inspect_ai-0.3.90.dist-info → inspect_ai-0.3.92.dist-info}/licenses/LICENSE +0 -0
  370. {inspect_ai-0.3.90.dist-info → inspect_ai-0.3.92.dist-info}/top_level.txt +0 -0
@@ -2,7 +2,7 @@ import contextlib
2
2
  import functools
3
3
  import sys
4
4
  import time
5
- from copy import deepcopy
5
+ from copy import copy, deepcopy
6
6
  from dataclasses import dataclass, field
7
7
  from datetime import datetime
8
8
  from logging import getLogger
@@ -307,6 +307,7 @@ async def task_run(options: TaskRunOptions) -> EvalLog:
307
307
  functools.partial(
308
308
  task_run_sample,
309
309
  task_name=task.name,
310
+ log_location=profile.log_location,
310
311
  sample=sample,
311
312
  state=state,
312
313
  sandbox=sandbox,
@@ -325,6 +326,8 @@ async def task_run(options: TaskRunOptions) -> EvalLog:
325
326
  config.fail_on_error is None
326
327
  or config.fail_on_error is True
327
328
  ),
329
+ retry_on_error=config.retry_on_error or 0,
330
+ error_retries=[],
328
331
  time_limit=config.time_limit,
329
332
  working_limit=config.working_limit,
330
333
  semaphore=sample_semaphore,
@@ -484,7 +487,9 @@ def update_metrics_display_fn(
484
487
 
485
488
 
486
489
  async def task_run_sample(
490
+ *,
487
491
  task_name: str,
492
+ log_location: str,
488
493
  sample: Sample,
489
494
  state: TaskState,
490
495
  sandbox: SandboxEnvironmentSpec | None,
@@ -500,6 +505,8 @@ async def task_run_sample(
500
505
  sample_error: SampleErrorHandler,
501
506
  sample_complete: Callable[[dict[str, SampleScore]], None],
502
507
  fails_on_error: bool,
508
+ retry_on_error: int,
509
+ error_retries: list[EvalError],
503
510
  time_limit: int | None,
504
511
  working_limit: int | None,
505
512
  semaphore: anyio.Semaphore | None,
@@ -531,6 +538,9 @@ async def task_run_sample(
531
538
  sample_complete(sample_scores)
532
539
  return sample_scores
533
540
 
541
+ # copy variables that we may pass back to ourselves on a retry
542
+ initial_state = deepcopy(state)
543
+
534
544
  # use semaphore if provided
535
545
  semaphore_cm: anyio.Semaphore | contextlib.AbstractAsyncContextManager[None] = (
536
546
  semaphore if semaphore else contextlib.nullcontext()
@@ -561,20 +571,31 @@ async def task_run_sample(
561
571
 
562
572
  # helper to handle exceptions (will throw if we've exceeded the limit)
563
573
  def handle_error(ex: BaseException) -> tuple[EvalError, BaseException | None]:
564
- err = sample_error(ex)
565
- # if we aren't raising the error then print a warning
566
- if err[1] is None:
567
- py_logger.warning(
568
- f"Sample error (id: {sample.id}, epoch: {state.epoch}): {exception_message(ex)})"
569
- )
570
- transcript()._event(ErrorEvent(error=err[0]))
571
- return err
574
+ # helper to log sample error
575
+ def log_sample_error() -> None:
576
+ msg = f"Sample error (id: {sample.id}, epoch: {state.epoch}): {exception_message(ex)})"
577
+ if retry_on_error > 0:
578
+ msg = f"{msg}. Sample will be retried."
579
+ py_logger.warning(msg)
580
+
581
+ # if we have retries left then return EvalError
582
+ if retry_on_error > 0:
583
+ log_sample_error()
584
+ return eval_error(ex, type(ex), ex, ex.__traceback__), None
585
+ else:
586
+ err = sample_error(ex)
587
+ # if we aren't raising the error then print a warning
588
+ if err[1] is None:
589
+ log_sample_error()
590
+ transcript()._event(ErrorEvent(error=err[0]))
591
+ return err
572
592
 
573
593
  # solver loop
574
594
  async with (
575
595
  semaphore_cm,
576
596
  active_sample(
577
597
  task=task_name,
598
+ log_location=log_location,
578
599
  model=str(state.model),
579
600
  sample=sample,
580
601
  epoch=state.epoch,
@@ -582,7 +603,7 @@ async def task_run_sample(
582
603
  token_limit=state.token_limit,
583
604
  time_limit=time_limit,
584
605
  working_limit=working_limit,
585
- fails_on_error=fails_on_error,
606
+ fails_on_error=fails_on_error or (retry_on_error > 0),
586
607
  transcript=sample_transcript,
587
608
  ) as active,
588
609
  ):
@@ -606,7 +627,7 @@ async def task_run_sample(
606
627
 
607
628
  async with sandboxenv_cm:
608
629
  timeout_cm: (
609
- contextlib._GeneratorContextManager[anyio.CancelScope, None, None]
630
+ contextlib._GeneratorContextManager[anyio.CancelScope]
610
631
  | contextlib.nullcontext[None]
611
632
  ) = contextlib.nullcontext()
612
633
  try:
@@ -791,40 +812,84 @@ async def task_run_sample(
791
812
  except Exception as ex:
792
813
  error, raise_error = handle_error(ex)
793
814
 
794
- # complete the sample
795
- progress(SAMPLE_TOTAL_PROGRESS_UNITS)
815
+ # complete the sample if there is no error or if there is no retry_on_error in play
816
+ if not error or (retry_on_error == 0):
817
+ progress(SAMPLE_TOTAL_PROGRESS_UNITS)
818
+
819
+ # log it
820
+ if logger is not None:
821
+ # if we are logging images then be sure to base64 images injected by solvers
822
+ if log_images:
823
+ state = (await states_with_base64_content([state]))[0]
824
+
825
+ # otherwise ensure there are no base64 images in sample or messages
826
+ else:
827
+ sample = sample_without_base64_content(sample)
828
+ state = state_without_base64_content(state)
829
+
830
+ # log the sample
831
+ await log_sample(
832
+ start_time=start_time,
833
+ logger=logger,
834
+ sample=sample,
835
+ state=state,
836
+ scores=results,
837
+ error=error,
838
+ error_retries=error_retries,
839
+ log_images=log_images,
840
+ )
796
841
 
797
- # log it
842
+ # error that should be retried (we do this outside of the above scope so that we can
843
+ # retry outside of the original semaphore -- our retry will therefore go to the back
844
+ # of the sample queue)
845
+ if error and retry_on_error > 0:
846
+ # remove any buffered sample events
798
847
  if logger is not None:
799
- # if we are logging images then be sure to base64 images injected by solvers
800
- if log_images:
801
- state = (await states_with_base64_content([state]))[0]
848
+ logger.remove_sample(state.sample_id, state.epoch)
802
849
 
803
- # otherwise ensure there are no base64 images in sample or messages
804
- else:
805
- sample = sample_without_base64_content(sample)
806
- state = state_without_base64_content(state)
807
-
808
- # log the sample
809
- await log_sample(
810
- start_time=start_time,
811
- logger=logger,
812
- sample=sample,
813
- state=state,
814
- scores=results,
815
- error=error,
816
- log_images=log_images,
817
- )
850
+ # recurse w/ tick down of retry_on_error and append of error to error_retries
851
+ return await task_run_sample(
852
+ task_name=task_name,
853
+ log_location=log_location,
854
+ sample=sample,
855
+ # state was deep copied at the outset
856
+ state=initial_state,
857
+ sandbox=sandbox,
858
+ max_sandboxes=max_sandboxes,
859
+ sandbox_cleanup=sandbox_cleanup,
860
+ plan=plan,
861
+ scorers=scorers,
862
+ generate=generate,
863
+ progress=progress,
864
+ logger=logger,
865
+ log_images=log_images,
866
+ sample_source=sample_source,
867
+ sample_error=sample_error,
868
+ sample_complete=sample_complete,
869
+ fails_on_error=fails_on_error,
870
+ # tick retry count down
871
+ retry_on_error=retry_on_error - 1,
872
+ # forward on error that caused retry
873
+ error_retries=copy(error_retries) + [error],
874
+ time_limit=time_limit,
875
+ working_limit=working_limit,
876
+ semaphore=semaphore,
877
+ )
818
878
 
819
- # return
820
- if error is None:
821
- if results is not None:
822
- sample_complete(results)
823
- return results
824
- elif raise_error:
825
- raise raise_error
826
- else:
827
- return None
879
+ # no error
880
+ elif error is None:
881
+ # call sample_complete callback if we have score results
882
+ if results is not None:
883
+ sample_complete(results)
884
+ return results
885
+
886
+ # we have an error and should raise it
887
+ elif raise_error is not None:
888
+ raise raise_error
889
+
890
+ # we have an error and should not raise it
891
+ else:
892
+ return None
828
893
 
829
894
 
830
895
  async def log_sample(
@@ -834,6 +899,7 @@ async def log_sample(
834
899
  state: TaskState,
835
900
  scores: dict[str, SampleScore],
836
901
  error: EvalError | None,
902
+ error_retries: list[EvalError],
837
903
  log_images: bool,
838
904
  ) -> None:
839
905
  # sample must have id to be logged
@@ -879,6 +945,7 @@ async def log_sample(
879
945
  if total_time is not None
880
946
  else None,
881
947
  error=error,
948
+ error_retries=error_retries,
882
949
  limit=limit,
883
950
  )
884
951
 
@@ -17,6 +17,7 @@ from inspect_ai._eval.task.task import Task
17
17
  from inspect_ai._eval.task.util import task_run_dir
18
18
  from inspect_ai._util.file import file, filesystem
19
19
  from inspect_ai._util.httpx import httpx_should_retry, log_httpx_retry_attempt
20
+ from inspect_ai._util.path import chdir
20
21
  from inspect_ai._util.registry import registry_unqualified_name
21
22
  from inspect_ai._util.url import data_uri_to_base64, is_data_uri, is_http_url
22
23
  from inspect_ai.dataset import Sample
@@ -29,6 +30,7 @@ from inspect_ai.util._sandbox.environment import (
29
30
  SandboxEnvironment,
30
31
  SandboxEnvironmentConfigType,
31
32
  SandboxEnvironmentSpec,
33
+ TaskInitEnvironment,
32
34
  )
33
35
  from inspect_ai.util._sandbox.registry import registry_find_sandboxenv
34
36
 
@@ -42,7 +44,7 @@ async def sandboxenv_context(
42
44
  sample: Sample,
43
45
  ) -> AsyncGenerator[None, None]:
44
46
  # resolve sandbox
45
- sandbox = resolve_sandbox(sandbox, sample)
47
+ sandbox = await resolve_sandbox(sandbox, sample)
46
48
  if not sandbox:
47
49
  raise ValueError("sandboxenv_context called with no sandbox specified")
48
50
 
@@ -143,22 +145,34 @@ async def read_sandboxenv_file(contents: str) -> bytes:
143
145
  class TaskSandboxEnvironment(NamedTuple):
144
146
  sandbox: SandboxEnvironmentSpec
145
147
  run_dir: str
148
+ env: tuple[tuple[str, str], ...]
146
149
 
147
150
 
148
- def resolve_sandbox_for_task(
151
+ async def resolve_sandbox_for_task_and_sample(
149
152
  eval_sandbox: SandboxEnvironmentSpec | None,
150
153
  task: Task,
151
154
  sample: Sample,
152
155
  ) -> TaskSandboxEnvironment | None:
153
156
  # eval_sandbox overrides task or sample sandbox
154
- sandbox = eval_sandbox or resolve_sandbox(task.sandbox, sample)
157
+ sandbox = eval_sandbox or await resolve_sandbox(task.sandbox, sample)
155
158
  if sandbox is not None:
156
- return TaskSandboxEnvironment(sandbox, task_run_dir(task))
159
+ # see if there are environment variables required for init of this sample
160
+ run_dir = task_run_dir(task)
161
+ with chdir(run_dir):
162
+ sandboxenv_type = registry_find_sandboxenv(sandbox.type)
163
+ task_init_environment = cast(
164
+ TaskInitEnvironment, getattr(sandboxenv_type, "task_init_environment")
165
+ )
166
+ env = await task_init_environment(sandbox.config, sample.metadata or {})
167
+
168
+ return TaskSandboxEnvironment(
169
+ sandbox=sandbox, run_dir=run_dir, env=tuple(sorted(env.items()))
170
+ )
157
171
  else:
158
172
  return None
159
173
 
160
174
 
161
- def resolve_sandbox(
175
+ async def resolve_sandbox(
162
176
  sandbox: SandboxEnvironmentSpec | None,
163
177
  sample: Sample,
164
178
  ) -> SandboxEnvironmentSpec | None:
@@ -69,7 +69,7 @@ async def tg_collect(
69
69
  if exception_group:
70
70
  raise
71
71
  else:
72
- raise ex.exceptions[0]
72
+ raise ex.exceptions[0] from None
73
73
 
74
74
 
75
75
  async def coro_print_exceptions(
@@ -26,6 +26,7 @@ ALL_LOG_LEVELS = [
26
26
  DEFAULT_LOG_LEVEL = "warning"
27
27
  DEFAULT_LOG_LEVEL_TRANSCRIPT = "info"
28
28
  DEFAULT_LOG_SHARED = 10
29
+ DEFAULT_RETRY_ON_ERROR = 1
29
30
  ALL_LOG_FORMATS = ["eval", "json"]
30
31
  DEFAULT_LOG_FORMAT: Literal["eval", "json"] = "eval"
31
32
  JSON_LOG_FORMAT = "json"
@@ -24,3 +24,35 @@ def environ_var(name: str, value: str) -> Iterator[None]:
24
24
  os.environ.pop(name, None)
25
25
  else:
26
26
  os.environ[name] = previous_value
27
+
28
+
29
+ @contextmanager
30
+ def environ_vars(env_vars: dict[str, str]) -> Iterator[None]:
31
+ """
32
+ Temporarily set multiple environment variables within a context.
33
+
34
+ Args:
35
+ env_vars: Dictionary mapping environment variable names to values
36
+
37
+ Yields:
38
+ None
39
+ """
40
+ # save previous values
41
+ previous_values = {}
42
+ for name in env_vars:
43
+ previous_values[name] = os.environ.get(name)
44
+
45
+ # set new values
46
+ for name, value in env_vars.items():
47
+ os.environ[name] = value
48
+
49
+ try:
50
+ yield
51
+ finally:
52
+ # Restore previous environment
53
+ for name in env_vars:
54
+ previous_value = previous_values[name]
55
+ if previous_value is None:
56
+ os.environ.pop(name, None)
57
+ else:
58
+ os.environ[name] = previous_value
inspect_ai/_util/file.py CHANGED
@@ -271,8 +271,15 @@ class FileSystem:
271
271
  if "mtime" not in file.keys() and file["type"] == "file":
272
272
  file["mtime"] = self.fs.created(file).timestamp()
273
273
 
274
+ # adjust mtime to be milliseconds
274
275
  if "mtime" in file.keys():
275
- file["mtime"] = file["mtime"] * 1000
276
+ mtime = file["mtime"]
277
+ if isinstance(mtime, datetime.datetime):
278
+ file["mtime"] = mtime.timestamp() * 1000
279
+ elif isinstance(mtime, int | float):
280
+ file["mtime"] = mtime * 1000
281
+ else:
282
+ raise ValueError(f"Unexpected type for mtime ({type(mtime)}): {mtime}")
276
283
  else:
277
284
  file["mtime"] = None
278
285
 
inspect_ai/_util/httpx.py CHANGED
@@ -1,10 +1,13 @@
1
1
  import logging
2
2
  from typing import Callable
3
3
 
4
- from httpx import ConnectError, ConnectTimeout, HTTPStatusError, ReadTimeout
4
+ import httpcore
5
+ import httpx
6
+ from httpx import HTTPStatusError
5
7
  from tenacity import RetryCallState
6
8
 
7
9
  from inspect_ai._util.constants import HTTP
10
+ from inspect_ai._util.http import is_retryable_http_status
8
11
 
9
12
  logger = logging.getLogger(__name__)
10
13
 
@@ -20,25 +23,10 @@ def httpx_should_retry(ex: BaseException) -> bool:
20
23
  Returns:
21
24
  True if a retry should occur
22
25
  """
23
- # httpx status exception
24
26
  if isinstance(ex, HTTPStatusError):
25
- # request timeout
26
- if ex.response.status_code == 408:
27
- return True
28
- # lock timeout
29
- elif ex.response.status_code == 409:
30
- return True
31
- # rate limit
32
- elif ex.response.status_code == 429:
33
- return True
34
- # internal errors
35
- elif ex.response.status_code >= 500:
36
- return True
37
- else:
38
- return False
39
-
40
- # connection error
41
- elif is_httpx_connection_error(ex):
27
+ return is_retryable_http_status(ex.response.status_code)
28
+
29
+ elif httpx_should_retry_no_status_code(ex):
42
30
  return True
43
31
 
44
32
  # don't retry
@@ -50,11 +38,106 @@ def log_httpx_retry_attempt(context: str) -> Callable[[RetryCallState], None]:
50
38
  def log_attempt(retry_state: RetryCallState) -> None:
51
39
  logger.log(
52
40
  HTTP,
53
- f"{context} connection retry {retry_state.attempt_number} after waiting for {retry_state.idle_for}",
41
+ f"{context} connection retry {retry_state.attempt_number} (retrying in {retry_state.upcoming_sleep:,.0f} seconds)",
54
42
  )
55
43
 
56
44
  return log_attempt
57
45
 
58
46
 
59
- def is_httpx_connection_error(ex: BaseException) -> bool:
60
- return isinstance(ex, ConnectTimeout | ConnectError | ConnectionError | ReadTimeout)
47
+ def httpx_should_retry_no_status_code(ex: BaseException) -> bool:
48
+ """
49
+ Check whether an exception (without an HTTP status code) should be retried.
50
+
51
+ To understand this function, it may be helpful to look at the exception hierarchies for
52
+ httpx and httpcore, which are reproduced below.
53
+
54
+
55
+ # HTTPX Exception Hierarchy
56
+ Exception (Python built-in)
57
+ |
58
+ +-- HTTPError
59
+ | |
60
+ | +-- RequestError
61
+ | | |
62
+ | | +-- TransportError
63
+ | | | |
64
+ | | | +-- TimeoutException
65
+ | | | | |
66
+ | | | | +-- ConnectTimeout
67
+ | | | | +-- ReadTimeout
68
+ | | | | +-- WriteTimeout
69
+ | | | | +-- PoolTimeout
70
+ | | | |
71
+ | | | +-- NetworkError
72
+ | | | | |
73
+ | | | | +-- ConnectError
74
+ | | | | +-- ReadError
75
+ | | | | +-- WriteError
76
+ | | | | +-- CloseError
77
+ | | | |
78
+ | | | +-- ProtocolError
79
+ | | | | |
80
+ | | | | +-- LocalProtocolError
81
+ | | | | +-- RemoteProtocolError
82
+ | | | |
83
+ | | | +-- ProxyError
84
+ | | | +-- UnsupportedProtocol
85
+ | | |
86
+ | | +-- DecodingError
87
+ | | +-- TooManyRedirects
88
+ | |
89
+ | +-- HTTPStatusError
90
+ |
91
+ +-- InvalidURL
92
+ +-- CookieConflict
93
+ +-- RuntimeError (Python built-in)
94
+ |
95
+ +-- StreamError
96
+ |
97
+ +-- StreamConsumed
98
+ +-- StreamClosed
99
+ +-- ResponseNotRead
100
+ +-- RequestNotRead
101
+
102
+
103
+ # HTTPCore Exception Hierarchy
104
+ Exception (Python built-in)
105
+ |
106
+ +-- ConnectionNotAvailable
107
+ +-- ProxyError
108
+ +-- UnsupportedProtocol
109
+ +-- ProtocolError
110
+ | |
111
+ | +-- RemoteProtocolError
112
+ | +-- LocalProtocolError
113
+ |
114
+ +-- TimeoutException
115
+ | |
116
+ | +-- PoolTimeout
117
+ | +-- ConnectTimeout
118
+ | +-- ReadTimeout
119
+ | +-- WriteTimeout
120
+ |
121
+ +-- NetworkError
122
+ |
123
+ +-- ConnectError
124
+ +-- ReadError
125
+ +-- WriteError
126
+ """
127
+ # Base class for all exceptions that occur at the level of the Transport API.
128
+ is_transport_error = isinstance(ex, httpx.TransportError)
129
+
130
+ # Sometimes exceptions are raised directly by httpcore, the lower-level library that httpx uses
131
+ is_httpcore_network_error = isinstance(ex, httpcore.NetworkError)
132
+ is_httpcore_timeout_error = isinstance(ex, httpcore.TimeoutException)
133
+ is_httpcore_protocol_error = isinstance(ex, httpcore.ProtocolError)
134
+
135
+ # extensible in case we notice other cases
136
+ return any(
137
+ [
138
+ is_transport_error,
139
+ is_httpcore_network_error,
140
+ is_httpcore_timeout_error,
141
+ is_httpcore_protocol_error,
142
+ ]
143
+ )
@@ -1,6 +1,17 @@
1
+ from __future__ import annotations
2
+
1
3
  import inspect
2
4
  from inspect import get_annotations, isclass
3
- from typing import Any, Callable, Literal, TypedDict, TypeGuard, cast
5
+ from typing import (
6
+ TYPE_CHECKING,
7
+ Any,
8
+ Callable,
9
+ Literal,
10
+ TypedDict,
11
+ TypeGuard,
12
+ cast,
13
+ overload,
14
+ )
4
15
 
5
16
  from pydantic import BaseModel, Field
6
17
  from pydantic_core import to_jsonable_python
@@ -11,19 +22,30 @@ from inspect_ai._util.package import get_installed_package_name
11
22
  from .constants import PKG_NAME
12
23
  from .entrypoints import ensure_entry_points
13
24
 
25
+ if TYPE_CHECKING:
26
+ from inspect_ai import Task
27
+ from inspect_ai.agent import Agent
28
+ from inspect_ai.approval import Approver
29
+ from inspect_ai.model import ModelAPI
30
+ from inspect_ai.scorer import Metric, Scorer, ScoreReducer
31
+ from inspect_ai.solver import Plan, Solver
32
+ from inspect_ai.tool import Tool
33
+ from inspect_ai.util import SandboxEnvironment
34
+
14
35
  obj_type = type
15
36
 
16
37
  RegistryType = Literal[
17
- "task",
18
- "solver",
19
38
  "agent",
20
- "tool",
21
- "scorer",
39
+ "approver",
22
40
  "metric",
23
- "score_reducer",
24
41
  "modelapi",
42
+ "plan",
25
43
  "sandboxenv",
26
- "approver",
44
+ "score_reducer",
45
+ "scorer",
46
+ "solver",
47
+ "task",
48
+ "tool",
27
49
  ]
28
50
  """Enumeration of registry object types.
29
51
 
@@ -184,7 +206,59 @@ def registry_find(predicate: Callable[[RegistryInfo], bool]) -> list[object]:
184
206
  return o
185
207
 
186
208
 
187
- def registry_create(type: RegistryType, name: str, **kwargs: Any) -> object:
209
+ @overload
210
+ def registry_create(type: Literal["agent"], name: str, **kwargs: Any) -> Agent: ...
211
+
212
+
213
+ @overload
214
+ def registry_create(
215
+ type: Literal["approver"], name: str, **kwargs: Any
216
+ ) -> Approver: ...
217
+
218
+
219
+ @overload
220
+ def registry_create(type: Literal["metric"], name: str, **kwargs: Any) -> Metric: ...
221
+
222
+
223
+ @overload
224
+ def registry_create(
225
+ type: Literal["modelapi"], name: str, **kwargs: Any
226
+ ) -> ModelAPI: ...
227
+
228
+
229
+ @overload
230
+ def registry_create(type: Literal["plan"], name: str, **kwargs: Any) -> Plan: ...
231
+
232
+
233
+ @overload
234
+ def registry_create(
235
+ type: Literal["sandboxenv"], name: str, **kwargs: Any
236
+ ) -> SandboxEnvironment: ...
237
+
238
+
239
+ @overload
240
+ def registry_create(type: Literal["scorer"], name: str, **kwargs: Any) -> Scorer: ...
241
+
242
+
243
+ @overload
244
+ def registry_create(
245
+ type: Literal["score_reducer"], name: str, **kwargs: Any
246
+ ) -> ScoreReducer: ...
247
+
248
+
249
+ @overload
250
+ def registry_create(type: Literal["solver"], name: str, **kwargs: Any) -> Solver: ...
251
+
252
+
253
+ @overload
254
+ def registry_create(type: Literal["task"], name: str, **kwargs: Any) -> Task: ...
255
+
256
+
257
+ @overload
258
+ def registry_create(type: Literal["tool"], name: str, **kwargs: Any) -> Tool: ...
259
+
260
+
261
+ def registry_create(type: RegistryType, name: str, **kwargs: Any) -> object: # type: ignore[return]
188
262
  r"""Create a registry object.
189
263
 
190
264
  Creates objects registered via decorator (e.g. `@task`, `@solver`). Note
@@ -230,7 +304,7 @@ def registry_create(type: RegistryType, name: str, **kwargs: Any) -> object:
230
304
  if isclass(obj):
231
305
  return with_registry_info(obj(**kwargs))
232
306
  elif callable(obj):
233
- return_type = get_annotations(obj).get("return")
307
+ return_type = get_annotations(obj, eval_str=True).get("return")
234
308
  # Until we remove the MetricDeprecated symbol we need this extra
235
309
  # bit to map the Metric union back to Metric
236
310
  if "_metric.Metric" in str(return_type):