inspect-ai 0.3.69__py3-none-any.whl → 0.3.71__py3-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (242)
  1. inspect_ai/_cli/eval.py +27 -9
  2. inspect_ai/_display/core/display.py +2 -0
  3. inspect_ai/_display/core/footer.py +13 -3
  4. inspect_ai/_display/plain/display.py +6 -2
  5. inspect_ai/_display/rich/display.py +19 -6
  6. inspect_ai/_display/textual/app.py +9 -3
  7. inspect_ai/_display/textual/display.py +4 -0
  8. inspect_ai/_display/textual/widgets/samples.py +4 -10
  9. inspect_ai/_display/textual/widgets/transcript.py +35 -18
  10. inspect_ai/_eval/eval.py +14 -2
  11. inspect_ai/_eval/evalset.py +6 -1
  12. inspect_ai/_eval/run.py +6 -0
  13. inspect_ai/_eval/task/run.py +49 -23
  14. inspect_ai/_eval/task/task.py +26 -3
  15. inspect_ai/_util/content.py +20 -1
  16. inspect_ai/_util/interrupt.py +6 -0
  17. inspect_ai/_util/logger.py +19 -0
  18. inspect_ai/_util/rich.py +7 -8
  19. inspect_ai/_util/text.py +13 -0
  20. inspect_ai/_util/transcript.py +20 -6
  21. inspect_ai/_util/working.py +50 -0
  22. inspect_ai/_view/www/App.css +6 -0
  23. inspect_ai/_view/www/dist/assets/index.css +171 -99
  24. inspect_ai/_view/www/dist/assets/index.js +5972 -2770
  25. inspect_ai/_view/www/eslint.config.mjs +24 -1
  26. inspect_ai/_view/www/log-schema.json +619 -21
  27. inspect_ai/_view/www/package.json +8 -3
  28. inspect_ai/_view/www/src/App.tsx +2 -2
  29. inspect_ai/_view/www/src/appearance/icons.ts +3 -1
  30. inspect_ai/_view/www/src/components/AnsiDisplay.tsx +4 -3
  31. inspect_ai/_view/www/src/components/Card.tsx +9 -8
  32. inspect_ai/_view/www/src/components/DownloadButton.tsx +2 -1
  33. inspect_ai/_view/www/src/components/EmptyPanel.tsx +2 -2
  34. inspect_ai/_view/www/src/components/ErrorPanel.tsx +4 -3
  35. inspect_ai/_view/www/src/components/ExpandablePanel.tsx +13 -5
  36. inspect_ai/_view/www/src/components/FindBand.tsx +3 -3
  37. inspect_ai/_view/www/src/components/HumanBaselineView.tsx +3 -3
  38. inspect_ai/_view/www/src/components/LabeledValue.tsx +5 -4
  39. inspect_ai/_view/www/src/components/LargeModal.tsx +18 -13
  40. inspect_ai/_view/www/src/components/{LightboxCarousel.css → LightboxCarousel.module.css} +22 -18
  41. inspect_ai/_view/www/src/components/LightboxCarousel.tsx +36 -27
  42. inspect_ai/_view/www/src/components/MessageBand.tsx +2 -1
  43. inspect_ai/_view/www/src/components/NavPills.tsx +9 -8
  44. inspect_ai/_view/www/src/components/ProgressBar.tsx +2 -1
  45. inspect_ai/_view/www/src/components/TabSet.tsx +21 -15
  46. inspect_ai/_view/www/src/index.tsx +2 -2
  47. inspect_ai/_view/www/src/metadata/MetaDataGrid.tsx +11 -9
  48. inspect_ai/_view/www/src/metadata/MetaDataView.tsx +3 -2
  49. inspect_ai/_view/www/src/metadata/MetadataGrid.module.css +1 -0
  50. inspect_ai/_view/www/src/metadata/RenderedContent.tsx +16 -1
  51. inspect_ai/_view/www/src/plan/DatasetDetailView.tsx +3 -2
  52. inspect_ai/_view/www/src/plan/DetailStep.tsx +2 -1
  53. inspect_ai/_view/www/src/plan/PlanCard.tsx +2 -5
  54. inspect_ai/_view/www/src/plan/PlanDetailView.tsx +6 -9
  55. inspect_ai/_view/www/src/plan/ScorerDetailView.tsx +2 -1
  56. inspect_ai/_view/www/src/plan/SolverDetailView.tsx +3 -3
  57. inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +2 -2
  58. inspect_ai/_view/www/src/samples/SampleDialog.tsx +3 -3
  59. inspect_ai/_view/www/src/samples/SampleDisplay.module.css +9 -1
  60. inspect_ai/_view/www/src/samples/SampleDisplay.tsx +30 -3
  61. inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +4 -0
  62. inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +25 -4
  63. inspect_ai/_view/www/src/samples/SamplesTools.tsx +2 -1
  64. inspect_ai/_view/www/src/samples/chat/ChatMessage.tsx +3 -19
  65. inspect_ai/_view/www/src/samples/chat/ChatMessageRenderer.tsx +2 -1
  66. inspect_ai/_view/www/src/samples/chat/ChatMessageRow.tsx +2 -1
  67. inspect_ai/_view/www/src/samples/chat/ChatView.tsx +2 -1
  68. inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.tsx +22 -7
  69. inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +35 -6
  70. inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +2 -2
  71. inspect_ai/_view/www/src/samples/chat/messages.ts +15 -2
  72. inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +13 -4
  73. inspect_ai/_view/www/src/samples/chat/tools/ToolInput.module.css +2 -2
  74. inspect_ai/_view/www/src/samples/chat/tools/ToolInput.tsx +18 -19
  75. inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.module.css +1 -1
  76. inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.tsx +4 -3
  77. inspect_ai/_view/www/src/samples/chat/tools/ToolTitle.tsx +2 -2
  78. inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.tsx +2 -3
  79. inspect_ai/_view/www/src/samples/error/SampleErrorView.tsx +3 -2
  80. inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +2 -1
  81. inspect_ai/_view/www/src/samples/list/SampleHeader.tsx +2 -1
  82. inspect_ai/_view/www/src/samples/list/SampleList.tsx +57 -45
  83. inspect_ai/_view/www/src/samples/list/SampleRow.tsx +2 -1
  84. inspect_ai/_view/www/src/samples/list/SampleSeparator.tsx +2 -1
  85. inspect_ai/_view/www/src/samples/sample-tools/EpochFilter.tsx +2 -2
  86. inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.tsx +4 -3
  87. inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +2 -5
  88. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +2 -2
  89. inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +2 -1
  90. inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +2 -2
  91. inspect_ai/_view/www/src/samples/transcript/ApprovalEventView.tsx +2 -1
  92. inspect_ai/_view/www/src/samples/transcript/ErrorEventView.tsx +2 -1
  93. inspect_ai/_view/www/src/samples/transcript/InfoEventView.tsx +2 -1
  94. inspect_ai/_view/www/src/samples/transcript/InputEventView.tsx +2 -1
  95. inspect_ai/_view/www/src/samples/transcript/LoggerEventView.module.css +4 -0
  96. inspect_ai/_view/www/src/samples/transcript/LoggerEventView.tsx +12 -2
  97. inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +1 -1
  98. inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +25 -28
  99. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.tsx +2 -1
  100. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +9 -4
  101. inspect_ai/_view/www/src/samples/transcript/SampleTranscript.tsx +2 -2
  102. inspect_ai/_view/www/src/samples/transcript/SandboxEventView.module.css +32 -0
  103. inspect_ai/_view/www/src/samples/transcript/SandboxEventView.tsx +153 -0
  104. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.tsx +2 -2
  105. inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +12 -5
  106. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.tsx +18 -14
  107. inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +5 -5
  108. inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +53 -16
  109. inspect_ai/_view/www/src/samples/transcript/event/EventNav.tsx +2 -1
  110. inspect_ai/_view/www/src/samples/transcript/event/EventNavs.tsx +2 -1
  111. inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +6 -3
  112. inspect_ai/_view/www/src/samples/transcript/event/EventRow.tsx +3 -2
  113. inspect_ai/_view/www/src/samples/transcript/event/EventSection.tsx +2 -2
  114. inspect_ai/_view/www/src/samples/transcript/event/EventTimingPanel.module.css +28 -0
  115. inspect_ai/_view/www/src/samples/transcript/event/EventTimingPanel.tsx +115 -0
  116. inspect_ai/_view/www/src/samples/transcript/event/utils.ts +29 -0
  117. inspect_ai/_view/www/src/samples/transcript/state/StateDiffView.tsx +2 -1
  118. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +3 -3
  119. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +11 -8
  120. inspect_ai/_view/www/src/samples/transcript/types.ts +3 -1
  121. inspect_ai/_view/www/src/types/log.d.ts +312 -137
  122. inspect_ai/_view/www/src/usage/ModelTokenTable.tsx +6 -10
  123. inspect_ai/_view/www/src/usage/ModelUsagePanel.module.css +4 -0
  124. inspect_ai/_view/www/src/usage/ModelUsagePanel.tsx +32 -9
  125. inspect_ai/_view/www/src/usage/TokenTable.tsx +4 -6
  126. inspect_ai/_view/www/src/usage/UsageCard.tsx +2 -1
  127. inspect_ai/_view/www/src/utils/format.ts +8 -5
  128. inspect_ai/_view/www/src/utils/json.ts +24 -0
  129. inspect_ai/_view/www/src/workspace/WorkSpace.tsx +6 -5
  130. inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +18 -8
  131. inspect_ai/_view/www/src/workspace/error/TaskErrorPanel.tsx +2 -1
  132. inspect_ai/_view/www/src/workspace/navbar/Navbar.tsx +2 -1
  133. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +3 -3
  134. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +4 -3
  135. inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +5 -4
  136. inspect_ai/_view/www/src/workspace/navbar/StatusPanel.tsx +5 -8
  137. inspect_ai/_view/www/src/workspace/sidebar/EvalStatus.tsx +5 -4
  138. inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +2 -1
  139. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.tsx +2 -1
  140. inspect_ai/_view/www/src/workspace/sidebar/SidebarLogEntry.tsx +2 -2
  141. inspect_ai/_view/www/src/workspace/sidebar/SidebarScoreView.tsx +2 -1
  142. inspect_ai/_view/www/src/workspace/sidebar/SidebarScoresView.tsx +2 -2
  143. inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +2 -2
  144. inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +2 -5
  145. inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +12 -11
  146. inspect_ai/_view/www/yarn.lock +241 -5
  147. inspect_ai/log/__init__.py +2 -0
  148. inspect_ai/log/_condense.py +4 -0
  149. inspect_ai/log/_log.py +72 -12
  150. inspect_ai/log/_recorders/eval.py +6 -1
  151. inspect_ai/log/_samples.py +5 -1
  152. inspect_ai/log/_transcript.py +89 -2
  153. inspect_ai/model/__init__.py +2 -0
  154. inspect_ai/model/_call_tools.py +8 -1
  155. inspect_ai/model/_chat_message.py +22 -7
  156. inspect_ai/model/_conversation.py +11 -9
  157. inspect_ai/model/_generate_config.py +25 -4
  158. inspect_ai/model/_model.py +164 -72
  159. inspect_ai/model/_model_call.py +10 -3
  160. inspect_ai/model/_model_output.py +3 -0
  161. inspect_ai/model/_openai.py +106 -40
  162. inspect_ai/model/_providers/anthropic.py +145 -26
  163. inspect_ai/model/_providers/bedrock.py +7 -0
  164. inspect_ai/model/_providers/cloudflare.py +20 -7
  165. inspect_ai/model/_providers/google.py +29 -8
  166. inspect_ai/model/_providers/groq.py +66 -27
  167. inspect_ai/model/_providers/hf.py +6 -0
  168. inspect_ai/model/_providers/mistral.py +78 -51
  169. inspect_ai/model/_providers/openai.py +66 -4
  170. inspect_ai/model/_providers/openai_o1.py +10 -0
  171. inspect_ai/model/_providers/providers.py +2 -2
  172. inspect_ai/model/_providers/util/tracker.py +92 -0
  173. inspect_ai/model/_providers/vllm.py +13 -5
  174. inspect_ai/model/_reasoning.py +15 -2
  175. inspect_ai/scorer/_model.py +23 -19
  176. inspect_ai/solver/_basic_agent.py +1 -3
  177. inspect_ai/solver/_bridge/patch.py +0 -2
  178. inspect_ai/solver/_human_agent/agent.py +14 -10
  179. inspect_ai/solver/_human_agent/commands/__init__.py +7 -3
  180. inspect_ai/solver/_human_agent/commands/submit.py +76 -30
  181. inspect_ai/solver/_limit.py +4 -4
  182. inspect_ai/solver/_plan.py +0 -3
  183. inspect_ai/solver/_task_state.py +7 -0
  184. inspect_ai/tool/__init__.py +2 -0
  185. inspect_ai/tool/_tool.py +3 -1
  186. inspect_ai/tool/_tools/_computer/_resources/tool/_run.py +1 -1
  187. inspect_ai/tool/_tools/_web_browser/_resources/.pylintrc +8 -0
  188. inspect_ai/tool/_tools/_web_browser/_resources/.vscode/launch.json +24 -0
  189. inspect_ai/tool/_tools/_web_browser/_resources/.vscode/settings.json +25 -0
  190. inspect_ai/tool/_tools/_web_browser/_resources/Dockerfile +5 -6
  191. inspect_ai/tool/_tools/_web_browser/_resources/README.md +10 -11
  192. inspect_ai/tool/_tools/_web_browser/_resources/accessibility_tree.py +71 -0
  193. inspect_ai/tool/_tools/_web_browser/_resources/accessibility_tree_node.py +323 -0
  194. inspect_ai/tool/_tools/_web_browser/_resources/cdp/__init__.py +5 -0
  195. inspect_ai/tool/_tools/_web_browser/_resources/cdp/a11y.py +279 -0
  196. inspect_ai/tool/_tools/_web_browser/_resources/cdp/dom.py +9 -0
  197. inspect_ai/tool/_tools/_web_browser/_resources/cdp/dom_snapshot.py +293 -0
  198. inspect_ai/tool/_tools/_web_browser/_resources/cdp/page.py +94 -0
  199. inspect_ai/tool/_tools/_web_browser/_resources/constants.py +2 -0
  200. inspect_ai/tool/_tools/_web_browser/_resources/images/usage_diagram.svg +2 -0
  201. inspect_ai/tool/_tools/_web_browser/_resources/playwright_browser.py +50 -0
  202. inspect_ai/tool/_tools/_web_browser/_resources/playwright_crawler.py +31 -359
  203. inspect_ai/tool/_tools/_web_browser/_resources/playwright_page_crawler.py +280 -0
  204. inspect_ai/tool/_tools/_web_browser/_resources/pyproject.toml +65 -0
  205. inspect_ai/tool/_tools/_web_browser/_resources/rectangle.py +64 -0
  206. inspect_ai/tool/_tools/_web_browser/_resources/rpc_client_helpers.py +146 -0
  207. inspect_ai/tool/_tools/_web_browser/_resources/scale_factor.py +64 -0
  208. inspect_ai/tool/_tools/_web_browser/_resources/test_accessibility_tree_node.py +180 -0
  209. inspect_ai/tool/_tools/_web_browser/_resources/test_playwright_crawler.py +15 -9
  210. inspect_ai/tool/_tools/_web_browser/_resources/test_rectangle.py +15 -0
  211. inspect_ai/tool/_tools/_web_browser/_resources/test_web_client.py +44 -0
  212. inspect_ai/tool/_tools/_web_browser/_resources/web_browser_rpc_types.py +39 -0
  213. inspect_ai/tool/_tools/_web_browser/_resources/web_client.py +198 -48
  214. inspect_ai/tool/_tools/_web_browser/_resources/web_client_new_session.py +26 -25
  215. inspect_ai/tool/_tools/_web_browser/_resources/web_server.py +178 -39
  216. inspect_ai/tool/_tools/_web_browser/_web_browser.py +38 -19
  217. inspect_ai/tool/_tools/_web_search.py +3 -3
  218. inspect_ai/util/__init__.py +2 -1
  219. inspect_ai/util/_concurrency.py +14 -8
  220. inspect_ai/util/_display.py +12 -0
  221. inspect_ai/util/_sandbox/context.py +15 -0
  222. inspect_ai/util/_sandbox/docker/docker.py +7 -5
  223. inspect_ai/util/_sandbox/environment.py +32 -1
  224. inspect_ai/util/_sandbox/events.py +183 -0
  225. inspect_ai/util/_sandbox/local.py +3 -3
  226. inspect_ai/util/_sandbox/self_check.py +131 -43
  227. inspect_ai/util/_subtask.py +11 -0
  228. {inspect_ai-0.3.69.dist-info → inspect_ai-0.3.71.dist-info}/METADATA +3 -3
  229. {inspect_ai-0.3.69.dist-info → inspect_ai-0.3.71.dist-info}/RECORD +233 -211
  230. {inspect_ai-0.3.69.dist-info → inspect_ai-0.3.71.dist-info}/WHEEL +1 -1
  231. inspect_ai/_view/www/src/components/VirtualList.module.css +0 -19
  232. inspect_ai/_view/www/src/components/VirtualList.tsx +0 -292
  233. inspect_ai/tool/_tools/_web_browser/_resources/accessibility_node.py +0 -312
  234. inspect_ai/tool/_tools/_web_browser/_resources/dm_env_servicer.py +0 -275
  235. inspect_ai/tool/_tools/_web_browser/_resources/images/usage_diagram.png +0 -0
  236. inspect_ai/tool/_tools/_web_browser/_resources/test_accessibility_node.py +0 -176
  237. inspect_ai/tool/_tools/_web_browser/_resources/test_dm_env_servicer.py +0 -135
  238. inspect_ai/tool/_tools/_web_browser/_resources/test_web_environment.py +0 -71
  239. inspect_ai/tool/_tools/_web_browser/_resources/web_environment.py +0 -184
  240. {inspect_ai-0.3.69.dist-info → inspect_ai-0.3.71.dist-info}/LICENSE +0 -0
  241. {inspect_ai-0.3.69.dist-info → inspect_ai-0.3.71.dist-info}/entry_points.txt +0 -0
  242. {inspect_ai-0.3.69.dist-info → inspect_ai-0.3.71.dist-info}/top_level.txt +0 -0
@@ -3,8 +3,9 @@ from textwrap import dedent
3
3
 
4
4
  from pydantic import Field
5
5
 
6
+ from inspect_ai._util.content import ContentText
6
7
  from inspect_ai._util.error import PrerequisiteError
7
- from inspect_ai.tool._tool import Tool, ToolError, tool
8
+ from inspect_ai.tool._tool import Tool, ToolError, ToolResult, tool
8
9
  from inspect_ai.tool._tool_call import ToolCall, ToolCallContent, ToolCallView
9
10
  from inspect_ai.tool._tool_info import parse_tool_info
10
11
  from inspect_ai.tool._tool_with import tool_with
@@ -58,10 +59,10 @@ def web_browser_go() -> Tool:
58
59
  Web browser navigation tool.
59
60
  """
60
61
 
61
- async def execute(url: str) -> str:
62
+ async def execute(url: str) -> ToolResult:
62
63
  """Navigate the web browser to a URL.
63
64
 
64
- Once you have navigated to a page, you will be presented with a web accessibilty tree of the elements on the page. Each element has an ID, which is displayed in brackets at the beginning of its line. For example:
65
+ Once you have navigated to a page, you will be presented with a web accessibility tree of the elements on the page. Each element has an ID, which is displayed in brackets at the beginning of its line. For example:
65
66
 
66
67
  ```
67
68
  [1] RootWebArea "Google" [focused: True, url: https://www.google.com/]
@@ -99,16 +100,17 @@ def go_without_interactive_docs(tool: Tool) -> Tool:
99
100
 
100
101
 
101
102
  # custom viewer for interactive tool calls that shows a truncated
102
- # version of current the web accessiblity tree if available
103
+ # version of current the web accessibility tree if available
103
104
 
104
105
 
105
106
  class WebBrowserStore(StoreModel):
107
+ main_content: str = Field(default_factory=str)
106
108
  web_at: str = Field(default_factory=str)
107
109
  session_id: str = Field(default_factory=str)
108
110
 
109
111
 
110
112
  def web_at_viewer(call: ToolCall) -> ToolCallView:
111
- # get the web accessiblity tree, if we have it create a view from it
113
+ # get the web accessibility tree, if we have it create a view from it
112
114
  web_at = store_as(WebBrowserStore).web_at
113
115
  element_id = call.arguments.get("element_id", 0)
114
116
  if web_at and element_id:
@@ -141,10 +143,10 @@ def web_browser_click() -> Tool:
141
143
  Web browser clicking tool.
142
144
  """
143
145
 
144
- async def execute(element_id: int) -> str:
146
+ async def execute(element_id: int) -> ToolResult:
145
147
  """Click an element on the page currently displayed by the web browser.
146
148
 
147
- For example, with the following web accessibilty tree:
149
+ For example, with the following web accessibility tree:
148
150
 
149
151
  ```
150
152
  [304] RootWebArea "Poetry Foundation" [focused: True, url: https://www.poetryfoundation.org/]
@@ -176,7 +178,7 @@ def web_browser_type_submit() -> Tool:
176
178
  Web browser type and submit tool.
177
179
  """
178
180
 
179
- async def execute(element_id: int, text: str) -> str:
181
+ async def execute(element_id: int, text: str) -> ToolResult:
180
182
  """Type text into a form input on a web browser page and press ENTER to submit the form.
181
183
 
182
184
  For example, to execute a search for "Yeats" from this page:
@@ -214,7 +216,7 @@ def web_browser_type() -> Tool:
214
216
  Web browser typing tool.
215
217
  """
216
218
 
217
- async def execute(element_id: int, text: str) -> str:
219
+ async def execute(element_id: int, text: str) -> ToolResult:
218
220
  """Type text into an input on a web browser page.
219
221
 
220
222
  For example, to type "Norah" into the "First Name" search box on this page:
@@ -252,7 +254,7 @@ def web_browser_scroll() -> Tool:
252
254
  Web browser scrolling tool.
253
255
  """
254
256
 
255
- async def execute(direction: str) -> str:
257
+ async def execute(direction: str) -> ToolResult:
256
258
  """Scroll the web browser up or down by one page.
257
259
 
258
260
  Occasionally some very long pages don't display all of their content at once. To see additional content you can scroll the page down with:
@@ -282,7 +284,7 @@ def web_browser_back() -> Tool:
282
284
  Web browser back navigation tool.
283
285
  """
284
286
 
285
- async def execute() -> str:
287
+ async def execute() -> ToolResult:
286
288
  """Navigate the web browser back in the browser history.
287
289
 
288
290
  If you want to view a page that you have previously browsed (or perhaps just didn't find what you were looking for on a page and want to backtrack) use the web_browser_back tool.
@@ -303,7 +305,7 @@ def web_browser_forward() -> Tool:
303
305
  Web browser forward navigation tool.
304
306
  """
305
307
 
306
- async def execute() -> str:
308
+ async def execute() -> ToolResult:
307
309
  """Navigate the web browser forward in the browser history.
308
310
 
309
311
  If you have navigated back in the browser history and then want to navigate forward use the web_browser_forward tool.
@@ -324,7 +326,7 @@ def web_browser_refresh() -> Tool:
324
326
  Web browser page refresh tool.
325
327
  """
326
328
 
327
- async def execute() -> str:
329
+ async def execute() -> ToolResult:
328
330
  """Refresh the current page of the web browser.
329
331
 
330
332
  If you have interacted with a page by clicking buttons and want to reset it to its original state, use the web_browser_refresh tool.
@@ -341,7 +343,7 @@ WEB_CLIENT_REQUEST = "/app/web_browser/web_client.py"
341
343
  WEB_CLIENT_NEW_SESSION = "/app/web_browser/web_client_new_session.py"
342
344
 
343
345
 
344
- async def web_browser_cmd(cmd: str, *args: str) -> str:
346
+ async def web_browser_cmd(cmd: str, *args: str) -> ToolResult:
345
347
  sandbox_env = await sandbox_with(WEB_CLIENT_NEW_SESSION)
346
348
  session_flag = ""
347
349
  if sandbox_env:
@@ -379,17 +381,30 @@ async def web_browser_cmd(cmd: str, *args: str) -> str:
379
381
  if "error" in response and response.get("error", "").strip() != "":
380
382
  raise ToolError(str(response.get("error")) or "(unknown error)")
381
383
  elif "web_at" in response:
384
+ main_content = str(response.get("main_content")) or None
382
385
  web_at = (
383
- str(response.get("web_at")) or "(no web accessiblity tree available)"
386
+ str(response.get("web_at")) or "(no web accessibility tree available)"
384
387
  )
385
388
  # Remove base64 data from images.
386
389
  web_at_lines = web_at.split("\n")
387
390
  web_at_lines = [
388
391
  line.partition("data:image/png;base64")[0] for line in web_at_lines
389
392
  ]
390
- web_at = "\n".join(web_at_lines)
393
+
394
+ store_as(WebBrowserStore).main_content = (
395
+ main_content or "(no main text summary)"
396
+ )
391
397
  store_as(WebBrowserStore).web_at = web_at
392
- return web_at
398
+
399
+ web_at = "\n".join(web_at_lines)
400
+ return (
401
+ [
402
+ ContentText(text=f"main content:\n{main_content}\n\n"),
403
+ ContentText(text=f"accessibility tree:\n{web_at}"),
404
+ ]
405
+ if main_content
406
+ else web_at
407
+ )
393
408
  else:
394
409
  raise RuntimeError(
395
410
  f"web_browser output must contain either 'error' or 'web_at' field: {result.stdout}"
@@ -425,7 +440,9 @@ async def web_browser_sandbox() -> SandboxEnvironment:
425
440
 
426
441
 
427
442
  def parse_web_browser_output(output: str) -> dict[str, str]:
428
- response: dict[str, str] = dict(web_url="", web_at="", info="", error="")
443
+ response: dict[str, str] = dict(
444
+ web_url="", main_content="", web_at="", info="", error=""
445
+ )
429
446
  active_field: str | None = None
430
447
  active_field_lines: list[str] = []
431
448
 
@@ -435,7 +452,9 @@ def parse_web_browser_output(output: str) -> dict[str, str]:
435
452
  active_field_lines.clear()
436
453
 
437
454
  for line in output.splitlines():
438
- field_match = re.match(r"^(error|web_at|web_url|info)\s*:\s*(.+)$", line)
455
+ field_match = re.match(
456
+ r"^(error|main_content|web_at|web_url|info)\s*:\s*(.+)$", line
457
+ )
439
458
  if field_match:
440
459
  collect_active_field()
441
460
  active_field = field_match.group(1)
@@ -1,6 +1,6 @@
1
1
  import asyncio
2
2
  import os
3
- from typing import Literal, Protocol, cast, runtime_checkable
3
+ from typing import Literal, Protocol, runtime_checkable
4
4
 
5
5
  import httpx
6
6
  from bs4 import BeautifulSoup, NavigableString
@@ -90,8 +90,8 @@ def web_search(
90
90
  return_exceptions=True,
91
91
  )
92
92
  for page, link in zip(pages, links):
93
- if page and not isinstance(page, Exception):
94
- page_contents.append(cast(str, page))
93
+ if page and not isinstance(page, BaseException):
94
+ page_contents.append(page)
95
95
  urls.append(link.url)
96
96
  snippets.append(link.snippet)
97
97
  search_calls += 1
@@ -2,7 +2,7 @@ from inspect_ai._util.trace import trace_action, trace_message
2
2
 
3
3
  from ._concurrency import concurrency
4
4
  from ._console import input_screen
5
- from ._display import DisplayType, display_type
5
+ from ._display import DisplayType, display_counter, display_type
6
6
  from ._panel import InputPanel, input_panel
7
7
  from ._resource import resource
8
8
  from ._sandbox import (
@@ -31,6 +31,7 @@ __all__ = [
31
31
  "ExecResult",
32
32
  "concurrency",
33
33
  "DisplayType",
34
+ "display_counter",
34
35
  "display_type",
35
36
  "InputPanel",
36
37
  "input_panel",
@@ -1,13 +1,19 @@
1
1
  import asyncio
2
+ import contextlib
3
+ import time
2
4
  from dataclasses import dataclass
5
+ from typing import AsyncIterator
3
6
 
7
+ from inspect_ai._util.working import report_sample_waiting_time
4
8
 
5
- def concurrency(
9
+
10
+ @contextlib.asynccontextmanager
11
+ async def concurrency(
6
12
  name: str,
7
13
  concurrency: int,
8
14
  key: str | None = None,
9
- ) -> asyncio.Semaphore:
10
- """Obtain a concurrency context.
15
+ ) -> AsyncIterator[None]:
16
+ """Concurrency context manager.
11
17
 
12
18
  A concurrency context can be used to limit the number of coroutines
13
19
  executing a block of code (e.g calling an API). For example, here
@@ -32,9 +38,6 @@ def concurrency(
32
38
  Used if the unique key isn't human readable -- e.g. includes
33
39
  api tokens or account ids so that the more readable `name`
34
40
  can be presented to users e.g in console UI>
35
-
36
- Returns:
37
- Asyncio Semaphore for concurrency context.
38
41
  """
39
42
  # sort out key
40
43
  key = key if key else name
@@ -47,8 +50,11 @@ def concurrency(
47
50
  )
48
51
  _concurrency_semaphores[key] = semaphore
49
52
 
50
- # return the semaphore
51
- return semaphore.semaphore
53
+ # wait and yield to protected code
54
+ start_wait = time.monotonic()
55
+ async with semaphore.semaphore:
56
+ report_sample_waiting_time(time.monotonic() - start_wait)
57
+ yield
52
58
 
53
59
 
54
60
  def concurrency_status() -> dict[str, tuple[int, int]]:
@@ -54,3 +54,15 @@ def display_type() -> DisplayType:
54
54
  def display_type_initialized() -> bool:
55
55
  global _display_type
56
56
  return _display_type is not None
57
+
58
+
59
+ def display_counter(caption: str, value: str) -> None:
60
+ """Display a counter in the UI.
61
+
62
+ Args:
63
+ caption: The counter's caption e.g. "HTTP rate limits".
64
+ value: The counter's value e.g. "42".
65
+ """
66
+ from inspect_ai._display.core.active import display
67
+
68
+ display().display_counter(caption, value)
@@ -5,6 +5,7 @@ from typing import Any, NoReturn, cast
5
5
  from shortuuid import uuid
6
6
 
7
7
  from inspect_ai._util.constants import SANDBOX_SETUP_TIMEOUT
8
+ from inspect_ai.util._sandbox.events import SandboxEnvironmentProxy
8
9
 
9
10
  from .environment import (
10
11
  SampleCleanup,
@@ -132,6 +133,9 @@ async def init_sandbox_environments_sample(
132
133
  # verify that there is at least one environment and a 'default' env
133
134
  validate_sandbox_environments(sandboxenv_type, environments)
134
135
 
136
+ # proxy environments (for recording SandboxEvent)
137
+ environments = {k: SandboxEnvironmentProxy(v) for k, v in environments.items()}
138
+
135
139
  try:
136
140
  # copy files into environments
137
141
  await copy_sandbox_environment_files(files, environments)
@@ -148,6 +152,7 @@ async def init_sandbox_environments_sample(
148
152
  return environments
149
153
 
150
154
  except Exception as ex:
155
+ environments = unproxy_environments(environments)
151
156
  await sample_cleanup(task_name, config, environments, True)
152
157
  raise ex
153
158
 
@@ -161,9 +166,19 @@ async def cleanup_sandbox_environments_sample(
161
166
  ) -> None:
162
167
  sandboxenv_type = registry_find_sandboxenv(type)
163
168
  sample_cleanup = cast(SampleCleanup, getattr(sandboxenv_type, "sample_cleanup"))
169
+ environments = unproxy_environments(environments)
164
170
  await sample_cleanup(task_name, config, environments, interrupted)
165
171
 
166
172
 
173
+ def unproxy_environments(
174
+ environments: dict[str, SandboxEnvironment],
175
+ ) -> dict[str, SandboxEnvironment]:
176
+ return {
177
+ k: v._sandbox
178
+ for k, v in cast(dict[str, SandboxEnvironmentProxy], environments).items()
179
+ }
180
+
181
+
167
182
  async def copy_sandbox_environment_files(
168
183
  files: dict[str, bytes], environments: dict[str, SandboxEnvironment]
169
184
  ) -> None:
@@ -5,7 +5,7 @@ import os
5
5
  import tempfile
6
6
  from logging import getLogger
7
7
  from pathlib import Path, PurePosixPath
8
- from typing import Literal, Union, cast, overload
8
+ from typing import Literal, Union, overload
9
9
 
10
10
  from typing_extensions import override
11
11
 
@@ -145,7 +145,7 @@ class DockerSandboxEnvironment(SandboxEnvironment):
145
145
  project = await ComposeProject.create(
146
146
  name=task_project_name(task_name),
147
147
  config=config,
148
- sample_id=sample.id if sample is not None else None,
148
+ sample_id=sample.sample.id if sample is not None else None,
149
149
  epoch=sample.epoch if sample is not None else None,
150
150
  env=env,
151
151
  )
@@ -221,9 +221,11 @@ class DockerSandboxEnvironment(SandboxEnvironment):
221
221
  # (this enables us to show output for the cleanup operation)
222
222
  if not interrupted:
223
223
  # extract project from first environment
224
- project = cast(
225
- DockerSandboxEnvironment, next(iter(environments.values()))
226
- )._project
224
+ project = (
225
+ next(iter(environments.values()))
226
+ .as_type(DockerSandboxEnvironment)
227
+ ._project
228
+ )
227
229
  # cleanup the project
228
230
  await project_cleanup(project=project, quiet=True)
229
231
 
@@ -2,12 +2,24 @@ from __future__ import annotations
2
2
 
3
3
  import abc
4
4
  from dataclasses import dataclass, field
5
- from typing import Any, Awaitable, Callable, Literal, NamedTuple, Union, overload
5
+ from typing import (
6
+ Any,
7
+ Awaitable,
8
+ Callable,
9
+ Literal,
10
+ NamedTuple,
11
+ Type,
12
+ TypeVar,
13
+ Union,
14
+ overload,
15
+ )
6
16
 
7
17
  from pydantic import BaseModel, Field
8
18
 
9
19
  from .._subprocess import ExecResult
10
20
 
21
+ ST = TypeVar("ST", bound="SandboxEnvironment")
22
+
11
23
  TaskInit = Callable[[str, Union["SandboxEnvironmentConfigType", None]], Awaitable[None]]
12
24
  TaskCleanup = Callable[
13
25
  [str, Union["SandboxEnvironmentConfigType", None], bool], Awaitable[None]
@@ -180,6 +192,25 @@ class SandboxEnvironment(abc.ABC):
180
192
  """
181
193
  raise NotImplementedError("connection not implemented")
182
194
 
195
+ def as_type(self, sandbox_cls: Type[ST]) -> ST:
196
+ """Verify and return a reference to a subclass of SandboxEnvironment.
197
+
198
+ Args:
199
+ sandbox_cls: Class of sandbox (subclass of SandboxEnvironment)
200
+
201
+ Returns:
202
+ Reference to the sandbox using the requested type.
203
+
204
+ Raises:
205
+ TypeError: If the sandbox is not of the requested type.
206
+ """
207
+ if isinstance(self, sandbox_cls):
208
+ return self
209
+ else:
210
+ raise TypeError(
211
+ f"Expected instance of {sandbox_cls.__name__}, got {type(self).__name__}"
212
+ )
213
+
183
214
  @classmethod
184
215
  def config_files(cls) -> list[str]:
185
216
  """Standard config files for this provider (used for automatic discovery)"""
@@ -0,0 +1,183 @@
1
+ import contextlib
2
+ import shlex
3
+ from datetime import datetime
4
+ from typing import Iterator, Literal, Type, Union, overload
5
+
6
+ from pydantic import JsonValue
7
+ from pydantic_core import to_jsonable_python
8
+ from typing_extensions import override
9
+
10
+ from inspect_ai._util.text import truncate_lines
11
+ from inspect_ai.util._subprocess import ExecResult
12
+
13
+ from .environment import (
14
+ ST,
15
+ SandboxConnection,
16
+ SandboxEnvironment,
17
+ SandboxEnvironmentConfigType,
18
+ )
19
+
20
+
21
class SandboxEnvironmentProxy(SandboxEnvironment):
    """Wrap a SandboxEnvironment and record its operations in the transcript.

    Calls to `exec()`, `write_file()` and `read_file()` are forwarded to the
    wrapped sandbox. After each call returns, a `SandboxEvent` describing it is
    added to the current transcript, unless recording is suppressed with
    `no_events()`.
    """

    def __init__(self, sandbox: SandboxEnvironment) -> None:
        """Create a proxy around `sandbox` with event recording enabled."""
        self._sandbox = sandbox
        self._events = True

    @override
    async def exec(
        self,
        cmd: list[str],
        input: str | bytes | None = None,
        cwd: str | None = None,
        env: dict[str, str] = {},
        user: str | None = None,
        timeout: int | None = None,
        timeout_retry: bool = True,
    ) -> ExecResult[str]:
        """Run `cmd` in the wrapped sandbox and record an "exec" event.

        All arguments are passed through unchanged to the wrapped sandbox.
        The `env` default is never mutated here, only forwarded.

        Returns:
          The `ExecResult` produced by the wrapped sandbox.
        """
        # NOTE(review): imported locally, presumably to avoid an import cycle
        from inspect_ai.log._transcript import SandboxEvent, transcript

        # started
        timestamp = datetime.now()

        # make call
        result = await self._sandbox.exec(
            cmd, input, cwd, env, user, timeout, timeout_retry
        )

        # yield event
        if self._events:
            # only non-default options are recorded, to keep the event compact
            options: dict[str, JsonValue] = {}
            if cwd:
                options["cwd"] = cwd
            if env:
                options["env"] = to_jsonable_python(env)
            if user:
                options["user"] = user
            if timeout is not None:
                options["timeout"] = timeout
            if timeout_retry is not True:
                options["timeout_retry"] = timeout_retry

            transcript()._event(
                SandboxEvent(
                    timestamp=timestamp,
                    action="exec",
                    cmd=" ".join([shlex.quote(c) for c in cmd]),
                    input=content_display(input) if input is not None else None,
                    options=options,
                    result=result.returncode,
                    # stderr (when present) is shown ahead of stdout
                    output=content_display(
                        f"{result.stderr}\n\n{result.stdout}"
                        if result.stderr
                        else result.stdout
                    ),
                    completed=datetime.now(),
                )
            )

        # return result
        return result

    @override
    async def write_file(self, file: str, contents: str | bytes) -> None:
        """Write `contents` to `file` in the wrapped sandbox and record a "write_file" event."""
        from inspect_ai.log._transcript import SandboxEvent, transcript

        timestamp = datetime.now()

        # make call
        await self._sandbox.write_file(file, contents)

        # yield event
        if self._events:
            transcript()._event(
                SandboxEvent(
                    timestamp=timestamp,
                    action="write_file",
                    file=file,
                    input=content_display(contents),
                    completed=datetime.now(),
                )
            )

    @overload
    async def read_file(self, file: str, text: Literal[True] = True) -> str: ...

    @overload
    async def read_file(self, file: str, text: Literal[False]) -> bytes: ...

    @override
    async def read_file(self, file: str, text: bool = True) -> Union[str, bytes]:
        """Read `file` from the wrapped sandbox and record a "read_file" event.

        Args:
          file: Path of the file to read.
          text: Read as text (`str`) when True, as `bytes` otherwise.

        Returns:
          The file contents as returned by the wrapped sandbox.
        """
        from inspect_ai.log._transcript import SandboxEvent, transcript

        timestamp = datetime.now()

        # make call (literal True/False so the matching overload is selected)
        if text is True:
            output: str | bytes = await self._sandbox.read_file(file, True)
        else:
            output = await self._sandbox.read_file(file, False)

        # yield event
        if self._events:
            transcript()._event(
                SandboxEvent(
                    timestamp=timestamp,
                    action="read_file",
                    file=file,
                    output=content_display(output),
                    completed=datetime.now(),
                )
            )

        # return result
        return output

    @override
    async def connection(self) -> SandboxConnection:
        """Return the connection information of the wrapped sandbox."""
        return await self._sandbox.connection()

    @override
    def as_type(self, sandbox_cls: Type[ST]) -> ST:
        """Return the wrapped sandbox (not the proxy) typed as `sandbox_cls`.

        Raises:
          TypeError: If the wrapped sandbox is not an instance of `sandbox_cls`.
        """
        if isinstance(self._sandbox, sandbox_cls):
            return self._sandbox
        else:
            raise TypeError(
                f"Expected instance of {sandbox_cls.__name__}, got {type(self._sandbox).__name__}"
            )

    @contextlib.contextmanager
    def no_events(self) -> Iterator[None]:
        """Suppress transcript events for calls made inside the `with` block.

        The previous setting is restored on exit, so nested `no_events()`
        blocks do not re-enable recording while an outer block is still active.
        """
        previous = self._events
        self._events = False
        try:
            yield
        finally:
            self._events = previous

    @classmethod
    async def sample_cleanup(
        cls,
        task_name: str,
        config: SandboxEnvironmentConfigType | None,
        environments: dict[str, SandboxEnvironment],
        interrupted: bool,
    ) -> None:
        """Do nothing; this implementation performs no cleanup."""
        pass
165
+
166
+
167
def content_display(content: str | bytes) -> str:
    """Produce a short display string for file or command content.

    Text is cut down with `truncate_lines(content, 20)`; when that reports
    truncated lines, a note with the count is appended. Bytes are summarised
    as "binary (<size>)" rather than shown.
    """
    # binary content: report only its size
    if not isinstance(content, str):
        return f"binary ({pretty_size(len(content))})"

    # text content: truncate and note how much was dropped
    text, remaining = truncate_lines(content, 20)
    if remaining:
        text = f"{text}\n\nOutput truncated ({remaining} additional lines)"
    return text
175
+
176
+
177
def pretty_size(size: int) -> str:
    """Format a byte count as a short human-readable string.

    Sizes below 1024 are shown as whole bytes ("B"), sizes below 1024 * 1024
    as kilobytes with two decimals ("KB"), and anything larger as megabytes
    with two decimals ("MB").
    """
    kilobyte = 1024
    megabyte = kilobyte * kilobyte

    # check the largest unit first and fall through to smaller ones
    if size >= megabyte:
        return f"{size / megabyte:.2f} MB"
    elif size >= kilobyte:
        return f"{size / kilobyte:.2f} KB"
    else:
        return f"{size} B"
@@ -1,7 +1,7 @@
1
1
  import tempfile
2
2
  import warnings
3
3
  from pathlib import Path
4
- from typing import Literal, Union, cast, overload
4
+ from typing import Literal, Union, overload
5
5
 
6
6
  from typing_extensions import override
7
7
 
@@ -40,8 +40,8 @@ class LocalSandboxEnvironment(SandboxEnvironment):
40
40
  interrupted: bool,
41
41
  ) -> None:
42
42
  for environment in environments.values():
43
- env = cast(LocalSandboxEnvironment, environment)
44
- env.directory.cleanup()
43
+ sandbox = environment.as_type(LocalSandboxEnvironment)
44
+ sandbox.directory.cleanup()
45
45
 
46
46
  def __init__(self) -> None:
47
47
  self.directory = tempfile.TemporaryDirectory(ignore_cleanup_errors=True)