inspect-ai 0.3.69__py3-none-any.whl → 0.3.71__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/eval.py +27 -9
- inspect_ai/_display/core/display.py +2 -0
- inspect_ai/_display/core/footer.py +13 -3
- inspect_ai/_display/plain/display.py +6 -2
- inspect_ai/_display/rich/display.py +19 -6
- inspect_ai/_display/textual/app.py +9 -3
- inspect_ai/_display/textual/display.py +4 -0
- inspect_ai/_display/textual/widgets/samples.py +4 -10
- inspect_ai/_display/textual/widgets/transcript.py +35 -18
- inspect_ai/_eval/eval.py +14 -2
- inspect_ai/_eval/evalset.py +6 -1
- inspect_ai/_eval/run.py +6 -0
- inspect_ai/_eval/task/run.py +49 -23
- inspect_ai/_eval/task/task.py +26 -3
- inspect_ai/_util/content.py +20 -1
- inspect_ai/_util/interrupt.py +6 -0
- inspect_ai/_util/logger.py +19 -0
- inspect_ai/_util/rich.py +7 -8
- inspect_ai/_util/text.py +13 -0
- inspect_ai/_util/transcript.py +20 -6
- inspect_ai/_util/working.py +50 -0
- inspect_ai/_view/www/App.css +6 -0
- inspect_ai/_view/www/dist/assets/index.css +171 -99
- inspect_ai/_view/www/dist/assets/index.js +5972 -2770
- inspect_ai/_view/www/eslint.config.mjs +24 -1
- inspect_ai/_view/www/log-schema.json +619 -21
- inspect_ai/_view/www/package.json +8 -3
- inspect_ai/_view/www/src/App.tsx +2 -2
- inspect_ai/_view/www/src/appearance/icons.ts +3 -1
- inspect_ai/_view/www/src/components/AnsiDisplay.tsx +4 -3
- inspect_ai/_view/www/src/components/Card.tsx +9 -8
- inspect_ai/_view/www/src/components/DownloadButton.tsx +2 -1
- inspect_ai/_view/www/src/components/EmptyPanel.tsx +2 -2
- inspect_ai/_view/www/src/components/ErrorPanel.tsx +4 -3
- inspect_ai/_view/www/src/components/ExpandablePanel.tsx +13 -5
- inspect_ai/_view/www/src/components/FindBand.tsx +3 -3
- inspect_ai/_view/www/src/components/HumanBaselineView.tsx +3 -3
- inspect_ai/_view/www/src/components/LabeledValue.tsx +5 -4
- inspect_ai/_view/www/src/components/LargeModal.tsx +18 -13
- inspect_ai/_view/www/src/components/{LightboxCarousel.css → LightboxCarousel.module.css} +22 -18
- inspect_ai/_view/www/src/components/LightboxCarousel.tsx +36 -27
- inspect_ai/_view/www/src/components/MessageBand.tsx +2 -1
- inspect_ai/_view/www/src/components/NavPills.tsx +9 -8
- inspect_ai/_view/www/src/components/ProgressBar.tsx +2 -1
- inspect_ai/_view/www/src/components/TabSet.tsx +21 -15
- inspect_ai/_view/www/src/index.tsx +2 -2
- inspect_ai/_view/www/src/metadata/MetaDataGrid.tsx +11 -9
- inspect_ai/_view/www/src/metadata/MetaDataView.tsx +3 -2
- inspect_ai/_view/www/src/metadata/MetadataGrid.module.css +1 -0
- inspect_ai/_view/www/src/metadata/RenderedContent.tsx +16 -1
- inspect_ai/_view/www/src/plan/DatasetDetailView.tsx +3 -2
- inspect_ai/_view/www/src/plan/DetailStep.tsx +2 -1
- inspect_ai/_view/www/src/plan/PlanCard.tsx +2 -5
- inspect_ai/_view/www/src/plan/PlanDetailView.tsx +6 -9
- inspect_ai/_view/www/src/plan/ScorerDetailView.tsx +2 -1
- inspect_ai/_view/www/src/plan/SolverDetailView.tsx +3 -3
- inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +2 -2
- inspect_ai/_view/www/src/samples/SampleDialog.tsx +3 -3
- inspect_ai/_view/www/src/samples/SampleDisplay.module.css +9 -1
- inspect_ai/_view/www/src/samples/SampleDisplay.tsx +30 -3
- inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +4 -0
- inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +25 -4
- inspect_ai/_view/www/src/samples/SamplesTools.tsx +2 -1
- inspect_ai/_view/www/src/samples/chat/ChatMessage.tsx +3 -19
- inspect_ai/_view/www/src/samples/chat/ChatMessageRenderer.tsx +2 -1
- inspect_ai/_view/www/src/samples/chat/ChatMessageRow.tsx +2 -1
- inspect_ai/_view/www/src/samples/chat/ChatView.tsx +2 -1
- inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.tsx +22 -7
- inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +35 -6
- inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +2 -2
- inspect_ai/_view/www/src/samples/chat/messages.ts +15 -2
- inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +13 -4
- inspect_ai/_view/www/src/samples/chat/tools/ToolInput.module.css +2 -2
- inspect_ai/_view/www/src/samples/chat/tools/ToolInput.tsx +18 -19
- inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.module.css +1 -1
- inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.tsx +4 -3
- inspect_ai/_view/www/src/samples/chat/tools/ToolTitle.tsx +2 -2
- inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.tsx +2 -3
- inspect_ai/_view/www/src/samples/error/SampleErrorView.tsx +3 -2
- inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +2 -1
- inspect_ai/_view/www/src/samples/list/SampleHeader.tsx +2 -1
- inspect_ai/_view/www/src/samples/list/SampleList.tsx +57 -45
- inspect_ai/_view/www/src/samples/list/SampleRow.tsx +2 -1
- inspect_ai/_view/www/src/samples/list/SampleSeparator.tsx +2 -1
- inspect_ai/_view/www/src/samples/sample-tools/EpochFilter.tsx +2 -2
- inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.tsx +4 -3
- inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +2 -5
- inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +2 -2
- inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +2 -1
- inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +2 -2
- inspect_ai/_view/www/src/samples/transcript/ApprovalEventView.tsx +2 -1
- inspect_ai/_view/www/src/samples/transcript/ErrorEventView.tsx +2 -1
- inspect_ai/_view/www/src/samples/transcript/InfoEventView.tsx +2 -1
- inspect_ai/_view/www/src/samples/transcript/InputEventView.tsx +2 -1
- inspect_ai/_view/www/src/samples/transcript/LoggerEventView.module.css +4 -0
- inspect_ai/_view/www/src/samples/transcript/LoggerEventView.tsx +12 -2
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +1 -1
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +25 -28
- inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.tsx +2 -1
- inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +9 -4
- inspect_ai/_view/www/src/samples/transcript/SampleTranscript.tsx +2 -2
- inspect_ai/_view/www/src/samples/transcript/SandboxEventView.module.css +32 -0
- inspect_ai/_view/www/src/samples/transcript/SandboxEventView.tsx +153 -0
- inspect_ai/_view/www/src/samples/transcript/ScoreEventView.tsx +2 -2
- inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +12 -5
- inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.tsx +18 -14
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +5 -5
- inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +53 -16
- inspect_ai/_view/www/src/samples/transcript/event/EventNav.tsx +2 -1
- inspect_ai/_view/www/src/samples/transcript/event/EventNavs.tsx +2 -1
- inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +6 -3
- inspect_ai/_view/www/src/samples/transcript/event/EventRow.tsx +3 -2
- inspect_ai/_view/www/src/samples/transcript/event/EventSection.tsx +2 -2
- inspect_ai/_view/www/src/samples/transcript/event/EventTimingPanel.module.css +28 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventTimingPanel.tsx +115 -0
- inspect_ai/_view/www/src/samples/transcript/event/utils.ts +29 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateDiffView.tsx +2 -1
- inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +3 -3
- inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +11 -8
- inspect_ai/_view/www/src/samples/transcript/types.ts +3 -1
- inspect_ai/_view/www/src/types/log.d.ts +312 -137
- inspect_ai/_view/www/src/usage/ModelTokenTable.tsx +6 -10
- inspect_ai/_view/www/src/usage/ModelUsagePanel.module.css +4 -0
- inspect_ai/_view/www/src/usage/ModelUsagePanel.tsx +32 -9
- inspect_ai/_view/www/src/usage/TokenTable.tsx +4 -6
- inspect_ai/_view/www/src/usage/UsageCard.tsx +2 -1
- inspect_ai/_view/www/src/utils/format.ts +8 -5
- inspect_ai/_view/www/src/utils/json.ts +24 -0
- inspect_ai/_view/www/src/workspace/WorkSpace.tsx +6 -5
- inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +18 -8
- inspect_ai/_view/www/src/workspace/error/TaskErrorPanel.tsx +2 -1
- inspect_ai/_view/www/src/workspace/navbar/Navbar.tsx +2 -1
- inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +3 -3
- inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +4 -3
- inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +5 -4
- inspect_ai/_view/www/src/workspace/navbar/StatusPanel.tsx +5 -8
- inspect_ai/_view/www/src/workspace/sidebar/EvalStatus.tsx +5 -4
- inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +2 -1
- inspect_ai/_view/www/src/workspace/sidebar/Sidebar.tsx +2 -1
- inspect_ai/_view/www/src/workspace/sidebar/SidebarLogEntry.tsx +2 -2
- inspect_ai/_view/www/src/workspace/sidebar/SidebarScoreView.tsx +2 -1
- inspect_ai/_view/www/src/workspace/sidebar/SidebarScoresView.tsx +2 -2
- inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +2 -2
- inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +2 -5
- inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +12 -11
- inspect_ai/_view/www/yarn.lock +241 -5
- inspect_ai/log/__init__.py +2 -0
- inspect_ai/log/_condense.py +4 -0
- inspect_ai/log/_log.py +72 -12
- inspect_ai/log/_recorders/eval.py +6 -1
- inspect_ai/log/_samples.py +5 -1
- inspect_ai/log/_transcript.py +89 -2
- inspect_ai/model/__init__.py +2 -0
- inspect_ai/model/_call_tools.py +8 -1
- inspect_ai/model/_chat_message.py +22 -7
- inspect_ai/model/_conversation.py +11 -9
- inspect_ai/model/_generate_config.py +25 -4
- inspect_ai/model/_model.py +164 -72
- inspect_ai/model/_model_call.py +10 -3
- inspect_ai/model/_model_output.py +3 -0
- inspect_ai/model/_openai.py +106 -40
- inspect_ai/model/_providers/anthropic.py +145 -26
- inspect_ai/model/_providers/bedrock.py +7 -0
- inspect_ai/model/_providers/cloudflare.py +20 -7
- inspect_ai/model/_providers/google.py +29 -8
- inspect_ai/model/_providers/groq.py +66 -27
- inspect_ai/model/_providers/hf.py +6 -0
- inspect_ai/model/_providers/mistral.py +78 -51
- inspect_ai/model/_providers/openai.py +66 -4
- inspect_ai/model/_providers/openai_o1.py +10 -0
- inspect_ai/model/_providers/providers.py +2 -2
- inspect_ai/model/_providers/util/tracker.py +92 -0
- inspect_ai/model/_providers/vllm.py +13 -5
- inspect_ai/model/_reasoning.py +15 -2
- inspect_ai/scorer/_model.py +23 -19
- inspect_ai/solver/_basic_agent.py +1 -3
- inspect_ai/solver/_bridge/patch.py +0 -2
- inspect_ai/solver/_human_agent/agent.py +14 -10
- inspect_ai/solver/_human_agent/commands/__init__.py +7 -3
- inspect_ai/solver/_human_agent/commands/submit.py +76 -30
- inspect_ai/solver/_limit.py +4 -4
- inspect_ai/solver/_plan.py +0 -3
- inspect_ai/solver/_task_state.py +7 -0
- inspect_ai/tool/__init__.py +2 -0
- inspect_ai/tool/_tool.py +3 -1
- inspect_ai/tool/_tools/_computer/_resources/tool/_run.py +1 -1
- inspect_ai/tool/_tools/_web_browser/_resources/.pylintrc +8 -0
- inspect_ai/tool/_tools/_web_browser/_resources/.vscode/launch.json +24 -0
- inspect_ai/tool/_tools/_web_browser/_resources/.vscode/settings.json +25 -0
- inspect_ai/tool/_tools/_web_browser/_resources/Dockerfile +5 -6
- inspect_ai/tool/_tools/_web_browser/_resources/README.md +10 -11
- inspect_ai/tool/_tools/_web_browser/_resources/accessibility_tree.py +71 -0
- inspect_ai/tool/_tools/_web_browser/_resources/accessibility_tree_node.py +323 -0
- inspect_ai/tool/_tools/_web_browser/_resources/cdp/__init__.py +5 -0
- inspect_ai/tool/_tools/_web_browser/_resources/cdp/a11y.py +279 -0
- inspect_ai/tool/_tools/_web_browser/_resources/cdp/dom.py +9 -0
- inspect_ai/tool/_tools/_web_browser/_resources/cdp/dom_snapshot.py +293 -0
- inspect_ai/tool/_tools/_web_browser/_resources/cdp/page.py +94 -0
- inspect_ai/tool/_tools/_web_browser/_resources/constants.py +2 -0
- inspect_ai/tool/_tools/_web_browser/_resources/images/usage_diagram.svg +2 -0
- inspect_ai/tool/_tools/_web_browser/_resources/playwright_browser.py +50 -0
- inspect_ai/tool/_tools/_web_browser/_resources/playwright_crawler.py +31 -359
- inspect_ai/tool/_tools/_web_browser/_resources/playwright_page_crawler.py +280 -0
- inspect_ai/tool/_tools/_web_browser/_resources/pyproject.toml +65 -0
- inspect_ai/tool/_tools/_web_browser/_resources/rectangle.py +64 -0
- inspect_ai/tool/_tools/_web_browser/_resources/rpc_client_helpers.py +146 -0
- inspect_ai/tool/_tools/_web_browser/_resources/scale_factor.py +64 -0
- inspect_ai/tool/_tools/_web_browser/_resources/test_accessibility_tree_node.py +180 -0
- inspect_ai/tool/_tools/_web_browser/_resources/test_playwright_crawler.py +15 -9
- inspect_ai/tool/_tools/_web_browser/_resources/test_rectangle.py +15 -0
- inspect_ai/tool/_tools/_web_browser/_resources/test_web_client.py +44 -0
- inspect_ai/tool/_tools/_web_browser/_resources/web_browser_rpc_types.py +39 -0
- inspect_ai/tool/_tools/_web_browser/_resources/web_client.py +198 -48
- inspect_ai/tool/_tools/_web_browser/_resources/web_client_new_session.py +26 -25
- inspect_ai/tool/_tools/_web_browser/_resources/web_server.py +178 -39
- inspect_ai/tool/_tools/_web_browser/_web_browser.py +38 -19
- inspect_ai/tool/_tools/_web_search.py +3 -3
- inspect_ai/util/__init__.py +2 -1
- inspect_ai/util/_concurrency.py +14 -8
- inspect_ai/util/_display.py +12 -0
- inspect_ai/util/_sandbox/context.py +15 -0
- inspect_ai/util/_sandbox/docker/docker.py +7 -5
- inspect_ai/util/_sandbox/environment.py +32 -1
- inspect_ai/util/_sandbox/events.py +183 -0
- inspect_ai/util/_sandbox/local.py +3 -3
- inspect_ai/util/_sandbox/self_check.py +131 -43
- inspect_ai/util/_subtask.py +11 -0
- {inspect_ai-0.3.69.dist-info → inspect_ai-0.3.71.dist-info}/METADATA +3 -3
- {inspect_ai-0.3.69.dist-info → inspect_ai-0.3.71.dist-info}/RECORD +233 -211
- {inspect_ai-0.3.69.dist-info → inspect_ai-0.3.71.dist-info}/WHEEL +1 -1
- inspect_ai/_view/www/src/components/VirtualList.module.css +0 -19
- inspect_ai/_view/www/src/components/VirtualList.tsx +0 -292
- inspect_ai/tool/_tools/_web_browser/_resources/accessibility_node.py +0 -312
- inspect_ai/tool/_tools/_web_browser/_resources/dm_env_servicer.py +0 -275
- inspect_ai/tool/_tools/_web_browser/_resources/images/usage_diagram.png +0 -0
- inspect_ai/tool/_tools/_web_browser/_resources/test_accessibility_node.py +0 -176
- inspect_ai/tool/_tools/_web_browser/_resources/test_dm_env_servicer.py +0 -135
- inspect_ai/tool/_tools/_web_browser/_resources/test_web_environment.py +0 -71
- inspect_ai/tool/_tools/_web_browser/_resources/web_environment.py +0 -184
- {inspect_ai-0.3.69.dist-info → inspect_ai-0.3.71.dist-info}/LICENSE +0 -0
- {inspect_ai-0.3.69.dist-info → inspect_ai-0.3.71.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.69.dist-info → inspect_ai-0.3.71.dist-info}/top_level.txt +0 -0
@@ -3,8 +3,9 @@ from textwrap import dedent
|
|
3
3
|
|
4
4
|
from pydantic import Field
|
5
5
|
|
6
|
+
from inspect_ai._util.content import ContentText
|
6
7
|
from inspect_ai._util.error import PrerequisiteError
|
7
|
-
from inspect_ai.tool._tool import Tool, ToolError, tool
|
8
|
+
from inspect_ai.tool._tool import Tool, ToolError, ToolResult, tool
|
8
9
|
from inspect_ai.tool._tool_call import ToolCall, ToolCallContent, ToolCallView
|
9
10
|
from inspect_ai.tool._tool_info import parse_tool_info
|
10
11
|
from inspect_ai.tool._tool_with import tool_with
|
@@ -58,10 +59,10 @@ def web_browser_go() -> Tool:
|
|
58
59
|
Web browser navigation tool.
|
59
60
|
"""
|
60
61
|
|
61
|
-
async def execute(url: str) ->
|
62
|
+
async def execute(url: str) -> ToolResult:
|
62
63
|
"""Navigate the web browser to a URL.
|
63
64
|
|
64
|
-
Once you have navigated to a page, you will be presented with a web
|
65
|
+
Once you have navigated to a page, you will be presented with a web accessibility tree of the elements on the page. Each element has an ID, which is displayed in brackets at the beginning of its line. For example:
|
65
66
|
|
66
67
|
```
|
67
68
|
[1] RootWebArea "Google" [focused: True, url: https://www.google.com/]
|
@@ -99,16 +100,17 @@ def go_without_interactive_docs(tool: Tool) -> Tool:
|
|
99
100
|
|
100
101
|
|
101
102
|
# custom viewer for interactive tool calls that shows a truncated
|
102
|
-
# version of current the web
|
103
|
+
# version of current the web accessibility tree if available
|
103
104
|
|
104
105
|
|
105
106
|
class WebBrowserStore(StoreModel):
|
107
|
+
main_content: str = Field(default_factory=str)
|
106
108
|
web_at: str = Field(default_factory=str)
|
107
109
|
session_id: str = Field(default_factory=str)
|
108
110
|
|
109
111
|
|
110
112
|
def web_at_viewer(call: ToolCall) -> ToolCallView:
|
111
|
-
# get the web
|
113
|
+
# get the web accessibility tree, if we have it create a view from it
|
112
114
|
web_at = store_as(WebBrowserStore).web_at
|
113
115
|
element_id = call.arguments.get("element_id", 0)
|
114
116
|
if web_at and element_id:
|
@@ -141,10 +143,10 @@ def web_browser_click() -> Tool:
|
|
141
143
|
Web browser clicking tool.
|
142
144
|
"""
|
143
145
|
|
144
|
-
async def execute(element_id: int) ->
|
146
|
+
async def execute(element_id: int) -> ToolResult:
|
145
147
|
"""Click an element on the page currently displayed by the web browser.
|
146
148
|
|
147
|
-
For example, with the following web
|
149
|
+
For example, with the following web accessibility tree:
|
148
150
|
|
149
151
|
```
|
150
152
|
[304] RootWebArea "Poetry Foundation" [focused: True, url: https://www.poetryfoundation.org/]
|
@@ -176,7 +178,7 @@ def web_browser_type_submit() -> Tool:
|
|
176
178
|
Web browser type and submit tool.
|
177
179
|
"""
|
178
180
|
|
179
|
-
async def execute(element_id: int, text: str) ->
|
181
|
+
async def execute(element_id: int, text: str) -> ToolResult:
|
180
182
|
"""Type text into a form input on a web browser page and press ENTER to submit the form.
|
181
183
|
|
182
184
|
For example, to execute a search for "Yeats" from this page:
|
@@ -214,7 +216,7 @@ def web_browser_type() -> Tool:
|
|
214
216
|
Web browser typing tool.
|
215
217
|
"""
|
216
218
|
|
217
|
-
async def execute(element_id: int, text: str) ->
|
219
|
+
async def execute(element_id: int, text: str) -> ToolResult:
|
218
220
|
"""Type text into an input on a web browser page.
|
219
221
|
|
220
222
|
For example, to type "Norah" into the "First Name" search box on this page:
|
@@ -252,7 +254,7 @@ def web_browser_scroll() -> Tool:
|
|
252
254
|
Web browser scrolling tool.
|
253
255
|
"""
|
254
256
|
|
255
|
-
async def execute(direction: str) ->
|
257
|
+
async def execute(direction: str) -> ToolResult:
|
256
258
|
"""Scroll the web browser up or down by one page.
|
257
259
|
|
258
260
|
Occasionally some very long pages don't display all of their content at once. To see additional content you can scroll the page down with:
|
@@ -282,7 +284,7 @@ def web_browser_back() -> Tool:
|
|
282
284
|
Web browser back navigation tool.
|
283
285
|
"""
|
284
286
|
|
285
|
-
async def execute() ->
|
287
|
+
async def execute() -> ToolResult:
|
286
288
|
"""Navigate the web browser back in the browser history.
|
287
289
|
|
288
290
|
If you want to view a page that you have previously browsed (or perhaps just didn't find what you were looking for on a page and want to backtrack) use the web_browser_back tool.
|
@@ -303,7 +305,7 @@ def web_browser_forward() -> Tool:
|
|
303
305
|
Web browser forward navigation tool.
|
304
306
|
"""
|
305
307
|
|
306
|
-
async def execute() ->
|
308
|
+
async def execute() -> ToolResult:
|
307
309
|
"""Navigate the web browser forward in the browser history.
|
308
310
|
|
309
311
|
If you have navigated back in the browser history and then want to navigate forward use the web_browser_forward tool.
|
@@ -324,7 +326,7 @@ def web_browser_refresh() -> Tool:
|
|
324
326
|
Web browser page refresh tool.
|
325
327
|
"""
|
326
328
|
|
327
|
-
async def execute() ->
|
329
|
+
async def execute() -> ToolResult:
|
328
330
|
"""Refresh the current page of the web browser.
|
329
331
|
|
330
332
|
If you have interacted with a page by clicking buttons and want to reset it to its original state, use the web_browser_refresh tool.
|
@@ -341,7 +343,7 @@ WEB_CLIENT_REQUEST = "/app/web_browser/web_client.py"
|
|
341
343
|
WEB_CLIENT_NEW_SESSION = "/app/web_browser/web_client_new_session.py"
|
342
344
|
|
343
345
|
|
344
|
-
async def web_browser_cmd(cmd: str, *args: str) ->
|
346
|
+
async def web_browser_cmd(cmd: str, *args: str) -> ToolResult:
|
345
347
|
sandbox_env = await sandbox_with(WEB_CLIENT_NEW_SESSION)
|
346
348
|
session_flag = ""
|
347
349
|
if sandbox_env:
|
@@ -379,17 +381,30 @@ async def web_browser_cmd(cmd: str, *args: str) -> str:
|
|
379
381
|
if "error" in response and response.get("error", "").strip() != "":
|
380
382
|
raise ToolError(str(response.get("error")) or "(unknown error)")
|
381
383
|
elif "web_at" in response:
|
384
|
+
main_content = str(response.get("main_content")) or None
|
382
385
|
web_at = (
|
383
|
-
str(response.get("web_at")) or "(no web
|
386
|
+
str(response.get("web_at")) or "(no web accessibility tree available)"
|
384
387
|
)
|
385
388
|
# Remove base64 data from images.
|
386
389
|
web_at_lines = web_at.split("\n")
|
387
390
|
web_at_lines = [
|
388
391
|
line.partition("data:image/png;base64")[0] for line in web_at_lines
|
389
392
|
]
|
390
|
-
|
393
|
+
|
394
|
+
store_as(WebBrowserStore).main_content = (
|
395
|
+
main_content or "(no main text summary)"
|
396
|
+
)
|
391
397
|
store_as(WebBrowserStore).web_at = web_at
|
392
|
-
|
398
|
+
|
399
|
+
web_at = "\n".join(web_at_lines)
|
400
|
+
return (
|
401
|
+
[
|
402
|
+
ContentText(text=f"main content:\n{main_content}\n\n"),
|
403
|
+
ContentText(text=f"accessibility tree:\n{web_at}"),
|
404
|
+
]
|
405
|
+
if main_content
|
406
|
+
else web_at
|
407
|
+
)
|
393
408
|
else:
|
394
409
|
raise RuntimeError(
|
395
410
|
f"web_browser output must contain either 'error' or 'web_at' field: {result.stdout}"
|
@@ -425,7 +440,9 @@ async def web_browser_sandbox() -> SandboxEnvironment:
|
|
425
440
|
|
426
441
|
|
427
442
|
def parse_web_browser_output(output: str) -> dict[str, str]:
|
428
|
-
response: dict[str, str] = dict(
|
443
|
+
response: dict[str, str] = dict(
|
444
|
+
web_url="", main_content="", web_at="", info="", error=""
|
445
|
+
)
|
429
446
|
active_field: str | None = None
|
430
447
|
active_field_lines: list[str] = []
|
431
448
|
|
@@ -435,7 +452,9 @@ def parse_web_browser_output(output: str) -> dict[str, str]:
|
|
435
452
|
active_field_lines.clear()
|
436
453
|
|
437
454
|
for line in output.splitlines():
|
438
|
-
field_match = re.match(
|
455
|
+
field_match = re.match(
|
456
|
+
r"^(error|main_content|web_at|web_url|info)\s*:\s*(.+)$", line
|
457
|
+
)
|
439
458
|
if field_match:
|
440
459
|
collect_active_field()
|
441
460
|
active_field = field_match.group(1)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
import asyncio
|
2
2
|
import os
|
3
|
-
from typing import Literal, Protocol,
|
3
|
+
from typing import Literal, Protocol, runtime_checkable
|
4
4
|
|
5
5
|
import httpx
|
6
6
|
from bs4 import BeautifulSoup, NavigableString
|
@@ -90,8 +90,8 @@ def web_search(
|
|
90
90
|
return_exceptions=True,
|
91
91
|
)
|
92
92
|
for page, link in zip(pages, links):
|
93
|
-
if page and not isinstance(page,
|
94
|
-
page_contents.append(
|
93
|
+
if page and not isinstance(page, BaseException):
|
94
|
+
page_contents.append(page)
|
95
95
|
urls.append(link.url)
|
96
96
|
snippets.append(link.snippet)
|
97
97
|
search_calls += 1
|
inspect_ai/util/__init__.py
CHANGED
@@ -2,7 +2,7 @@ from inspect_ai._util.trace import trace_action, trace_message
|
|
2
2
|
|
3
3
|
from ._concurrency import concurrency
|
4
4
|
from ._console import input_screen
|
5
|
-
from ._display import DisplayType, display_type
|
5
|
+
from ._display import DisplayType, display_counter, display_type
|
6
6
|
from ._panel import InputPanel, input_panel
|
7
7
|
from ._resource import resource
|
8
8
|
from ._sandbox import (
|
@@ -31,6 +31,7 @@ __all__ = [
|
|
31
31
|
"ExecResult",
|
32
32
|
"concurrency",
|
33
33
|
"DisplayType",
|
34
|
+
"display_counter",
|
34
35
|
"display_type",
|
35
36
|
"InputPanel",
|
36
37
|
"input_panel",
|
inspect_ai/util/_concurrency.py
CHANGED
@@ -1,13 +1,19 @@
|
|
1
1
|
import asyncio
|
2
|
+
import contextlib
|
3
|
+
import time
|
2
4
|
from dataclasses import dataclass
|
5
|
+
from typing import AsyncIterator
|
3
6
|
|
7
|
+
from inspect_ai._util.working import report_sample_waiting_time
|
4
8
|
|
5
|
-
|
9
|
+
|
10
|
+
@contextlib.asynccontextmanager
|
11
|
+
async def concurrency(
|
6
12
|
name: str,
|
7
13
|
concurrency: int,
|
8
14
|
key: str | None = None,
|
9
|
-
) ->
|
10
|
-
"""
|
15
|
+
) -> AsyncIterator[None]:
|
16
|
+
"""Concurrency context manager.
|
11
17
|
|
12
18
|
A concurrency context can be used to limit the number of coroutines
|
13
19
|
executing a block of code (e.g calling an API). For example, here
|
@@ -32,9 +38,6 @@ def concurrency(
|
|
32
38
|
Used if the unique key isn't human readable -- e.g. includes
|
33
39
|
api tokens or account ids so that the more readable `name`
|
34
40
|
can be presented to users e.g in console UI>
|
35
|
-
|
36
|
-
Returns:
|
37
|
-
Asyncio Semaphore for concurrency context.
|
38
41
|
"""
|
39
42
|
# sort out key
|
40
43
|
key = key if key else name
|
@@ -47,8 +50,11 @@ def concurrency(
|
|
47
50
|
)
|
48
51
|
_concurrency_semaphores[key] = semaphore
|
49
52
|
|
50
|
-
#
|
51
|
-
|
53
|
+
# wait and yield to protected code
|
54
|
+
start_wait = time.monotonic()
|
55
|
+
async with semaphore.semaphore:
|
56
|
+
report_sample_waiting_time(time.monotonic() - start_wait)
|
57
|
+
yield
|
52
58
|
|
53
59
|
|
54
60
|
def concurrency_status() -> dict[str, tuple[int, int]]:
|
inspect_ai/util/_display.py
CHANGED
@@ -54,3 +54,15 @@ def display_type() -> DisplayType:
|
|
54
54
|
def display_type_initialized() -> bool:
|
55
55
|
global _display_type
|
56
56
|
return _display_type is not None
|
57
|
+
|
58
|
+
|
59
|
+
def display_counter(caption: str, value: str) -> None:
|
60
|
+
"""Display a counter in the UI.
|
61
|
+
|
62
|
+
Args:
|
63
|
+
caption: The counter's caption e.g. "HTTP rate limits".
|
64
|
+
value: The counter's value e.g. "42".
|
65
|
+
"""
|
66
|
+
from inspect_ai._display.core.active import display
|
67
|
+
|
68
|
+
display().display_counter(caption, value)
|
@@ -5,6 +5,7 @@ from typing import Any, NoReturn, cast
|
|
5
5
|
from shortuuid import uuid
|
6
6
|
|
7
7
|
from inspect_ai._util.constants import SANDBOX_SETUP_TIMEOUT
|
8
|
+
from inspect_ai.util._sandbox.events import SandboxEnvironmentProxy
|
8
9
|
|
9
10
|
from .environment import (
|
10
11
|
SampleCleanup,
|
@@ -132,6 +133,9 @@ async def init_sandbox_environments_sample(
|
|
132
133
|
# verify that there is at least one environment and a 'default' env
|
133
134
|
validate_sandbox_environments(sandboxenv_type, environments)
|
134
135
|
|
136
|
+
# proxy environments (for recording SandboxEvent)
|
137
|
+
environments = {k: SandboxEnvironmentProxy(v) for k, v in environments.items()}
|
138
|
+
|
135
139
|
try:
|
136
140
|
# copy files into environments
|
137
141
|
await copy_sandbox_environment_files(files, environments)
|
@@ -148,6 +152,7 @@ async def init_sandbox_environments_sample(
|
|
148
152
|
return environments
|
149
153
|
|
150
154
|
except Exception as ex:
|
155
|
+
environments = unproxy_environments(environments)
|
151
156
|
await sample_cleanup(task_name, config, environments, True)
|
152
157
|
raise ex
|
153
158
|
|
@@ -161,9 +166,19 @@ async def cleanup_sandbox_environments_sample(
|
|
161
166
|
) -> None:
|
162
167
|
sandboxenv_type = registry_find_sandboxenv(type)
|
163
168
|
sample_cleanup = cast(SampleCleanup, getattr(sandboxenv_type, "sample_cleanup"))
|
169
|
+
environments = unproxy_environments(environments)
|
164
170
|
await sample_cleanup(task_name, config, environments, interrupted)
|
165
171
|
|
166
172
|
|
173
|
+
def unproxy_environments(
|
174
|
+
environments: dict[str, SandboxEnvironment],
|
175
|
+
) -> dict[str, SandboxEnvironment]:
|
176
|
+
return {
|
177
|
+
k: v._sandbox
|
178
|
+
for k, v in cast(dict[str, SandboxEnvironmentProxy], environments).items()
|
179
|
+
}
|
180
|
+
|
181
|
+
|
167
182
|
async def copy_sandbox_environment_files(
|
168
183
|
files: dict[str, bytes], environments: dict[str, SandboxEnvironment]
|
169
184
|
) -> None:
|
@@ -5,7 +5,7 @@ import os
|
|
5
5
|
import tempfile
|
6
6
|
from logging import getLogger
|
7
7
|
from pathlib import Path, PurePosixPath
|
8
|
-
from typing import Literal, Union,
|
8
|
+
from typing import Literal, Union, overload
|
9
9
|
|
10
10
|
from typing_extensions import override
|
11
11
|
|
@@ -145,7 +145,7 @@ class DockerSandboxEnvironment(SandboxEnvironment):
|
|
145
145
|
project = await ComposeProject.create(
|
146
146
|
name=task_project_name(task_name),
|
147
147
|
config=config,
|
148
|
-
sample_id=sample.id if sample is not None else None,
|
148
|
+
sample_id=sample.sample.id if sample is not None else None,
|
149
149
|
epoch=sample.epoch if sample is not None else None,
|
150
150
|
env=env,
|
151
151
|
)
|
@@ -221,9 +221,11 @@ class DockerSandboxEnvironment(SandboxEnvironment):
|
|
221
221
|
# (this enables us to show output for the cleanup operation)
|
222
222
|
if not interrupted:
|
223
223
|
# extract project from first environment
|
224
|
-
project =
|
225
|
-
|
226
|
-
|
224
|
+
project = (
|
225
|
+
next(iter(environments.values()))
|
226
|
+
.as_type(DockerSandboxEnvironment)
|
227
|
+
._project
|
228
|
+
)
|
227
229
|
# cleanup the project
|
228
230
|
await project_cleanup(project=project, quiet=True)
|
229
231
|
|
@@ -2,12 +2,24 @@ from __future__ import annotations
|
|
2
2
|
|
3
3
|
import abc
|
4
4
|
from dataclasses import dataclass, field
|
5
|
-
from typing import
|
5
|
+
from typing import (
|
6
|
+
Any,
|
7
|
+
Awaitable,
|
8
|
+
Callable,
|
9
|
+
Literal,
|
10
|
+
NamedTuple,
|
11
|
+
Type,
|
12
|
+
TypeVar,
|
13
|
+
Union,
|
14
|
+
overload,
|
15
|
+
)
|
6
16
|
|
7
17
|
from pydantic import BaseModel, Field
|
8
18
|
|
9
19
|
from .._subprocess import ExecResult
|
10
20
|
|
21
|
+
ST = TypeVar("ST", bound="SandboxEnvironment")
|
22
|
+
|
11
23
|
TaskInit = Callable[[str, Union["SandboxEnvironmentConfigType", None]], Awaitable[None]]
|
12
24
|
TaskCleanup = Callable[
|
13
25
|
[str, Union["SandboxEnvironmentConfigType", None], bool], Awaitable[None]
|
@@ -180,6 +192,25 @@ class SandboxEnvironment(abc.ABC):
|
|
180
192
|
"""
|
181
193
|
raise NotImplementedError("connection not implemented")
|
182
194
|
|
195
|
+
def as_type(self, sandbox_cls: Type[ST]) -> ST:
|
196
|
+
"""Verify and return a reference to a subclass of SandboxEnvironment.
|
197
|
+
|
198
|
+
Args:
|
199
|
+
sandbox_cls: Class of sandbox (subclass of SandboxEnvironment)
|
200
|
+
|
201
|
+
Returns:
|
202
|
+
Reference to the sandbox using the requested type.
|
203
|
+
|
204
|
+
Raises:
|
205
|
+
TypeError: If the sandbox is not of the requested type.
|
206
|
+
"""
|
207
|
+
if isinstance(self, sandbox_cls):
|
208
|
+
return self
|
209
|
+
else:
|
210
|
+
raise TypeError(
|
211
|
+
f"Expected instance of {sandbox_cls.__name__}, got {type(self).__name__}"
|
212
|
+
)
|
213
|
+
|
183
214
|
@classmethod
|
184
215
|
def config_files(cls) -> list[str]:
|
185
216
|
"""Standard config files for this provider (used for automatic discovery)"""
|
@@ -0,0 +1,183 @@
|
|
1
|
+
import contextlib
|
2
|
+
import shlex
|
3
|
+
from datetime import datetime
|
4
|
+
from typing import Iterator, Literal, Type, Union, overload
|
5
|
+
|
6
|
+
from pydantic import JsonValue
|
7
|
+
from pydantic_core import to_jsonable_python
|
8
|
+
from typing_extensions import override
|
9
|
+
|
10
|
+
from inspect_ai._util.text import truncate_lines
|
11
|
+
from inspect_ai.util._subprocess import ExecResult
|
12
|
+
|
13
|
+
from .environment import (
|
14
|
+
ST,
|
15
|
+
SandboxConnection,
|
16
|
+
SandboxEnvironment,
|
17
|
+
SandboxEnvironmentConfigType,
|
18
|
+
)
|
19
|
+
|
20
|
+
|
21
|
+
class SandboxEnvironmentProxy(SandboxEnvironment):
|
22
|
+
def __init__(self, sandbox: SandboxEnvironment) -> None:
|
23
|
+
self._sandbox = sandbox
|
24
|
+
self._events = True
|
25
|
+
|
26
|
+
@override
|
27
|
+
async def exec(
    self,
    cmd: list[str],
    input: str | bytes | None = None,
    cwd: str | None = None,
    env: dict[str, str] = {},
    user: str | None = None,
    timeout: int | None = None,
    timeout_retry: bool = True,
) -> ExecResult[str]:
    """Run a command in the wrapped sandbox and record it in the transcript.

    Every argument is forwarded positionally, unchanged, to the wrapped
    sandbox's ``exec``. After the call returns, and only while events are
    enabled, a ``SandboxEvent`` with action ``"exec"`` is added to the
    current transcript.

    Args:
        cmd: Command and arguments; shell-quoted and space-joined for the event.
        input: Optional input for the command; summarised via ``content_display``.
        cwd: Optional working directory (recorded in the event options if truthy).
        env: Environment variables (recorded in the event options if non-empty).
        user: Optional user (recorded in the event options if truthy).
        timeout: Optional timeout (recorded in the event options if not None).
        timeout_retry: Retry flag (recorded in the event options only if not True).

    Returns:
        The ``ExecResult`` returned by the wrapped sandbox.
    """
    from inspect_ai.log._transcript import SandboxEvent, transcript

    # note the start time before delegating so the event covers the whole call
    started_at = datetime.now()
    result = await self._sandbox.exec(
        cmd, input, cwd, env, user, timeout, timeout_retry
    )

    # unset/empty options are left out of the recorded event
    options: dict[str, JsonValue] = {
        **({"cwd": cwd} if cwd else {}),
        **({"env": to_jsonable_python(env)} if env else {}),
        **({"user": user} if user else {}),
        **({"timeout": timeout} if timeout is not None else {}),
        **({"timeout_retry": timeout_retry} if timeout_retry is not True else {}),
    }

    if self._events:
        recorder = transcript()
        recorder._event(
            SandboxEvent(
                timestamp=started_at,
                action="exec",
                cmd=" ".join(shlex.quote(part) for part in cmd),
                input=None if input is None else content_display(input),
                options=options,
                result=result.returncode,
                # stderr (when present) is shown ahead of stdout
                output=content_display(
                    result.stdout
                    if not result.stderr
                    else f"{result.stderr}\n\n{result.stdout}"
                ),
                completed=datetime.now(),
            )
        )

    return result
|
80
|
+
|
81
|
+
@override
|
82
|
+
async def write_file(self, file: str, contents: str | bytes) -> None:
    """Write a file through the wrapped sandbox and record it in the transcript.

    Args:
        file: Path passed through to the wrapped sandbox's ``write_file``.
        contents: Text or binary payload; summarised via ``content_display``
            for the recorded event.
    """
    from inspect_ai.log._transcript import SandboxEvent, transcript

    started_at = datetime.now()

    # delegate first; the event is only recorded once the write has returned
    await self._sandbox.write_file(file, contents)

    if not self._events:
        return

    recorder = transcript()
    recorder._event(
        SandboxEvent(
            timestamp=started_at,
            action="write_file",
            file=file,
            input=content_display(contents),
            completed=datetime.now(),
        )
    )
|
101
|
+
|
102
|
+
@overload
|
103
|
+
async def read_file(self, file: str, text: Literal[True] = True) -> str: ...

@overload
async def read_file(self, file: str, text: Literal[False]) -> bytes: ...

@override
async def read_file(self, file: str, text: bool = True) -> Union[str | bytes]:
    """Read a file through the wrapped sandbox and record it in the transcript.

    Args:
        file: Path passed through to the wrapped sandbox's ``read_file``.
        text: When exactly ``True`` the wrapped sandbox is asked for text,
            otherwise for bytes.

    Returns:
        Whatever the wrapped sandbox returned (``str`` or ``bytes``).
    """
    from inspect_ai.log._transcript import SandboxEvent, transcript

    started_at = datetime.now()

    # pass a literal True/False in each branch, mirroring the overloads above
    output: str | bytes
    if text is True:
        output = await self._sandbox.read_file(file, True)
    else:
        output = await self._sandbox.read_file(file, False)

    if self._events:
        recorder = transcript()
        recorder._event(
            SandboxEvent(
                timestamp=started_at,
                action="read_file",
                file=file,
                output=content_display(output),
                completed=datetime.now(),
            )
        )

    return output
|
134
|
+
|
135
|
+
@override
|
136
|
+
async def connection(self) -> SandboxConnection:
    """Return the wrapped sandbox's connection; no transcript event is recorded."""
    details = await self._sandbox.connection()
    return details
|
138
|
+
|
139
|
+
@override
|
140
|
+
def as_type(self, sandbox_cls: Type[ST]) -> ST:
    """Return the wrapped sandbox (not the proxy) typed as ``sandbox_cls``.

    Args:
        sandbox_cls: The SandboxEnvironment subclass the caller expects.

    Returns:
        The wrapped sandbox object itself.

    Raises:
        TypeError: If the wrapped sandbox is not an instance of ``sandbox_cls``.
    """
    wrapped = self._sandbox
    # the check is made against the wrapped sandbox, never the proxy
    if not isinstance(wrapped, sandbox_cls):
        raise TypeError(
            f"Expected instance of {sandbox_cls.__name__}, got {type(self._sandbox).__name__}"
        )
    return wrapped
|
147
|
+
|
148
|
+
@contextlib.contextmanager
def no_events(self) -> Iterator[None]:
    """Suppress transcript events for the duration of a ``with`` block.

    While the block is active, ``exec``, ``write_file`` and ``read_file``
    still delegate to the wrapped sandbox but record no ``SandboxEvent``.

    The setting that was in effect on entry is restored on exit, including
    when the block raises. Restoring the previous value (rather than forcing
    ``True``) keeps events suppressed when ``no_events()`` blocks are nested:
    leaving an inner block no longer re-enables events for the outer one.

    Yields:
        None.
    """
    previous = self._events
    self._events = False
    try:
        yield
    finally:
        self._events = previous
|
155
|
+
|
156
|
+
@classmethod
|
157
|
+
async def sample_cleanup(
|
158
|
+
cls,
|
159
|
+
task_name: str,
|
160
|
+
config: SandboxEnvironmentConfigType | None,
|
161
|
+
environments: dict[str, SandboxEnvironment],
|
162
|
+
interrupted: bool,
|
163
|
+
) -> None:
|
164
|
+
pass
|
165
|
+
|
166
|
+
|
167
|
+
def content_display(content: str | bytes) -> str:
    """Build the short form of ``content`` that is stored in sandbox events.

    Text is run through ``truncate_lines`` with a limit argument of 20; when
    that reports truncated lines, a trailing note with the count is appended.
    Anything that is not a ``str`` is replaced by a ``binary (<size>)`` summary.

    Args:
        content: Text or binary payload.

    Returns:
        The display string.
    """
    # non-text payloads are summarised by size rather than shown
    if not isinstance(content, str):
        return f"binary ({pretty_size(len(content))})"

    content, truncated = truncate_lines(content, 20)
    if not truncated:
        return content
    return f"{content}\n\nOutput truncated ({truncated} additional lines)"
|
175
|
+
|
176
|
+
|
177
|
+
def pretty_size(size: int) -> str:
    """Format a byte count as ``B``, ``KB`` or ``MB`` (1024-based).

    Values below 1024 are shown as whole bytes; larger values are shown with
    two decimals in KB, or in MB from 1024 * 1024 upward (MB is the largest unit).

    Args:
        size: Number of bytes.

    Returns:
        The formatted size string.
    """
    # test the largest unit first, falling through to smaller ones
    if size >= 1024 * 1024:
        return f"{size / (1024 * 1024):.2f} MB"
    elif size >= 1024:
        return f"{size / 1024:.2f} KB"
    else:
        return f"{size} B"
|
@@ -1,7 +1,7 @@
|
|
1
1
|
import tempfile
|
2
2
|
import warnings
|
3
3
|
from pathlib import Path
|
4
|
-
from typing import Literal, Union,
|
4
|
+
from typing import Literal, Union, overload
|
5
5
|
|
6
6
|
from typing_extensions import override
|
7
7
|
|
@@ -40,8 +40,8 @@ class LocalSandboxEnvironment(SandboxEnvironment):
|
|
40
40
|
interrupted: bool,
|
41
41
|
) -> None:
|
42
42
|
for environment in environments.values():
|
43
|
-
|
44
|
-
|
43
|
+
sandbox = environment.as_type(LocalSandboxEnvironment)
|
44
|
+
sandbox.directory.cleanup()
|
45
45
|
|
46
46
|
def __init__(self) -> None:
|
47
47
|
self.directory = tempfile.TemporaryDirectory(ignore_cleanup_errors=True)
|