inspect-ai 0.3.70__py3-none-any.whl → 0.3.72__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/eval.py +14 -8
- inspect_ai/_display/core/display.py +2 -0
- inspect_ai/_display/core/footer.py +13 -3
- inspect_ai/_display/plain/display.py +6 -2
- inspect_ai/_display/rich/display.py +19 -6
- inspect_ai/_display/textual/app.py +6 -1
- inspect_ai/_display/textual/display.py +4 -0
- inspect_ai/_display/textual/widgets/transcript.py +10 -6
- inspect_ai/_eval/task/run.py +5 -8
- inspect_ai/_util/content.py +20 -1
- inspect_ai/_util/transcript.py +10 -4
- inspect_ai/_util/working.py +4 -0
- inspect_ai/_view/www/App.css +6 -0
- inspect_ai/_view/www/dist/assets/index.css +115 -87
- inspect_ai/_view/www/dist/assets/index.js +5324 -2276
- inspect_ai/_view/www/eslint.config.mjs +24 -1
- inspect_ai/_view/www/log-schema.json +283 -20
- inspect_ai/_view/www/package.json +8 -3
- inspect_ai/_view/www/src/App.tsx +2 -2
- inspect_ai/_view/www/src/components/AnsiDisplay.tsx +4 -3
- inspect_ai/_view/www/src/components/Card.tsx +9 -8
- inspect_ai/_view/www/src/components/DownloadButton.tsx +2 -1
- inspect_ai/_view/www/src/components/EmptyPanel.tsx +2 -2
- inspect_ai/_view/www/src/components/ErrorPanel.tsx +4 -3
- inspect_ai/_view/www/src/components/ExpandablePanel.tsx +13 -5
- inspect_ai/_view/www/src/components/FindBand.tsx +3 -3
- inspect_ai/_view/www/src/components/HumanBaselineView.tsx +3 -3
- inspect_ai/_view/www/src/components/LabeledValue.tsx +5 -4
- inspect_ai/_view/www/src/components/LargeModal.tsx +18 -13
- inspect_ai/_view/www/src/components/{LightboxCarousel.css → LightboxCarousel.module.css} +22 -18
- inspect_ai/_view/www/src/components/LightboxCarousel.tsx +36 -27
- inspect_ai/_view/www/src/components/MessageBand.tsx +2 -1
- inspect_ai/_view/www/src/components/NavPills.tsx +9 -8
- inspect_ai/_view/www/src/components/ProgressBar.tsx +2 -1
- inspect_ai/_view/www/src/components/TabSet.tsx +21 -15
- inspect_ai/_view/www/src/index.tsx +2 -2
- inspect_ai/_view/www/src/metadata/MetaDataGrid.tsx +11 -9
- inspect_ai/_view/www/src/metadata/MetaDataView.tsx +3 -2
- inspect_ai/_view/www/src/metadata/MetadataGrid.module.css +1 -0
- inspect_ai/_view/www/src/metadata/RenderedContent.tsx +16 -0
- inspect_ai/_view/www/src/plan/DatasetDetailView.tsx +3 -2
- inspect_ai/_view/www/src/plan/DetailStep.tsx +2 -1
- inspect_ai/_view/www/src/plan/PlanCard.tsx +2 -5
- inspect_ai/_view/www/src/plan/PlanDetailView.tsx +6 -9
- inspect_ai/_view/www/src/plan/ScorerDetailView.tsx +2 -1
- inspect_ai/_view/www/src/plan/SolverDetailView.tsx +3 -3
- inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +2 -2
- inspect_ai/_view/www/src/samples/SampleDialog.tsx +3 -3
- inspect_ai/_view/www/src/samples/SampleDisplay.tsx +2 -2
- inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +2 -2
- inspect_ai/_view/www/src/samples/SamplesTools.tsx +2 -1
- inspect_ai/_view/www/src/samples/chat/ChatMessage.tsx +3 -19
- inspect_ai/_view/www/src/samples/chat/ChatMessageRenderer.tsx +2 -1
- inspect_ai/_view/www/src/samples/chat/ChatMessageRow.tsx +2 -1
- inspect_ai/_view/www/src/samples/chat/ChatView.tsx +2 -1
- inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.tsx +22 -7
- inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +35 -6
- inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +2 -2
- inspect_ai/_view/www/src/samples/chat/messages.ts +15 -2
- inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +13 -4
- inspect_ai/_view/www/src/samples/chat/tools/ToolInput.module.css +2 -2
- inspect_ai/_view/www/src/samples/chat/tools/ToolInput.tsx +18 -19
- inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.module.css +1 -1
- inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.tsx +4 -3
- inspect_ai/_view/www/src/samples/chat/tools/ToolTitle.tsx +2 -2
- inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.tsx +2 -3
- inspect_ai/_view/www/src/samples/error/SampleErrorView.tsx +3 -2
- inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +2 -1
- inspect_ai/_view/www/src/samples/list/SampleHeader.tsx +2 -1
- inspect_ai/_view/www/src/samples/list/SampleList.tsx +57 -45
- inspect_ai/_view/www/src/samples/list/SampleRow.tsx +2 -1
- inspect_ai/_view/www/src/samples/list/SampleSeparator.tsx +2 -1
- inspect_ai/_view/www/src/samples/sample-tools/EpochFilter.tsx +2 -2
- inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.tsx +4 -3
- inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +2 -5
- inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +2 -2
- inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +2 -1
- inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +2 -2
- inspect_ai/_view/www/src/samples/transcript/ApprovalEventView.tsx +2 -1
- inspect_ai/_view/www/src/samples/transcript/ErrorEventView.tsx +2 -1
- inspect_ai/_view/www/src/samples/transcript/InfoEventView.tsx +2 -1
- inspect_ai/_view/www/src/samples/transcript/InputEventView.tsx +2 -1
- inspect_ai/_view/www/src/samples/transcript/LoggerEventView.module.css +4 -0
- inspect_ai/_view/www/src/samples/transcript/LoggerEventView.tsx +12 -2
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +1 -1
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +25 -28
- inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.tsx +2 -1
- inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +5 -4
- inspect_ai/_view/www/src/samples/transcript/SampleTranscript.tsx +2 -2
- inspect_ai/_view/www/src/samples/transcript/SandboxEventView.tsx +8 -7
- inspect_ai/_view/www/src/samples/transcript/ScoreEventView.tsx +2 -2
- inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +3 -3
- inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.tsx +18 -14
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +5 -5
- inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +34 -15
- inspect_ai/_view/www/src/samples/transcript/event/EventNav.tsx +2 -1
- inspect_ai/_view/www/src/samples/transcript/event/EventNavs.tsx +2 -1
- inspect_ai/_view/www/src/samples/transcript/event/EventRow.tsx +3 -2
- inspect_ai/_view/www/src/samples/transcript/event/EventSection.tsx +2 -2
- inspect_ai/_view/www/src/samples/transcript/event/EventTimingPanel.module.css +28 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventTimingPanel.tsx +115 -0
- inspect_ai/_view/www/src/samples/transcript/event/utils.ts +29 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateDiffView.tsx +2 -1
- inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +3 -3
- inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +11 -8
- inspect_ai/_view/www/src/types/log.d.ts +129 -34
- inspect_ai/_view/www/src/usage/ModelTokenTable.tsx +6 -10
- inspect_ai/_view/www/src/usage/ModelUsagePanel.module.css +4 -0
- inspect_ai/_view/www/src/usage/ModelUsagePanel.tsx +32 -9
- inspect_ai/_view/www/src/usage/TokenTable.tsx +4 -6
- inspect_ai/_view/www/src/usage/UsageCard.tsx +2 -1
- inspect_ai/_view/www/src/utils/format.ts +1 -1
- inspect_ai/_view/www/src/utils/json.ts +24 -0
- inspect_ai/_view/www/src/workspace/WorkSpace.tsx +6 -5
- inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +9 -2
- inspect_ai/_view/www/src/workspace/error/TaskErrorPanel.tsx +2 -1
- inspect_ai/_view/www/src/workspace/navbar/Navbar.tsx +2 -1
- inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +3 -3
- inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +4 -3
- inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +5 -4
- inspect_ai/_view/www/src/workspace/navbar/StatusPanel.tsx +5 -8
- inspect_ai/_view/www/src/workspace/sidebar/EvalStatus.tsx +5 -4
- inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +2 -1
- inspect_ai/_view/www/src/workspace/sidebar/Sidebar.tsx +2 -1
- inspect_ai/_view/www/src/workspace/sidebar/SidebarLogEntry.tsx +2 -2
- inspect_ai/_view/www/src/workspace/sidebar/SidebarScoreView.tsx +2 -1
- inspect_ai/_view/www/src/workspace/sidebar/SidebarScoresView.tsx +2 -2
- inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +2 -2
- inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +2 -5
- inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +12 -11
- inspect_ai/_view/www/yarn.lock +241 -5
- inspect_ai/log/_condense.py +3 -0
- inspect_ai/log/_recorders/eval.py +6 -1
- inspect_ai/log/_transcript.py +58 -1
- inspect_ai/model/__init__.py +2 -0
- inspect_ai/model/_call_tools.py +7 -0
- inspect_ai/model/_chat_message.py +22 -7
- inspect_ai/model/_conversation.py +10 -8
- inspect_ai/model/_generate_config.py +25 -4
- inspect_ai/model/_model.py +133 -57
- inspect_ai/model/_model_output.py +3 -0
- inspect_ai/model/_openai.py +106 -40
- inspect_ai/model/_providers/anthropic.py +281 -153
- inspect_ai/model/_providers/google.py +27 -8
- inspect_ai/model/_providers/groq.py +9 -4
- inspect_ai/model/_providers/openai.py +57 -4
- inspect_ai/model/_providers/openai_o1.py +10 -0
- inspect_ai/model/_providers/providers.py +1 -1
- inspect_ai/model/_reasoning.py +15 -2
- inspect_ai/scorer/_model.py +23 -19
- inspect_ai/solver/_human_agent/agent.py +14 -10
- inspect_ai/solver/_human_agent/commands/__init__.py +7 -3
- inspect_ai/solver/_human_agent/commands/submit.py +76 -30
- inspect_ai/tool/__init__.py +2 -0
- inspect_ai/tool/_tool.py +3 -1
- inspect_ai/tool/_tools/_computer/_common.py +117 -58
- inspect_ai/tool/_tools/_computer/_computer.py +80 -57
- inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/Code/User/settings.json +7 -1
- inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfwm4.xml +91 -0
- inspect_ai/tool/_tools/_computer/_resources/tool/.pylintrc +8 -0
- inspect_ai/tool/_tools/_computer/_resources/tool/.vscode/settings.json +12 -0
- inspect_ai/tool/_tools/_computer/_resources/tool/_args.py +78 -0
- inspect_ai/tool/_tools/_computer/_resources/tool/_constants.py +20 -0
- inspect_ai/tool/_tools/_computer/_resources/tool/_run.py +1 -1
- inspect_ai/tool/_tools/_computer/_resources/tool/_x11_client.py +175 -113
- inspect_ai/tool/_tools/_computer/_resources/tool/computer_tool.py +76 -20
- inspect_ai/tool/_tools/_computer/_resources/tool/pyproject.toml +65 -0
- inspect_ai/tool/_tools/_computer/test_args.py +151 -0
- inspect_ai/tool/_tools/_web_browser/_resources/.pylintrc +8 -0
- inspect_ai/tool/_tools/_web_browser/_resources/.vscode/launch.json +24 -0
- inspect_ai/tool/_tools/_web_browser/_resources/.vscode/settings.json +25 -0
- inspect_ai/tool/_tools/_web_browser/_resources/Dockerfile +5 -6
- inspect_ai/tool/_tools/_web_browser/_resources/README.md +10 -11
- inspect_ai/tool/_tools/_web_browser/_resources/accessibility_tree.py +71 -0
- inspect_ai/tool/_tools/_web_browser/_resources/accessibility_tree_node.py +323 -0
- inspect_ai/tool/_tools/_web_browser/_resources/cdp/__init__.py +5 -0
- inspect_ai/tool/_tools/_web_browser/_resources/cdp/a11y.py +279 -0
- inspect_ai/tool/_tools/_web_browser/_resources/cdp/dom.py +9 -0
- inspect_ai/tool/_tools/_web_browser/_resources/cdp/dom_snapshot.py +293 -0
- inspect_ai/tool/_tools/_web_browser/_resources/cdp/page.py +94 -0
- inspect_ai/tool/_tools/_web_browser/_resources/constants.py +2 -0
- inspect_ai/tool/_tools/_web_browser/_resources/images/usage_diagram.svg +2 -0
- inspect_ai/tool/_tools/_web_browser/_resources/playwright_browser.py +50 -0
- inspect_ai/tool/_tools/_web_browser/_resources/playwright_crawler.py +31 -359
- inspect_ai/tool/_tools/_web_browser/_resources/playwright_page_crawler.py +280 -0
- inspect_ai/tool/_tools/_web_browser/_resources/pyproject.toml +65 -0
- inspect_ai/tool/_tools/_web_browser/_resources/rectangle.py +64 -0
- inspect_ai/tool/_tools/_web_browser/_resources/rpc_client_helpers.py +146 -0
- inspect_ai/tool/_tools/_web_browser/_resources/scale_factor.py +64 -0
- inspect_ai/tool/_tools/_web_browser/_resources/test_accessibility_tree_node.py +180 -0
- inspect_ai/tool/_tools/_web_browser/_resources/test_playwright_crawler.py +15 -9
- inspect_ai/tool/_tools/_web_browser/_resources/test_rectangle.py +15 -0
- inspect_ai/tool/_tools/_web_browser/_resources/test_web_client.py +44 -0
- inspect_ai/tool/_tools/_web_browser/_resources/web_browser_rpc_types.py +39 -0
- inspect_ai/tool/_tools/_web_browser/_resources/web_client.py +198 -48
- inspect_ai/tool/_tools/_web_browser/_resources/web_client_new_session.py +26 -25
- inspect_ai/tool/_tools/_web_browser/_resources/web_server.py +178 -39
- inspect_ai/tool/_tools/_web_browser/_web_browser.py +38 -19
- inspect_ai/util/__init__.py +2 -1
- inspect_ai/util/_display.py +12 -0
- inspect_ai/util/_sandbox/events.py +55 -21
- inspect_ai/util/_sandbox/self_check.py +131 -43
- inspect_ai/util/_subtask.py +11 -0
- {inspect_ai-0.3.70.dist-info → inspect_ai-0.3.72.dist-info}/METADATA +1 -1
- {inspect_ai-0.3.70.dist-info → inspect_ai-0.3.72.dist-info}/RECORD +209 -186
- {inspect_ai-0.3.70.dist-info → inspect_ai-0.3.72.dist-info}/WHEEL +1 -1
- inspect_ai/_view/www/src/components/VirtualList.module.css +0 -19
- inspect_ai/_view/www/src/components/VirtualList.tsx +0 -292
- inspect_ai/tool/_tools/_computer/_computer_split.py +0 -198
- inspect_ai/tool/_tools/_web_browser/_resources/accessibility_node.py +0 -312
- inspect_ai/tool/_tools/_web_browser/_resources/dm_env_servicer.py +0 -275
- inspect_ai/tool/_tools/_web_browser/_resources/images/usage_diagram.png +0 -0
- inspect_ai/tool/_tools/_web_browser/_resources/test_accessibility_node.py +0 -176
- inspect_ai/tool/_tools/_web_browser/_resources/test_dm_env_servicer.py +0 -135
- inspect_ai/tool/_tools/_web_browser/_resources/test_web_environment.py +0 -71
- inspect_ai/tool/_tools/_web_browser/_resources/web_environment.py +0 -184
- {inspect_ai-0.3.70.dist-info → inspect_ai-0.3.72.dist-info}/LICENSE +0 -0
- {inspect_ai-0.3.70.dist-info → inspect_ai-0.3.72.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.70.dist-info → inspect_ai-0.3.72.dist-info}/top_level.txt +0 -0
@@ -1,52 +1,191 @@
|
|
1
|
-
|
1
|
+
import threading
|
2
|
+
from typing import Awaitable, Callable, Unpack
|
2
3
|
|
3
|
-
from
|
4
|
+
from aiohttp.web import Application, Request, Response, run_app
|
5
|
+
from jsonrpcserver import Result, Success, async_dispatch, method
|
4
6
|
|
5
|
-
import
|
6
|
-
import
|
7
|
-
import
|
8
|
-
from
|
9
|
-
from
|
7
|
+
from constants import DEFAULT_SESSION_NAME, SERVER_PORT
|
8
|
+
from playwright_browser import PlaywrightBrowser
|
9
|
+
from playwright_crawler import PlaywrightCrawler
|
10
|
+
from scale_factor import get_screen_scale_factor
|
11
|
+
from web_browser_rpc_types import (
|
12
|
+
ClickArgs,
|
13
|
+
CrawlerBaseArgs,
|
14
|
+
CrawlerResponse,
|
15
|
+
GoArgs,
|
16
|
+
NewSessionArgs,
|
17
|
+
NewSessionResponse,
|
18
|
+
ScrollArgs,
|
19
|
+
TypeOrSubmitArgs,
|
20
|
+
)
|
10
21
|
|
11
|
-
_DM_ENV_BASE_PORT = 9443
|
12
22
|
|
23
|
+
class Sessions:
|
24
|
+
def __init__(self) -> None:
|
25
|
+
self._lock = threading.Lock()
|
26
|
+
self._browser: PlaywrightBrowser | None = None
|
27
|
+
self._sessions: dict[str, PlaywrightCrawler] = {}
|
13
28
|
|
14
|
-
def
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
env_service = dm_env_servicer.EnvironmentService(web_environment.WebEnvironment)
|
31
|
-
dm_env_rpc_pb2_grpc.add_EnvironmentServicer_to_server(env_service, grpc_server)
|
29
|
+
async def new_session(self, headful: bool) -> str:
|
30
|
+
with self._lock:
|
31
|
+
if not self._browser:
|
32
|
+
self._browser = await PlaywrightBrowser.create(headless=not headful)
|
33
|
+
current_count = len(self._sessions)
|
34
|
+
name = (
|
35
|
+
DEFAULT_SESSION_NAME
|
36
|
+
if current_count == 0
|
37
|
+
else f"{DEFAULT_SESSION_NAME}_{current_count}"
|
38
|
+
)
|
39
|
+
crawler = await PlaywrightCrawler.create(
|
40
|
+
await self._browser.get_new_context(),
|
41
|
+
device_scale_factor=get_screen_scale_factor() if headful else 1,
|
42
|
+
)
|
43
|
+
self._sessions[name] = crawler
|
44
|
+
return name
|
32
45
|
|
33
|
-
|
34
|
-
|
35
|
-
|
46
|
+
async def get_crawler_for_session(self, name: str) -> PlaywrightCrawler:
|
47
|
+
if not self._sessions:
|
48
|
+
await self.new_session(False)
|
49
|
+
return self._sessions[name]
|
36
50
|
|
37
|
-
grpc_server.start()
|
38
51
|
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
52
|
+
sessions = Sessions()
|
53
|
+
|
54
|
+
|
55
|
+
@method
|
56
|
+
async def new_session(**kwargs: Unpack[NewSessionArgs]) -> NewSessionResponse:
|
57
|
+
return Success(
|
58
|
+
NewSessionResponse(
|
59
|
+
session_name=await sessions.new_session(kwargs.get("headful", False))
|
60
|
+
).model_dump()
|
44
61
|
)
|
45
|
-
connection = dm_env_connection.Connection(channel)
|
46
|
-
connection.send(dm_env_rpc_pb2.CreateWorldRequest())
|
47
|
-
connection.close()
|
48
62
|
|
49
|
-
|
63
|
+
|
64
|
+
@method
|
65
|
+
async def web_go(**kwargs: Unpack[GoArgs]) -> Result:
|
66
|
+
async def handler(crawler: PlaywrightCrawler):
|
67
|
+
await (await crawler.current_page).go_to_url(kwargs["url"])
|
68
|
+
|
69
|
+
return await _execute_crawler_command(kwargs["session_name"], handler)
|
70
|
+
|
71
|
+
|
72
|
+
@method
|
73
|
+
async def web_click(**kwargs: Unpack[ClickArgs]) -> Result:
|
74
|
+
async def handler(crawler: PlaywrightCrawler):
|
75
|
+
await (await crawler.current_page).click(kwargs["element_id"])
|
76
|
+
|
77
|
+
return await _execute_crawler_command(kwargs["session_name"], handler)
|
78
|
+
|
79
|
+
|
80
|
+
@method
|
81
|
+
async def web_scroll(**kwargs: Unpack[ScrollArgs]) -> Result:
|
82
|
+
async def handler(crawler: PlaywrightCrawler):
|
83
|
+
await (await crawler.current_page).scroll(kwargs["direction"])
|
84
|
+
|
85
|
+
return await _execute_crawler_command(kwargs["session_name"], handler)
|
86
|
+
|
87
|
+
|
88
|
+
@method
|
89
|
+
async def web_forward(**kwargs: Unpack[CrawlerBaseArgs]) -> Result:
|
90
|
+
async def handler(crawler: PlaywrightCrawler):
|
91
|
+
await (await crawler.current_page).forward()
|
92
|
+
|
93
|
+
return await _execute_crawler_command(kwargs["session_name"], handler)
|
94
|
+
|
95
|
+
|
96
|
+
@method
|
97
|
+
async def web_back(**kwargs: Unpack[CrawlerBaseArgs]) -> Result:
|
98
|
+
async def handler(crawler: PlaywrightCrawler):
|
99
|
+
await (await crawler.current_page).back()
|
100
|
+
|
101
|
+
return await _execute_crawler_command(kwargs["session_name"], handler)
|
102
|
+
|
103
|
+
|
104
|
+
@method
|
105
|
+
async def web_refresh(**kwargs: Unpack[CrawlerBaseArgs]) -> Result:
|
106
|
+
async def handler(crawler: PlaywrightCrawler):
|
107
|
+
await (await crawler.current_page).refresh()
|
108
|
+
|
109
|
+
return await _execute_crawler_command(kwargs["session_name"], handler)
|
110
|
+
|
111
|
+
|
112
|
+
@method
|
113
|
+
async def web_type(**kwargs: Unpack[TypeOrSubmitArgs]) -> Result:
|
114
|
+
async def handler(crawler: PlaywrightCrawler):
|
115
|
+
await (await crawler.current_page).type(
|
116
|
+
kwargs["element_id"], _str_from_str_or_list(kwargs["text"])
|
117
|
+
)
|
118
|
+
|
119
|
+
return await _execute_crawler_command(kwargs["session_name"], handler)
|
120
|
+
|
121
|
+
|
122
|
+
@method
|
123
|
+
async def web_type_submit(**kwargs: Unpack[TypeOrSubmitArgs]) -> Result:
|
124
|
+
async def handler(crawler: PlaywrightCrawler):
|
125
|
+
await (await crawler.current_page).clear(kwargs["element_id"])
|
126
|
+
await (await crawler.current_page).type(
|
127
|
+
kwargs["element_id"], _str_from_str_or_list(kwargs["text"]) + "\n"
|
128
|
+
)
|
129
|
+
|
130
|
+
return await _execute_crawler_command(kwargs["session_name"], handler)
|
131
|
+
|
132
|
+
|
133
|
+
async def _execute_crawler_command(
|
134
|
+
session_name: str, handler: Callable[[PlaywrightCrawler], Awaitable[None]]
|
135
|
+
) -> Result:
|
136
|
+
if not sessions:
|
137
|
+
await new_session()
|
138
|
+
try:
|
139
|
+
crawler = await sessions.get_crawler_for_session(session_name)
|
140
|
+
await handler(crawler)
|
141
|
+
await (await crawler.current_page).update()
|
142
|
+
|
143
|
+
# If there's a cookies message click to sort it out.
|
144
|
+
await _auto_click_cookies(crawler)
|
145
|
+
|
146
|
+
return Success(
|
147
|
+
CrawlerResponse(
|
148
|
+
web_url=(await crawler.current_page).url.split("?")[0],
|
149
|
+
main_content=(await crawler.current_page).render_main_content(),
|
150
|
+
web_at=(await crawler.current_page).render_at(),
|
151
|
+
error=None,
|
152
|
+
).model_dump()
|
153
|
+
)
|
154
|
+
except Exception as e: # pylint: disable=broad-exception-caught
|
155
|
+
return Success(
|
156
|
+
CrawlerResponse(
|
157
|
+
web_url=(await crawler.current_page).url.split("?")[0],
|
158
|
+
web_at="encountered error",
|
159
|
+
error=str(e),
|
160
|
+
).model_dump()
|
161
|
+
)
|
162
|
+
|
163
|
+
|
164
|
+
def _str_from_str_or_list(str_or_list: str | list[str]) -> str:
|
165
|
+
return str_or_list if isinstance(str_or_list, str) else " ".join(str_or_list)
|
166
|
+
|
167
|
+
|
168
|
+
async def _auto_click_cookies(crawler: PlaywrightCrawler):
|
169
|
+
"""Autoclick any cookies popup."""
|
170
|
+
try:
|
171
|
+
accept_node = (await crawler.current_page).lookup_node("<Accept all>")
|
172
|
+
except LookupError:
|
173
|
+
return
|
174
|
+
await (await crawler.current_page).click(accept_node.node_id)
|
175
|
+
await (await crawler.current_page).update()
|
176
|
+
|
177
|
+
|
178
|
+
def main():
|
179
|
+
async def handle_request(request: Request) -> Response:
|
180
|
+
return Response(
|
181
|
+
text=await async_dispatch(await request.text()),
|
182
|
+
content_type="application/json",
|
183
|
+
)
|
184
|
+
|
185
|
+
app = Application()
|
186
|
+
app.router.add_post("/", handle_request)
|
187
|
+
|
188
|
+
run_app(app, port=SERVER_PORT)
|
50
189
|
|
51
190
|
|
52
191
|
if __name__ == "__main__":
|
@@ -3,8 +3,9 @@ from textwrap import dedent
|
|
3
3
|
|
4
4
|
from pydantic import Field
|
5
5
|
|
6
|
+
from inspect_ai._util.content import ContentText
|
6
7
|
from inspect_ai._util.error import PrerequisiteError
|
7
|
-
from inspect_ai.tool._tool import Tool, ToolError, tool
|
8
|
+
from inspect_ai.tool._tool import Tool, ToolError, ToolResult, tool
|
8
9
|
from inspect_ai.tool._tool_call import ToolCall, ToolCallContent, ToolCallView
|
9
10
|
from inspect_ai.tool._tool_info import parse_tool_info
|
10
11
|
from inspect_ai.tool._tool_with import tool_with
|
@@ -58,10 +59,10 @@ def web_browser_go() -> Tool:
|
|
58
59
|
Web browser navigation tool.
|
59
60
|
"""
|
60
61
|
|
61
|
-
async def execute(url: str) ->
|
62
|
+
async def execute(url: str) -> ToolResult:
|
62
63
|
"""Navigate the web browser to a URL.
|
63
64
|
|
64
|
-
Once you have navigated to a page, you will be presented with a web
|
65
|
+
Once you have navigated to a page, you will be presented with a web accessibility tree of the elements on the page. Each element has an ID, which is displayed in brackets at the beginning of its line. For example:
|
65
66
|
|
66
67
|
```
|
67
68
|
[1] RootWebArea "Google" [focused: True, url: https://www.google.com/]
|
@@ -99,16 +100,17 @@ def go_without_interactive_docs(tool: Tool) -> Tool:
|
|
99
100
|
|
100
101
|
|
101
102
|
# custom viewer for interactive tool calls that shows a truncated
|
102
|
-
# version of current the web
|
103
|
+
# version of current the web accessibility tree if available
|
103
104
|
|
104
105
|
|
105
106
|
class WebBrowserStore(StoreModel):
|
107
|
+
main_content: str = Field(default_factory=str)
|
106
108
|
web_at: str = Field(default_factory=str)
|
107
109
|
session_id: str = Field(default_factory=str)
|
108
110
|
|
109
111
|
|
110
112
|
def web_at_viewer(call: ToolCall) -> ToolCallView:
|
111
|
-
# get the web
|
113
|
+
# get the web accessibility tree, if we have it create a view from it
|
112
114
|
web_at = store_as(WebBrowserStore).web_at
|
113
115
|
element_id = call.arguments.get("element_id", 0)
|
114
116
|
if web_at and element_id:
|
@@ -141,10 +143,10 @@ def web_browser_click() -> Tool:
|
|
141
143
|
Web browser clicking tool.
|
142
144
|
"""
|
143
145
|
|
144
|
-
async def execute(element_id: int) ->
|
146
|
+
async def execute(element_id: int) -> ToolResult:
|
145
147
|
"""Click an element on the page currently displayed by the web browser.
|
146
148
|
|
147
|
-
For example, with the following web
|
149
|
+
For example, with the following web accessibility tree:
|
148
150
|
|
149
151
|
```
|
150
152
|
[304] RootWebArea "Poetry Foundation" [focused: True, url: https://www.poetryfoundation.org/]
|
@@ -176,7 +178,7 @@ def web_browser_type_submit() -> Tool:
|
|
176
178
|
Web browser type and submit tool.
|
177
179
|
"""
|
178
180
|
|
179
|
-
async def execute(element_id: int, text: str) ->
|
181
|
+
async def execute(element_id: int, text: str) -> ToolResult:
|
180
182
|
"""Type text into a form input on a web browser page and press ENTER to submit the form.
|
181
183
|
|
182
184
|
For example, to execute a search for "Yeats" from this page:
|
@@ -214,7 +216,7 @@ def web_browser_type() -> Tool:
|
|
214
216
|
Web browser typing tool.
|
215
217
|
"""
|
216
218
|
|
217
|
-
async def execute(element_id: int, text: str) ->
|
219
|
+
async def execute(element_id: int, text: str) -> ToolResult:
|
218
220
|
"""Type text into an input on a web browser page.
|
219
221
|
|
220
222
|
For example, to type "Norah" into the "First Name" search box on this page:
|
@@ -252,7 +254,7 @@ def web_browser_scroll() -> Tool:
|
|
252
254
|
Web browser scrolling tool.
|
253
255
|
"""
|
254
256
|
|
255
|
-
async def execute(direction: str) ->
|
257
|
+
async def execute(direction: str) -> ToolResult:
|
256
258
|
"""Scroll the web browser up or down by one page.
|
257
259
|
|
258
260
|
Occasionally some very long pages don't display all of their content at once. To see additional content you can scroll the page down with:
|
@@ -282,7 +284,7 @@ def web_browser_back() -> Tool:
|
|
282
284
|
Web browser back navigation tool.
|
283
285
|
"""
|
284
286
|
|
285
|
-
async def execute() ->
|
287
|
+
async def execute() -> ToolResult:
|
286
288
|
"""Navigate the web browser back in the browser history.
|
287
289
|
|
288
290
|
If you want to view a page that you have previously browsed (or perhaps just didn't find what you were looking for on a page and want to backtrack) use the web_browser_back tool.
|
@@ -303,7 +305,7 @@ def web_browser_forward() -> Tool:
|
|
303
305
|
Web browser forward navigation tool.
|
304
306
|
"""
|
305
307
|
|
306
|
-
async def execute() ->
|
308
|
+
async def execute() -> ToolResult:
|
307
309
|
"""Navigate the web browser forward in the browser history.
|
308
310
|
|
309
311
|
If you have navigated back in the browser history and then want to navigate forward use the web_browser_forward tool.
|
@@ -324,7 +326,7 @@ def web_browser_refresh() -> Tool:
|
|
324
326
|
Web browser page refresh tool.
|
325
327
|
"""
|
326
328
|
|
327
|
-
async def execute() ->
|
329
|
+
async def execute() -> ToolResult:
|
328
330
|
"""Refresh the current page of the web browser.
|
329
331
|
|
330
332
|
If you have interacted with a page by clicking buttons and want to reset it to its original state, use the web_browser_refresh tool.
|
@@ -341,7 +343,7 @@ WEB_CLIENT_REQUEST = "/app/web_browser/web_client.py"
|
|
341
343
|
WEB_CLIENT_NEW_SESSION = "/app/web_browser/web_client_new_session.py"
|
342
344
|
|
343
345
|
|
344
|
-
async def web_browser_cmd(cmd: str, *args: str) ->
|
346
|
+
async def web_browser_cmd(cmd: str, *args: str) -> ToolResult:
|
345
347
|
sandbox_env = await sandbox_with(WEB_CLIENT_NEW_SESSION)
|
346
348
|
session_flag = ""
|
347
349
|
if sandbox_env:
|
@@ -379,17 +381,30 @@ async def web_browser_cmd(cmd: str, *args: str) -> str:
|
|
379
381
|
if "error" in response and response.get("error", "").strip() != "":
|
380
382
|
raise ToolError(str(response.get("error")) or "(unknown error)")
|
381
383
|
elif "web_at" in response:
|
384
|
+
main_content = str(response.get("main_content")) or None
|
382
385
|
web_at = (
|
383
|
-
str(response.get("web_at")) or "(no web
|
386
|
+
str(response.get("web_at")) or "(no web accessibility tree available)"
|
384
387
|
)
|
385
388
|
# Remove base64 data from images.
|
386
389
|
web_at_lines = web_at.split("\n")
|
387
390
|
web_at_lines = [
|
388
391
|
line.partition("data:image/png;base64")[0] for line in web_at_lines
|
389
392
|
]
|
390
|
-
|
393
|
+
|
394
|
+
store_as(WebBrowserStore).main_content = (
|
395
|
+
main_content or "(no main text summary)"
|
396
|
+
)
|
391
397
|
store_as(WebBrowserStore).web_at = web_at
|
392
|
-
|
398
|
+
|
399
|
+
web_at = "\n".join(web_at_lines)
|
400
|
+
return (
|
401
|
+
[
|
402
|
+
ContentText(text=f"main content:\n{main_content}\n\n"),
|
403
|
+
ContentText(text=f"accessibility tree:\n{web_at}"),
|
404
|
+
]
|
405
|
+
if main_content
|
406
|
+
else web_at
|
407
|
+
)
|
393
408
|
else:
|
394
409
|
raise RuntimeError(
|
395
410
|
f"web_browser output must contain either 'error' or 'web_at' field: {result.stdout}"
|
@@ -425,7 +440,9 @@ async def web_browser_sandbox() -> SandboxEnvironment:
|
|
425
440
|
|
426
441
|
|
427
442
|
def parse_web_browser_output(output: str) -> dict[str, str]:
|
428
|
-
response: dict[str, str] = dict(
|
443
|
+
response: dict[str, str] = dict(
|
444
|
+
web_url="", main_content="", web_at="", info="", error=""
|
445
|
+
)
|
429
446
|
active_field: str | None = None
|
430
447
|
active_field_lines: list[str] = []
|
431
448
|
|
@@ -435,7 +452,9 @@ def parse_web_browser_output(output: str) -> dict[str, str]:
|
|
435
452
|
active_field_lines.clear()
|
436
453
|
|
437
454
|
for line in output.splitlines():
|
438
|
-
field_match = re.match(
|
455
|
+
field_match = re.match(
|
456
|
+
r"^(error|main_content|web_at|web_url|info)\s*:\s*(.+)$", line
|
457
|
+
)
|
439
458
|
if field_match:
|
440
459
|
collect_active_field()
|
441
460
|
active_field = field_match.group(1)
|
inspect_ai/util/__init__.py
CHANGED
@@ -2,7 +2,7 @@ from inspect_ai._util.trace import trace_action, trace_message
|
|
2
2
|
|
3
3
|
from ._concurrency import concurrency
|
4
4
|
from ._console import input_screen
|
5
|
-
from ._display import DisplayType, display_type
|
5
|
+
from ._display import DisplayType, display_counter, display_type
|
6
6
|
from ._panel import InputPanel, input_panel
|
7
7
|
from ._resource import resource
|
8
8
|
from ._sandbox import (
|
@@ -31,6 +31,7 @@ __all__ = [
|
|
31
31
|
"ExecResult",
|
32
32
|
"concurrency",
|
33
33
|
"DisplayType",
|
34
|
+
"display_counter",
|
34
35
|
"display_type",
|
35
36
|
"InputPanel",
|
36
37
|
"input_panel",
|
inspect_ai/util/_display.py
CHANGED
@@ -54,3 +54,15 @@ def display_type() -> DisplayType:
|
|
54
54
|
def display_type_initialized() -> bool:
|
55
55
|
global _display_type
|
56
56
|
return _display_type is not None
|
57
|
+
|
58
|
+
|
59
|
+
def display_counter(caption: str, value: str) -> None:
|
60
|
+
"""Display a counter in the UI.
|
61
|
+
|
62
|
+
Args:
|
63
|
+
caption: The counter's caption e.g. "HTTP rate limits".
|
64
|
+
value: The counter's value e.g. "42".
|
65
|
+
"""
|
66
|
+
from inspect_ai._display.core.active import display
|
67
|
+
|
68
|
+
display().display_counter(caption, value)
|
@@ -1,5 +1,7 @@
|
|
1
|
+
import contextlib
|
1
2
|
import shlex
|
2
|
-
from
|
3
|
+
from datetime import datetime
|
4
|
+
from typing import Iterator, Literal, Type, Union, overload
|
3
5
|
|
4
6
|
from pydantic import JsonValue
|
5
7
|
from pydantic_core import to_jsonable_python
|
@@ -19,6 +21,7 @@ from .environment import (
|
|
19
21
|
class SandboxEnvironmentProxy(SandboxEnvironment):
|
20
22
|
def __init__(self, sandbox: SandboxEnvironment) -> None:
|
21
23
|
self._sandbox = sandbox
|
24
|
+
self._events = True
|
22
25
|
|
23
26
|
@override
|
24
27
|
async def exec(
|
@@ -33,6 +36,9 @@ class SandboxEnvironmentProxy(SandboxEnvironment):
|
|
33
36
|
) -> ExecResult[str]:
|
34
37
|
from inspect_ai.log._transcript import SandboxEvent, transcript
|
35
38
|
|
39
|
+
# started
|
40
|
+
timestamp = datetime.now()
|
41
|
+
|
36
42
|
# make call
|
37
43
|
result = await self._sandbox.exec(
|
38
44
|
cmd, input, cwd, env, user, timeout, timeout_retry
|
@@ -50,20 +56,24 @@ class SandboxEnvironmentProxy(SandboxEnvironment):
|
|
50
56
|
options["timeout"] = timeout
|
51
57
|
if timeout_retry is not True:
|
52
58
|
options["timeout_retry"] = timeout_retry
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
59
|
+
|
60
|
+
if self._events:
|
61
|
+
transcript()._event(
|
62
|
+
SandboxEvent(
|
63
|
+
timestamp=timestamp,
|
64
|
+
action="exec",
|
65
|
+
cmd=" ".join([shlex.quote(c) for c in cmd]),
|
66
|
+
input=content_display(input) if input is not None else None,
|
67
|
+
options=options,
|
68
|
+
result=result.returncode,
|
69
|
+
output=content_display(
|
70
|
+
f"{result.stderr}\n\n{result.stdout}"
|
71
|
+
if result.stderr
|
72
|
+
else result.stdout
|
73
|
+
),
|
74
|
+
completed=datetime.now(),
|
75
|
+
)
|
65
76
|
)
|
66
|
-
)
|
67
77
|
|
68
78
|
# return result
|
69
79
|
return result
|
@@ -72,15 +82,22 @@ class SandboxEnvironmentProxy(SandboxEnvironment):
|
|
72
82
|
async def write_file(self, file: str, contents: str | bytes) -> None:
|
73
83
|
from inspect_ai.log._transcript import SandboxEvent, transcript
|
74
84
|
|
85
|
+
timestamp = datetime.now()
|
86
|
+
|
75
87
|
# make call
|
76
88
|
await self._sandbox.write_file(file, contents)
|
77
89
|
|
78
90
|
# yield event
|
79
|
-
|
80
|
-
|
81
|
-
|
91
|
+
if self._events:
|
92
|
+
transcript()._event(
|
93
|
+
SandboxEvent(
|
94
|
+
timestamp=timestamp,
|
95
|
+
action="write_file",
|
96
|
+
file=file,
|
97
|
+
input=content_display(contents),
|
98
|
+
completed=datetime.now(),
|
99
|
+
)
|
82
100
|
)
|
83
|
-
)
|
84
101
|
|
85
102
|
@overload
|
86
103
|
async def read_file(self, file: str, text: Literal[True] = True) -> str: ...
|
@@ -92,6 +109,8 @@ class SandboxEnvironmentProxy(SandboxEnvironment):
|
|
92
109
|
async def read_file(self, file: str, text: bool = True) -> Union[str | bytes]:
|
93
110
|
from inspect_ai.log._transcript import SandboxEvent, transcript
|
94
111
|
|
112
|
+
timestamp = datetime.now()
|
113
|
+
|
95
114
|
# make call
|
96
115
|
if text is True:
|
97
116
|
output: str | bytes = await self._sandbox.read_file(file, True)
|
@@ -99,9 +118,16 @@ class SandboxEnvironmentProxy(SandboxEnvironment):
|
|
99
118
|
output = await self._sandbox.read_file(file, False)
|
100
119
|
|
101
120
|
# yield event
|
102
|
-
|
103
|
-
|
104
|
-
|
121
|
+
if self._events:
|
122
|
+
transcript()._event(
|
123
|
+
SandboxEvent(
|
124
|
+
timestamp=timestamp,
|
125
|
+
action="read_file",
|
126
|
+
file=file,
|
127
|
+
output=content_display(output),
|
128
|
+
completed=datetime.now(),
|
129
|
+
)
|
130
|
+
)
|
105
131
|
|
106
132
|
# return result
|
107
133
|
return output
|
@@ -119,6 +145,14 @@ class SandboxEnvironmentProxy(SandboxEnvironment):
|
|
119
145
|
f"Expected instance of {sandbox_cls.__name__}, got {type(self._sandbox).__name__}"
|
120
146
|
)
|
121
147
|
|
148
|
+
@contextlib.contextmanager
|
149
|
+
def no_events(self) -> Iterator[None]:
|
150
|
+
self._events = False
|
151
|
+
try:
|
152
|
+
yield
|
153
|
+
finally:
|
154
|
+
self._events = True
|
155
|
+
|
122
156
|
@classmethod
|
123
157
|
async def sample_cleanup(
|
124
158
|
cls,
|