inspect-ai 0.3.92__py3-none-any.whl → 0.3.93__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/eval.py +27 -0
- inspect_ai/_eval/eval.py +19 -2
- inspect_ai/_eval/evalset.py +4 -1
- inspect_ai/_eval/run.py +41 -0
- inspect_ai/_eval/task/generate.py +38 -44
- inspect_ai/_eval/task/log.py +26 -28
- inspect_ai/_eval/task/run.py +13 -20
- inspect_ai/_util/local_server.py +368 -0
- inspect_ai/_util/working.py +10 -4
- inspect_ai/_view/www/dist/assets/index.css +159 -146
- inspect_ai/_view/www/dist/assets/index.js +1020 -1061
- inspect_ai/_view/www/log-schema.json +4 -3
- inspect_ai/_view/www/package.json +1 -1
- inspect_ai/_view/www/src/@types/log.d.ts +3 -2
- inspect_ai/_view/www/src/app/content/MetaDataGrid.tsx +2 -2
- inspect_ai/_view/www/src/app/content/MetaDataView.module.css +1 -1
- inspect_ai/_view/www/src/app/content/MetadataGrid.module.css +1 -1
- inspect_ai/_view/www/src/app/content/RenderedContent.tsx +1 -1
- inspect_ai/_view/www/src/app/log-view/LogView.tsx +11 -0
- inspect_ai/_view/www/src/app/log-view/tabs/InfoTab.tsx +2 -9
- inspect_ai/_view/www/src/app/log-view/tabs/ModelsTab.tsx +51 -0
- inspect_ai/_view/www/src/app/log-view/tabs/TaskTab.module.css +6 -0
- inspect_ai/_view/www/src/app/log-view/tabs/TaskTab.tsx +143 -0
- inspect_ai/_view/www/src/app/plan/ModelCard.tsx +1 -2
- inspect_ai/_view/www/src/app/plan/PlanCard.tsx +29 -7
- inspect_ai/_view/www/src/app/plan/PlanDetailView.module.css +1 -1
- inspect_ai/_view/www/src/app/plan/PlanDetailView.tsx +1 -198
- inspect_ai/_view/www/src/app/samples/descriptor/score/NumericScoreDescriptor.tsx +2 -1
- inspect_ai/_view/www/src/app/usage/ModelUsagePanel.tsx +3 -2
- inspect_ai/_view/www/src/app/usage/TokenTable.module.css +4 -1
- inspect_ai/_view/www/src/app/usage/TokenTable.tsx +2 -2
- inspect_ai/_view/www/src/app/usage/UsageCard.module.css +8 -3
- inspect_ai/_view/www/src/app/usage/UsageCard.tsx +1 -35
- inspect_ai/_view/www/src/components/Card.css +0 -1
- inspect_ai/_view/www/src/constants.ts +2 -0
- inspect_ai/_view/www/src/utils/numeric.ts +17 -0
- inspect_ai/agent/_agent.py +3 -3
- inspect_ai/agent/_as_solver.py +20 -12
- inspect_ai/agent/_as_tool.py +15 -3
- inspect_ai/agent/_handoff.py +8 -1
- inspect_ai/agent/_run.py +11 -3
- inspect_ai/log/__init__.py +4 -0
- inspect_ai/log/_file.py +56 -0
- inspect_ai/log/_log.py +99 -0
- inspect_ai/log/_recorders/__init__.py +2 -0
- inspect_ai/log/_recorders/buffer/database.py +12 -11
- inspect_ai/log/_recorders/buffer/filestore.py +2 -2
- inspect_ai/log/_recorders/buffer/types.py +2 -2
- inspect_ai/log/_recorders/eval.py +20 -65
- inspect_ai/log/_recorders/file.py +28 -6
- inspect_ai/log/_recorders/recorder.py +7 -0
- inspect_ai/log/_recorders/types.py +1 -23
- inspect_ai/log/_samples.py +0 -8
- inspect_ai/log/_transcript.py +7 -1
- inspect_ai/log/_util.py +52 -0
- inspect_ai/model/__init__.py +5 -1
- inspect_ai/model/_call_tools.py +32 -12
- inspect_ai/model/_generate_config.py +14 -8
- inspect_ai/model/_model.py +21 -48
- inspect_ai/model/_model_output.py +25 -0
- inspect_ai/model/_openai.py +2 -0
- inspect_ai/model/_providers/anthropic.py +13 -23
- inspect_ai/model/_providers/openai_o1.py +8 -2
- inspect_ai/model/_providers/providers.py +18 -4
- inspect_ai/model/_providers/sglang.py +241 -0
- inspect_ai/model/_providers/vllm.py +207 -400
- inspect_ai/solver/__init__.py +7 -2
- inspect_ai/solver/_basic_agent.py +3 -10
- inspect_ai/solver/_task_state.py +26 -88
- inspect_ai/tool/_json_rpc_helpers.py +45 -17
- inspect_ai/tool/_mcp/_mcp.py +2 -0
- inspect_ai/tool/_mcp/_sandbox.py +8 -2
- inspect_ai/tool/_mcp/server.py +3 -1
- inspect_ai/tool/_tool_call.py +4 -1
- inspect_ai/tool/_tool_support_helpers.py +51 -12
- inspect_ai/tool/_tools/_bash_session.py +190 -68
- inspect_ai/tool/_tools/_computer/_computer.py +25 -1
- inspect_ai/tool/_tools/_text_editor.py +4 -3
- inspect_ai/tool/_tools/_web_browser/_web_browser.py +10 -3
- inspect_ai/util/__init__.py +12 -0
- inspect_ai/util/_limit.py +393 -0
- inspect_ai/util/_limited_conversation.py +57 -0
- {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.93.dist-info}/METADATA +1 -1
- {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.93.dist-info}/RECORD +89 -108
- {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.93.dist-info}/WHEEL +1 -1
- inspect_ai/solver/_limit.py +0 -39
- inspect_ai/tool/_tools/_computer/_resources/Dockerfile +0 -102
- inspect_ai/tool/_tools/_computer/_resources/README.md +0 -30
- inspect_ai/tool/_tools/_computer/_resources/entrypoint/entrypoint.sh +0 -18
- inspect_ai/tool/_tools/_computer/_resources/entrypoint/novnc_startup.sh +0 -20
- inspect_ai/tool/_tools/_computer/_resources/entrypoint/x11vnc_startup.sh +0 -48
- inspect_ai/tool/_tools/_computer/_resources/entrypoint/xfce_startup.sh +0 -13
- inspect_ai/tool/_tools/_computer/_resources/entrypoint/xvfb_startup.sh +0 -48
- inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/Code/User/globalStorage/state.vscdb +0 -0
- inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/Code/User/settings.json +0 -9
- inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-panel.xml +0 -61
- inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-screensaver.xml +0 -10
- inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfwm4.xml +0 -91
- inspect_ai/tool/_tools/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +0 -10
- inspect_ai/tool/_tools/_computer/_resources/image_home_dir/Desktop/Terminal.desktop +0 -10
- inspect_ai/tool/_tools/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +0 -10
- inspect_ai/tool/_tools/_computer/_resources/tool/.pylintrc +0 -8
- inspect_ai/tool/_tools/_computer/_resources/tool/.vscode/settings.json +0 -12
- inspect_ai/tool/_tools/_computer/_resources/tool/_args.py +0 -78
- inspect_ai/tool/_tools/_computer/_resources/tool/_constants.py +0 -22
- inspect_ai/tool/_tools/_computer/_resources/tool/_logger.py +0 -22
- inspect_ai/tool/_tools/_computer/_resources/tool/_run.py +0 -42
- inspect_ai/tool/_tools/_computer/_resources/tool/_tool_result.py +0 -33
- inspect_ai/tool/_tools/_computer/_resources/tool/_x11_client.py +0 -341
- inspect_ai/tool/_tools/_computer/_resources/tool/computer_tool.py +0 -141
- inspect_ai/tool/_tools/_computer/_resources/tool/pyproject.toml +0 -65
- inspect_ai/tool/_tools/_computer/_resources/tool/requirements.txt +0 -0
- inspect_ai/tool/_tools/_computer/test_args.py +0 -151
- /inspect_ai/{tool/_tools/_computer/_resources/tool/__init__.py → _view/www/src/app/log-view/tabs/ModelsTab.module.css} +0 -0
- {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.93.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.93.dist-info}/licenses/LICENSE +0 -0
- {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.93.dist-info}/top_level.txt +0 -0
@@ -1,341 +0,0 @@
|
|
1
|
-
"""Inspired by https://github.com/anthropics/anthropic-quickstarts/blob/main/computer-use-demo/computer_use_demo/tools/computer.py"""
|
2
|
-
|
3
|
-
import asyncio
|
4
|
-
import base64
|
5
|
-
import logging
|
6
|
-
import os
|
7
|
-
import shlex
|
8
|
-
from pathlib import Path
|
9
|
-
from typing import Literal, TypedDict
|
10
|
-
from uuid import uuid4
|
11
|
-
|
12
|
-
from _run import run
|
13
|
-
from _tool_result import ToolResult
|
14
|
-
|
15
|
-
# Directory that screenshot PNG files are written to before being base64 encoded.
OUTPUT_DIR = "/tmp/outputs"

# Per-keystroke delay (milliseconds) passed to `xdotool type --delay`.
TYPING_DELAY_MS = 12
# Maximum number of characters sent to xdotool in a single `type` invocation.
TYPING_GROUP_SIZE = 50

# Palette sizes accepted for the `-colors` option applied to scaled screenshots.
ColorCount = Literal[4096, 2048, 1024, 512, 256, 128, 64, 32, 16, 8, 4]
|
21
|
-
|
22
|
-
|
23
|
-
class X11ClientError(Exception):
    """Error raised when an X11 interaction (xdotool, scrot, ...) cannot be completed."""

    def __init__(self, message):
        # Keep the text both in `args` (so `str(error)` shows it) and in `.message`.
        super().__init__(message)
        self.message = message
|
26
|
-
|
27
|
-
|
28
|
-
class Resolution(TypedDict):
    """Pixel dimensions of a display."""

    width: int
    height: int


# sizes above XGA/WXGA are not recommended (see README.md)
# scale down to one of these targets if ComputerTool._scaling_enabled is set
MAX_SCALING_TARGETS: dict[str, Resolution] = {
    # 4:3
    "XGA": {"width": 1024, "height": 768},
    # 16:10
    "WXGA": {"width": 1280, "height": 800},
    # ~16:9
    "FWXGA": {"width": 1366, "height": 768},
}


# Which coordinate space a value comes from: the real screen ("computer")
# or the scaled space used by the caller ("api").
ScalingSource = Literal["computer", "api"]
|
43
|
-
|
44
|
-
|
45
|
-
class ComputerToolOptions(TypedDict):
|
46
|
-
display_height_px: int
|
47
|
-
display_width_px: int
|
48
|
-
display_number: int | None
|
49
|
-
|
50
|
-
|
51
|
-
def chunks(s: str, chunk_size: int) -> list[str]:
    """Split `s` into consecutive pieces of at most `chunk_size` characters.

    Args:
        s: Text to split.
        chunk_size: Maximum length of each piece.

    Returns:
        The pieces in order; the final piece may be shorter. An empty string
        yields an empty list.
    """
    pieces: list[str] = []
    for start in range(0, len(s), chunk_size):
        pieces.append(s[start : start + chunk_size])
    return pieces
|
53
|
-
|
54
|
-
|
55
|
-
class X11Client:
    """
    A tool that allows the agent to interact with the screen, keyboard, and mouse of the current computer.

    The tool parameters are defined by Anthropic and are not editable.

    Every action is carried out by running a shell command (`xdotool` for input,
    `scrot` and `convert` for screenshots). The screen size is read from the
    `WIDTH` and `HEIGHT` environment variables and the optional display number
    from `DISPLAY_NUM`.
    """

    width: int
    height: int
    display_num: int | None
    # TODO: Complete plumbing this or remove it
    color_count: ColorCount | None = 256

    # seconds to wait after an action before capturing the screen
    _screenshot_delay = 2.0
    # when set, coordinates and screenshots are scaled to a MAX_SCALING_TARGETS entry
    _scaling_enabled = True

    @property
    def options(self) -> ComputerToolOptions:
        """Return the (scaled) display size and display number."""
        width, height = self._scale_coordinates("computer", self.width, self.height)
        return {
            "display_width_px": width,
            "display_height_px": height,
            "display_number": self.display_num,
        }

    def __init__(self):
        """Read the display configuration from the environment.

        Raises:
            X11ClientError: If `WIDTH` or `HEIGHT` is unset or zero.
        """
        super().__init__()

        self.width = int(os.getenv("WIDTH") or 0)
        self.height = int(os.getenv("HEIGHT") or 0)
        # Explicit check rather than `assert`: asserts are stripped under `-O`,
        # and a zero size would later surface as a ZeroDivisionError.
        if not (self.width and self.height):
            raise X11ClientError("WIDTH, HEIGHT must be set")
        if (display_num := os.getenv("DISPLAY_NUM")) is not None:
            self.display_num = int(display_num)
            self._display_prefix = f"DISPLAY=:{self.display_num} "
        else:
            self.display_num = None
            self._display_prefix = ""

        self.xdotool = f"{self._display_prefix}xdotool"

    async def key(self, text: str) -> ToolResult:
        """Press the key or key combination(s) named in `text`."""
        return await self._shell(f"{self.xdotool} key -- {_key_arg_for_text(text)}")

    async def hold_key(self, text: str, duration: int) -> ToolResult:
        """Hold the key(s) named in `text` down for `duration` seconds."""
        key_arg = _key_arg_for_text(text)
        release_cmd = f"{self.xdotool} keyup -- {key_arg}"
        await self._shell(f"{self.xdotool} keydown -- {key_arg}", False)
        try:
            await asyncio.sleep(duration)
        except BaseException:
            # Interrupted (e.g. cancelled) while the key is down: release it so
            # it is not left held, then propagate the interruption.
            await self._shell(release_cmd, False)
            raise
        return await self._shell(release_cmd)

    async def type(self, text: str) -> ToolResult:
        """Type `text` in groups of TYPING_GROUP_SIZE characters, then take a screenshot."""
        results: list[ToolResult] = []
        for chunk in chunks(text, TYPING_GROUP_SIZE):
            cmd = (
                f"{self.xdotool} type --delay {TYPING_DELAY_MS} -- {shlex.quote(chunk)}"
            )
            results.append(await self._shell(cmd, take_screenshot=False))

        screenshot_base64 = await self._take_screenshot_after_delay()
        return ToolResult(
            output="".join(result.output or "" for result in results),
            error="".join(result.error or "" for result in results),
            base64_image=screenshot_base64,
        )

    async def cursor_position(self) -> ToolResult:
        """Report the mouse position as `X=<x>,Y=<y>` in scaled coordinates.

        Raises:
            X11ClientError: If the xdotool output does not contain the position.
        """
        result = await self._shell(
            f"{self.xdotool} getmouselocation --shell",
            take_screenshot=False,
        )
        output = result.output or ""
        try:
            raw_x = int(output.split("X=")[1].split("\n")[0])
            raw_y = int(output.split("Y=")[1].split("\n")[0])
        except (IndexError, ValueError) as ex:
            # xdotool produced no usable output; report its stderr instead of
            # surfacing an opaque parsing error.
            raise X11ClientError(
                f"Failed to read cursor position: {result.error or output}"
            ) from ex
        x, y = self._scale_coordinates("computer", raw_x, raw_y)
        return result.replace(output=f"X={x},Y={y}")

    async def left_mouse_down(self) -> ToolResult:
        """Press and hold the left mouse button."""
        return await self._shell(f"{self.xdotool} mousedown 1")

    async def left_mouse_up(self) -> ToolResult:
        """Release the left mouse button."""
        return await self._shell(f"{self.xdotool} mouseup 1")

    async def mouse_move(self, coordinate: tuple[int, int]) -> ToolResult:
        """Move the pointer to `coordinate`."""
        return await self._mouse_move_and("mouse_move", coordinate, None)

    async def left_click(
        self, coordinate: tuple[int, int] | None, text: str | None
    ) -> ToolResult:
        """Left click, optionally at `coordinate` and with the keys in `text` held."""
        return await self._mouse_move_and("left_click", coordinate, text)

    async def right_click(
        self, coordinate: tuple[int, int] | None, text: str | None
    ) -> ToolResult:
        """Right click, optionally at `coordinate` and with the keys in `text` held."""
        return await self._mouse_move_and("right_click", coordinate, text)

    async def middle_click(
        self, coordinate: tuple[int, int] | None, text: str | None
    ) -> ToolResult:
        """Middle click, optionally at `coordinate` and with the keys in `text` held."""
        return await self._mouse_move_and("middle_click", coordinate, text)

    # https://wiki.archlinux.org/title/Mouse_buttons#Thumb_buttons_-_forward_and_back
    # suggests that, although not in any spec, the de facto standard is 8 for
    # back and 9 for forward.
    async def back_click(
        self, coordinate: tuple[int, int] | None, text: str | None
    ) -> ToolResult:
        """Click the "back" thumb button (button 8)."""
        return await self._mouse_move_and("back_click", coordinate, text)

    async def forward_click(
        self, coordinate: tuple[int, int] | None, text: str | None
    ) -> ToolResult:
        """Click the "forward" thumb button (button 9)."""
        return await self._mouse_move_and("forward_click", coordinate, text)

    async def double_click(
        self, coordinate: tuple[int, int] | None, text: str | None
    ) -> ToolResult:
        """Double click the left button."""
        return await self._mouse_move_and("double_click", coordinate, text)

    async def triple_click(
        self, coordinate: tuple[int, int] | None, text: str | None
    ) -> ToolResult:
        """Triple click the left button."""
        return await self._mouse_move_and("triple_click", coordinate, text)

    async def left_click_drag(
        self, start_coordinate: tuple[int, int], coordinate: tuple[int, int]
    ) -> ToolResult:
        """Drag with the left button from `start_coordinate` to `coordinate`."""
        await self._move_mouse_to_coordinate(start_coordinate, False)
        x, y = self._scale_coordinates("api", *coordinate)
        return await self._shell(
            f"{self.xdotool} mousedown 1 mousemove --sync {x} {y} mouseup 1"
        )

    async def scroll(
        self,
        scroll_direction: Literal["up", "down", "left", "right"],
        scroll_amount: int,
        coordinate: tuple[int, int] | None,
        text: str | None,
    ) -> ToolResult:
        """Scroll `scroll_amount` clicks in `scroll_direction`.

        The pointer is first moved to `coordinate` when given, and the keys in
        `text` (when given) are held while scrolling.
        """
        if coordinate:
            await self._move_mouse_to_coordinate(coordinate, False)
        scroll_button = {
            "up": 4,
            "down": 5,
            "left": 6,
            "right": 7,
        }[scroll_direction]
        scroll_cmd = f"{self.xdotool} click --repeat {scroll_amount} {scroll_button}"

        if text:
            return await self._shell_with_keys_held(text, scroll_cmd)
        return await self._shell(scroll_cmd)

    async def wait(self, duration: int) -> ToolResult:
        """Sleep for `duration` seconds and then take a screenshot."""
        await asyncio.sleep(duration)
        return await self.screenshot()

    async def screenshot(self) -> ToolResult:
        """Take a screenshot of the current screen."""
        return await self._screenshot()

    async def _shell_with_keys_held(self, text: str, command: str) -> ToolResult:
        """Run `command` while the keys in `text` are held, then release them.

        The keys are released even if `command` raises, so a failure cannot
        leave a key held down on the display.
        """
        key_arg = _key_arg_for_text(text)
        release_cmd = f"{self.xdotool} keyup -- {key_arg}"
        await self._shell(f"{self.xdotool} keydown -- {key_arg}", False)
        try:
            await self._shell(command, False)
        except BaseException:
            await self._shell(release_cmd, False)
            raise
        return await self._shell(release_cmd)

    async def _mouse_move_and(
        self,
        action: Literal[
            "mouse_move",
            "left_click",
            "right_click",
            "middle_click",
            "back_click",
            "forward_click",
            "double_click",
            "triple_click",
        ],
        coordinate: tuple[int, int] | None,
        text: str | None,
    ) -> ToolResult:
        """Optionally move the pointer, then perform the click named by `action`."""
        should_move = action == "mouse_move" or coordinate
        if should_move:
            assert coordinate  # coding/type safety error
            move_result = await self._move_mouse_to_coordinate(
                coordinate, action == "mouse_move"
            )
            if action == "mouse_move":
                return move_result
        click_arg = {
            "left_click": "1",
            "right_click": "3",
            "middle_click": "2",
            "back_click": "8",
            "forward_click": "9",
            "double_click": "--repeat 2 --delay 300 1",
            "triple_click": "--repeat 3 --delay 300 1",
        }[action]
        click_cmd = f"{self.xdotool} click {click_arg}"

        if text:
            return await self._shell_with_keys_held(text, click_cmd)
        return await self._shell(click_cmd)

    async def _move_mouse_to_coordinate(
        self, coordinate: tuple[int, int], take_screenshot: bool
    ) -> ToolResult:
        """Move the pointer to `coordinate` (given in "api" space)."""
        x, y = self._scale_coordinates("api", *coordinate)
        return await self._shell(
            f"{self.xdotool} mousemove --sync {x} {y}", take_screenshot=take_screenshot
        )

    async def _screenshot(self) -> ToolResult:
        """Take a screenshot of the current screen and return the base64 encoded image.

        Raises:
            X11ClientError: If no screenshot file was produced.
        """
        output_dir = Path(OUTPUT_DIR)
        output_dir.mkdir(parents=True, exist_ok=True)
        path = output_dir / f"screenshot_{uuid4().hex}.png"

        result = await self._shell(
            f"{self._display_prefix}scrot --silent -p {path}", take_screenshot=False
        )
        # Nothing to resize or encode if the capture itself produced no file.
        if not path.exists():
            raise X11ClientError(f"Failed to take screenshot: {result.error}")

        if self._scaling_enabled:
            x, y = self._scale_coordinates("computer", self.width, self.height)
            convert_cmd = f"convert {path} -resize {x}x{y}!"
            if self.color_count is not None:
                convert_cmd += f" -colors {self.color_count}"
            convert_cmd += f" {path}"
            await self._shell(convert_cmd, take_screenshot=False)

        if path.exists():
            return result.replace(
                base64_image=base64.b64encode(path.read_bytes()).decode()
            )
        raise X11ClientError(f"Failed to take screenshot: {result.error}")

    async def _shell(self, command: str, take_screenshot=True) -> ToolResult:
        """Run a shell command and return the output, error, and optionally a screenshot."""
        logging.debug(f"running shell command {command}")
        _, stdout, stderr = await run(command)
        logging.debug(f"shell command returned stdout: {stdout}, stderr: {stderr}")
        return ToolResult(
            output=stdout,
            error=stderr,
            base64_image=(await self._take_screenshot_after_delay())
            if take_screenshot
            else None,
        )

    async def _take_screenshot_after_delay(self) -> str:
        """Wait `_screenshot_delay` seconds, then return a base64 screenshot."""
        # delay to let things settle before taking a screenshot
        await asyncio.sleep(self._screenshot_delay)
        return (await self._screenshot()).base64_image

    def _scale_coordinates(
        self, source: ScalingSource, x: int, y: int
    ) -> tuple[int, int]:
        """Scale coordinates to a target maximum resolution.

        Args:
            source: "computer" to scale real-screen values down to the target
                resolution, "api" to scale target-resolution values up to the
                real screen.
            x: Horizontal value to convert.
            y: Vertical value to convert.

        Returns:
            The converted `(x, y)`; unchanged when scaling is disabled or no
            target in MAX_SCALING_TARGETS matches the screen's aspect ratio
            with a smaller width.

        Raises:
            X11ClientError: If `source` is "api" and the point exceeds the
                screen width or height.
        """
        if not self._scaling_enabled:
            return x, y
        ratio = self.width / self.height
        target_dimension = None
        for dimension in MAX_SCALING_TARGETS.values():
            # allow some error in the aspect ratio - not all ratios are exactly 16:9
            if abs(dimension["width"] / dimension["height"] - ratio) < 0.02:
                if dimension["width"] < self.width:
                    target_dimension = dimension
                break
        if target_dimension is None:
            return x, y
        # should be less than 1
        x_scaling_factor = target_dimension["width"] / self.width
        y_scaling_factor = target_dimension["height"] / self.height
        if source == "api":
            if x > self.width or y > self.height:
                raise X11ClientError(f"Coordinates {x}, {y} are out of bounds")
            # scale up
            return round(x / x_scaling_factor), round(y / y_scaling_factor)
        # scale down
        return round(x * x_scaling_factor), round(y * y_scaling_factor)
|
338
|
-
|
339
|
-
|
340
|
-
def _key_arg_for_text(text: str) -> str:
|
341
|
-
return " ".join(shlex.quote(part) for part in text.split())
|
@@ -1,141 +0,0 @@
|
|
1
|
-
import asyncio
|
2
|
-
import json
|
3
|
-
import logging
|
4
|
-
import os
|
5
|
-
import sys
|
6
|
-
import time
|
7
|
-
from argparse import Namespace
|
8
|
-
from typing import TypeVar
|
9
|
-
|
10
|
-
from _args import parse_arguments
|
11
|
-
from _constants import Action
|
12
|
-
from _logger import setup_logger
|
13
|
-
from _tool_result import ToolResult
|
14
|
-
from _x11_client import X11Client
|
15
|
-
|
16
|
-
|
17
|
-
class ComputerToolError(Exception):
    """Error raised when a computer tool request is invalid or incomplete."""

    def __init__(self, message):
        # Keep the text both in `args` (so `str(error)` shows it) and in `.message`.
        super().__init__(message)
        self.message = message
|
20
|
-
|
21
|
-
|
22
|
-
# This is a bit sketchy. We really want to use relative imports here. Using absolute imports
|
23
|
-
# works at runtime, but it prevents intellisense from working. However, when this folder is
|
24
|
-
# copied to the container, by default relative imports won't work if this file is launched
|
25
|
-
# normally. To overcome this, two things need to happen:
|
26
|
-
# 1. PYTHONPATH must be set to the parent of the container folder. `PYTHONPATH=/opt`
|
27
|
-
# 2. The program must be launched with the -m flag. `python3 -m computer_tool.computer_tool`
|
28
|
-
#
|
29
|
-
# TODO: There's got to be a cleaner way.
|
30
|
-
|
31
|
-
my_logger = setup_logger(logging.INFO)
|
32
|
-
|
33
|
-
|
34
|
-
def main():
    """Parse the command line, run the requested action, and print the result as JSON.

    On success a JSON object with `output`, `error` and `base64_image` keys is
    written to stdout. On any exception the message is logged, written to
    stderr, and the process exits with status 1.
    """
    try:
        parsed = parse_arguments()
        my_logger.info(f"({parsed})")
        outcome = asyncio.run(execute_action(parsed))

        payload = {
            "output": outcome.output,
            "error": outcome.error,
            "base64_image": outcome.base64_image,
        }
        print(json.dumps(payload))
        my_logger.debug("SUCCESS")
    except Exception as e:
        # top-level boundary: report the failure on both the log and stderr
        failure = f"An error occurred: {e}"
        my_logger.warning(failure)
        print(failure, file=sys.stderr)
        sys.exit(1)
|
54
|
-
|
55
|
-
|
56
|
-
async def execute_action(args: Namespace) -> ToolResult:
|
57
|
-
# we can't do anything until X11 is ready to go.
|
58
|
-
await wait_for_file("/tmp/xfce_started")
|
59
|
-
|
60
|
-
computer = X11Client()
|
61
|
-
action: Action = args.action
|
62
|
-
match action:
|
63
|
-
case "key":
|
64
|
-
return await computer.key(not_none(args.text, "text"))
|
65
|
-
case "hold_key":
|
66
|
-
return await computer.hold_key(
|
67
|
-
not_none(args.text, "text"), not_none(args.duration, "duration")
|
68
|
-
)
|
69
|
-
case "type":
|
70
|
-
return await computer.type(not_none(args.text, "text"))
|
71
|
-
case "cursor_position":
|
72
|
-
return await computer.cursor_position()
|
73
|
-
case "left_mouse_down":
|
74
|
-
return await computer.left_mouse_down()
|
75
|
-
case "left_mouse_up":
|
76
|
-
return await computer.left_mouse_up()
|
77
|
-
case "mouse_move":
|
78
|
-
return await computer.mouse_move(not_none(args.coordinate, "coordinate"))
|
79
|
-
case "left_click":
|
80
|
-
return await computer.left_click(
|
81
|
-
getattr(args, "coordinate", None), getattr(args, "text", None)
|
82
|
-
)
|
83
|
-
case "right_click":
|
84
|
-
return await computer.right_click(
|
85
|
-
getattr(args, "coordinate", None), getattr(args, "text", None)
|
86
|
-
)
|
87
|
-
case "middle_click":
|
88
|
-
return await computer.middle_click(
|
89
|
-
getattr(args, "coordinate", None), getattr(args, "text", None)
|
90
|
-
)
|
91
|
-
case "double_click":
|
92
|
-
return await computer.double_click(
|
93
|
-
getattr(args, "coordinate", None), getattr(args, "text", None)
|
94
|
-
)
|
95
|
-
case "triple_click":
|
96
|
-
return await computer.triple_click(
|
97
|
-
getattr(args, "coordinate", None), getattr(args, "text", None)
|
98
|
-
)
|
99
|
-
case "left_click_drag":
|
100
|
-
return await computer.left_click_drag(
|
101
|
-
not_none(args.start_coordinate, "start_coordinate"),
|
102
|
-
not_none(args.coordinate, "coordinate"),
|
103
|
-
)
|
104
|
-
case "scroll":
|
105
|
-
return await computer.scroll(
|
106
|
-
not_none(args.scroll_direction, "scroll_direction"),
|
107
|
-
not_none(args.scroll_amount, "scroll_amount"),
|
108
|
-
getattr(args, "coordinate", None),
|
109
|
-
getattr(args, "text", None),
|
110
|
-
)
|
111
|
-
case "wait":
|
112
|
-
return await computer.wait(not_none(args.duration, "duration"))
|
113
|
-
case "screenshot":
|
114
|
-
return await computer.screenshot()
|
115
|
-
|
116
|
-
raise ComputerToolError(f"Invalid action: {action}")
|
117
|
-
|
118
|
-
|
119
|
-
async def wait_for_file(file_path, check_interval=1, timeout=None):
    """Wait until `file_path` exists, polling every `check_interval` seconds.

    Args:
        file_path: Path whose existence is awaited.
        check_interval: Seconds to sleep between existence checks.
        timeout: Optional maximum number of seconds to wait. `None` (the
            default) waits indefinitely.

    Raises:
        TimeoutError: If `timeout` is given and the file has not appeared
            within that many seconds.
    """
    if os.path.exists(file_path):
        return
    my_logger.info(f"Waiting for {file_path}")
    # monotonic clock: elapsed time must not be affected by wall-clock changes
    start_time = time.monotonic()
    while not os.path.exists(file_path):
        if timeout is not None and time.monotonic() - start_time >= timeout:
            raise TimeoutError(
                f"Timed out after {timeout} seconds waiting for {file_path}"
            )
        await asyncio.sleep(check_interval)
    my_logger.info(
        f"Done waiting for {file_path} after {time.monotonic() - start_time:.1f} seconds"
    )
|
129
|
-
|
130
|
-
|
131
|
-
T = TypeVar("T")
|
132
|
-
|
133
|
-
|
134
|
-
def not_none(value: T | None, name: str) -> T:
|
135
|
-
if value is None:
|
136
|
-
raise ComputerToolError(f"{name} must be provided")
|
137
|
-
return value
|
138
|
-
|
139
|
-
|
140
|
-
if __name__ == "__main__":
|
141
|
-
main()
|
@@ -1,65 +0,0 @@
|
|
1
|
-
[build-system]
|
2
|
-
requires = ["setuptools>=64", "setuptools_scm[toml]>=8"]
|
3
|
-
build-backend = "setuptools.build_meta"
|
4
|
-
|
5
|
-
[tool.setuptools_scm]
|
6
|
-
|
7
|
-
[tool.setuptools.packages.find]
|
8
|
-
where = ["."]
|
9
|
-
include = ["inspect_ai*"]
|
10
|
-
|
11
|
-
[tool.ruff]
|
12
|
-
src = ["."]
|
13
|
-
|
14
|
-
[tool.ruff.lint]
|
15
|
-
select = [
|
16
|
-
"E", # pycodestyle errors
|
17
|
-
"W", # pycodestyle warnings
|
18
|
-
"F", # flake8
|
19
|
-
"D", # pydocstyle
|
20
|
-
"I", # isort
|
21
|
-
"SIM101", # duplicate isinstance
|
22
|
-
"UP038", # non-pep604-isinstance
|
23
|
-
# "RET", # flake8-return
|
24
|
-
# "RUF", # ruff rules
|
25
|
-
]
|
26
|
-
ignore = ["E203", "E501", "D10", "D212", "D415"]
|
27
|
-
|
28
|
-
[tool.ruff.lint.pydocstyle]
|
29
|
-
convention = "google"
|
30
|
-
|
31
|
-
[tool.pytest.ini_options]
|
32
|
-
minversion = "7.0"
|
33
|
-
addopts = "-rA --doctest-modules --color=yes"
|
34
|
-
doctest_optionflags = ["NORMALIZE_WHITESPACE", "IGNORE_EXCEPTION_DETAIL"]
|
35
|
-
asyncio_mode = "auto"
|
36
|
-
asyncio_default_fixture_loop_scope = "function"
|
37
|
-
log_level = "warning"
|
38
|
-
|
39
|
-
[tool.mypy]
|
40
|
-
warn_unused_ignores = true
|
41
|
-
no_implicit_reexport = true
|
42
|
-
strict_equality = true
|
43
|
-
warn_redundant_casts = true
|
44
|
-
warn_unused_configs = true
|
45
|
-
disallow_any_explicit = true
|
46
|
-
disallow_any_generics = true
|
47
|
-
disallow_subclassing_any = true
|
48
|
-
plugins=["pydantic.mypy"]
|
49
|
-
|
50
|
-
|
51
|
-
[tool.pydantic-mypy]
|
52
|
-
init_forbid_extra = true
|
53
|
-
init_typed = true
|
54
|
-
|
55
|
-
[tool.check-wheel-contents]
|
56
|
-
ignore = ["W002", "W009"]
|
57
|
-
|
58
|
-
[project]
|
59
|
-
name = "web_browser_tool_container"
|
60
|
-
requires-python = ">=3.10"
|
61
|
-
dynamic = ["version", "dependencies"]
|
62
|
-
|
63
|
-
|
64
|
-
[project.optional-dependencies]
|
65
|
-
dev = ["pytest"]
|
File without changes
|
@@ -1,151 +0,0 @@
|
|
1
|
-
import pytest
|
2
|
-
|
3
|
-
from ._resources.tool._args import parse_arguments
|
4
|
-
|
5
|
-
|
6
|
-
def test_parse_args_screenshot() -> None:
    """`screenshot` parses without any options."""
    parsed = parse_arguments(["screenshot"])
    assert parsed.action == "screenshot"


def test_parse_args_cursor_position() -> None:
    """`cursor_position` parses without any options."""
    parsed = parse_arguments(["cursor_position"])
    assert parsed.action == "cursor_position"


def test_parse_args_type() -> None:
    """`type` carries its `--text` value."""
    parsed = parse_arguments(["type", "--text", "hello"])
    assert parsed.action == "type"
    assert parsed.text == "hello"


def test_parse_args_mouse_move() -> None:
    """`mouse_move` parses `--coordinate` into a list of two ints."""
    parsed = parse_arguments(["mouse_move", "--coordinate", "100", "200"])
    assert parsed.action == "mouse_move"
    assert parsed.coordinate == [100, 200]


def test_parse_args_left_click() -> None:
    """`left_click` parses `--coordinate` into a list of two ints."""
    parsed = parse_arguments(["left_click", "--coordinate", "100", "200"])
    assert parsed.action == "left_click"
    assert parsed.coordinate == [100, 200]


def test_parse_args_right_click() -> None:
    """`right_click` parses `--coordinate` into a list of two ints."""
    parsed = parse_arguments(["right_click", "--coordinate", "100", "200"])
    assert parsed.action == "right_click"
    assert parsed.coordinate == [100, 200]


def test_parse_args_middle_click() -> None:
    """`middle_click` parses `--coordinate` into a list of two ints."""
    parsed = parse_arguments(["middle_click", "--coordinate", "100", "200"])
    assert parsed.action == "middle_click"
    assert parsed.coordinate == [100, 200]


def test_parse_args_double_click() -> None:
    """`double_click` parses `--coordinate` into a list of two ints."""
    parsed = parse_arguments(["double_click", "--coordinate", "100", "200"])
    assert parsed.action == "double_click"
    assert parsed.coordinate == [100, 200]


def test_parse_args_triple_click() -> None:
    """`triple_click` parses `--coordinate` into a list of two ints."""
    parsed = parse_arguments(["triple_click", "--coordinate", "100", "200"])
    assert parsed.action == "triple_click"
    assert parsed.coordinate == [100, 200]


def test_parse_args_hold_key() -> None:
    """`hold_key` carries `--text` and an integer `--duration`."""
    parsed = parse_arguments(["hold_key", "--text", "a", "--duration", "5"])
    assert parsed.action == "hold_key"
    assert parsed.text == "a"
    assert parsed.duration == 5


def test_parse_args_left_click_drag() -> None:
    """`left_click_drag` carries both coordinates and the text."""
    argv = [
        "left_click_drag",
        "--start_coordinate",
        "100",
        "200",
        "--coordinate",
        "300",
        "400",
        "--text",
        "drag",
    ]
    parsed = parse_arguments(argv)
    assert parsed.action == "left_click_drag"
    assert parsed.start_coordinate == [100, 200]
    assert parsed.coordinate == [300, 400]
    assert parsed.text == "drag"


def test_parse_args_scroll() -> None:
    """`scroll` carries direction, amount and coordinate."""
    argv = [
        "scroll",
        "--scroll_direction",
        "up",
        "--scroll_amount",
        "10",
        "--coordinate",
        "100",
        "200",
    ]
    parsed = parse_arguments(argv)
    assert parsed.action == "scroll"
    assert parsed.scroll_direction == "up"
    assert parsed.scroll_amount == 10
    assert parsed.coordinate == [100, 200]


def test_parse_args_wait() -> None:
    """`wait` carries an integer `--duration`."""
    parsed = parse_arguments(["wait", "--duration", "5"])
    assert parsed.action == "wait"
    assert parsed.duration == 5


def test_parse_args_type_missing_text() -> None:
    """`type` without `--text` makes the parser exit."""
    argv = ["type"]
    with pytest.raises(SystemExit):
        parse_arguments(argv)


def test_parse_args_invalid_action() -> None:
    """An unknown action makes the parser exit."""
    argv = ["invalid_action"]
    with pytest.raises(SystemExit):
        parse_arguments(argv)


def test_parse_args_mouse_move_missing_coordinate() -> None:
    """`mouse_move` without `--coordinate` makes the parser exit."""
    argv = ["mouse_move"]
    with pytest.raises(SystemExit):
        parse_arguments(argv)


def test_parse_args_click_invalid_coordinate() -> None:
    """A `--coordinate` with a single value makes the parser exit."""
    argv = ["left_click", "--coordinate", "100"]
    with pytest.raises(SystemExit):
        parse_arguments(argv)


def test_parse_args_hold_key_missing_duration() -> None:
    """`hold_key` without `--duration` makes the parser exit."""
    argv = ["hold_key", "--text", "a"]
    with pytest.raises(SystemExit):
        parse_arguments(argv)


def test_parse_args_left_click_drag_missing_start_coordinate() -> None:
    """`left_click_drag` without `--start_coordinate` makes the parser exit."""
    argv = ["left_click_drag", "--coordinate", "300", "400", "--text", "drag"]
    with pytest.raises(SystemExit):
        parse_arguments(argv)


def test_parse_args_scroll_missing_scroll_direction() -> None:
    """`scroll` without `--scroll_direction` makes the parser exit."""
    argv = ["scroll", "--scroll_amount", "10", "--coordinate", "100", "200"]
    with pytest.raises(SystemExit):
        parse_arguments(argv)


def test_parse_args_wait_missing_duration() -> None:
    """`wait` without `--duration` makes the parser exit."""
    argv = ["wait"]
    with pytest.raises(SystemExit):
        parse_arguments(argv)
|