inspect-ai 0.3.58__py3-none-any.whl → 0.3.59__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/common.py +3 -1
- inspect_ai/_cli/eval.py +15 -2
- inspect_ai/_display/core/active.py +4 -1
- inspect_ai/_display/core/config.py +3 -3
- inspect_ai/_display/core/panel.py +7 -3
- inspect_ai/_display/plain/__init__.py +0 -0
- inspect_ai/_display/plain/display.py +203 -0
- inspect_ai/_display/rich/display.py +0 -5
- inspect_ai/_display/textual/widgets/port_mappings.py +110 -0
- inspect_ai/_display/textual/widgets/samples.py +78 -11
- inspect_ai/_display/textual/widgets/sandbox.py +37 -0
- inspect_ai/_eval/score.py +1 -0
- inspect_ai/_eval/task/results.py +50 -22
- inspect_ai/_eval/task/run.py +41 -7
- inspect_ai/_eval/task/sandbox.py +10 -5
- inspect_ai/_util/constants.py +1 -0
- inspect_ai/_util/port_names.py +61 -0
- inspect_ai/_util/text.py +23 -0
- inspect_ai/_view/www/App.css +31 -1
- inspect_ai/_view/www/dist/assets/index.css +31 -1
- inspect_ai/_view/www/dist/assets/index.js +25344 -1849
- inspect_ai/_view/www/log-schema.json +32 -2
- inspect_ai/_view/www/package.json +2 -0
- inspect_ai/_view/www/src/App.mjs +8 -10
- inspect_ai/_view/www/src/Types.mjs +0 -1
- inspect_ai/_view/www/src/components/ChatView.mjs +133 -43
- inspect_ai/_view/www/src/components/ExpandablePanel.mjs +0 -4
- inspect_ai/_view/www/src/components/LargeModal.mjs +19 -20
- inspect_ai/_view/www/src/components/TabSet.mjs +3 -1
- inspect_ai/_view/www/src/components/VirtualList.mjs +266 -84
- inspect_ai/_view/www/src/index.js +75 -2
- inspect_ai/_view/www/src/navbar/Navbar.mjs +3 -0
- inspect_ai/_view/www/src/navbar/SecondaryBar.mjs +18 -9
- inspect_ai/_view/www/src/samples/SampleDialog.mjs +5 -1
- inspect_ai/_view/www/src/samples/SampleDisplay.mjs +23 -15
- inspect_ai/_view/www/src/samples/SampleList.mjs +18 -48
- inspect_ai/_view/www/src/samples/SampleTranscript.mjs +8 -3
- inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +24 -12
- inspect_ai/_view/www/src/samples/SamplesTab.mjs +4 -1
- inspect_ai/_view/www/src/samples/SamplesTools.mjs +8 -8
- inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +712 -89
- inspect_ai/_view/www/src/samples/tools/filters.mjs +260 -87
- inspect_ai/_view/www/src/samples/transcript/ErrorEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/EventPanel.mjs +29 -24
- inspect_ai/_view/www/src/samples/transcript/EventRow.mjs +1 -1
- inspect_ai/_view/www/src/samples/transcript/InfoEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/InputEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +31 -10
- inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.mjs +23 -2
- inspect_ai/_view/www/src/samples/transcript/ScoreEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/StepEventView.mjs +33 -3
- inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.mjs +25 -2
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +25 -2
- inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +193 -11
- inspect_ai/_view/www/src/samples/transcript/Types.mjs +10 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +26 -2
- inspect_ai/_view/www/src/types/log.d.ts +13 -2
- inspect_ai/_view/www/src/utils/Format.mjs +10 -3
- inspect_ai/_view/www/src/utils/Json.mjs +12 -6
- inspect_ai/_view/www/src/workspace/WorkSpace.mjs +10 -4
- inspect_ai/_view/www/vite.config.js +7 -0
- inspect_ai/_view/www/yarn.lock +116 -0
- inspect_ai/approval/_human/__init__.py +0 -0
- inspect_ai/approval/_policy.py +12 -6
- inspect_ai/log/_log.py +1 -1
- inspect_ai/log/_samples.py +16 -0
- inspect_ai/log/_transcript.py +4 -1
- inspect_ai/model/_call_tools.py +4 -0
- inspect_ai/model/_conversation.py +20 -8
- inspect_ai/model/_generate_config.py +10 -4
- inspect_ai/model/_model.py +117 -18
- inspect_ai/model/_model_output.py +7 -2
- inspect_ai/model/_providers/anthropic.py +100 -44
- inspect_ai/model/_providers/azureai.py +20 -20
- inspect_ai/model/_providers/bedrock.py +37 -40
- inspect_ai/model/_providers/google.py +46 -54
- inspect_ai/model/_providers/mistral.py +11 -11
- inspect_ai/model/_providers/openai.py +15 -16
- inspect_ai/model/_providers/openai_o1.py +9 -8
- inspect_ai/model/_providers/providers.py +1 -1
- inspect_ai/model/_providers/together.py +8 -8
- inspect_ai/model/_providers/vertex.py +1 -4
- inspect_ai/scorer/_reducer/reducer.py +1 -1
- inspect_ai/scorer/_scorer.py +2 -2
- inspect_ai/solver/__init__.py +2 -5
- inspect_ai/solver/_prompt.py +35 -5
- inspect_ai/solver/_task_state.py +80 -38
- inspect_ai/tool/__init__.py +2 -0
- inspect_ai/tool/_tool.py +12 -1
- inspect_ai/tool/_tool_call.py +10 -0
- inspect_ai/tool/_tool_def.py +16 -5
- inspect_ai/tool/_tool_with.py +21 -4
- inspect_ai/tool/beta/__init__.py +5 -0
- inspect_ai/tool/beta/_computer/__init__.py +3 -0
- inspect_ai/tool/beta/_computer/_common.py +133 -0
- inspect_ai/tool/beta/_computer/_computer.py +155 -0
- inspect_ai/tool/beta/_computer/_computer_split.py +198 -0
- inspect_ai/tool/beta/_computer/_resources/Dockerfile +100 -0
- inspect_ai/tool/beta/_computer/_resources/README.md +30 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/entrypoint.sh +18 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/novnc_startup.sh +20 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/x11vnc_startup.sh +48 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/xfce_startup.sh +13 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/xvfb_startup.sh +48 -0
- inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +10 -0
- inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +10 -0
- inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/XPaint.desktop +10 -0
- inspect_ai/tool/beta/_computer/_resources/tool/__init__.py +0 -0
- inspect_ai/tool/beta/_computer/_resources/tool/_logger.py +22 -0
- inspect_ai/tool/beta/_computer/_resources/tool/_run.py +42 -0
- inspect_ai/tool/beta/_computer/_resources/tool/_tool_result.py +33 -0
- inspect_ai/tool/beta/_computer/_resources/tool/_x11_client.py +262 -0
- inspect_ai/tool/beta/_computer/_resources/tool/computer_tool.py +85 -0
- inspect_ai/tool/beta/_computer/_resources/tool/requirements.txt +0 -0
- inspect_ai/util/__init__.py +2 -0
- inspect_ai/util/_limit.py +26 -0
- inspect_ai/util/_sandbox/docker/docker.py +64 -1
- inspect_ai/util/_sandbox/docker/internal.py +3 -1
- inspect_ai/util/_sandbox/environment.py +14 -0
- {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.59.dist-info}/METADATA +2 -2
- {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.59.dist-info}/RECORD +126 -98
- inspect_ai/_view/www/src/samples/transcript/TranscriptState.mjs +0 -70
- {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.59.dist-info}/LICENSE +0 -0
- {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.59.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.59.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.59.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,155 @@
|
|
1
|
+
from typing import Awaitable, Callable
|
2
|
+
|
3
|
+
from inspect_ai._util.content import Content, ContentImage, ContentText
|
4
|
+
from inspect_ai.tool import Tool, ToolResult, tool
|
5
|
+
from inspect_ai.tool._tool import (
|
6
|
+
TOOL_INIT_MODEL_INPUT,
|
7
|
+
ToolParsingError,
|
8
|
+
)
|
9
|
+
from inspect_ai.tool._tool_call import ToolCallModelInput
|
10
|
+
|
11
|
+
from . import _common as common
|
12
|
+
from ._common import Action
|
13
|
+
|
14
|
+
ActionFunction = Callable[[str], ToolResult | Awaitable[ToolResult]]
|
15
|
+
|
16
|
+
|
17
|
+
@tool
def computer(max_screenshots: int | None = 1, timeout: int | None = 180) -> Tool:
    """Create the monolithic computer tool.

    Args:
      max_screenshots: When not None, the model-input filter built by
        `_computer_model_input(max_screenshots)` is attached to the returned
        tool under the `TOOL_INIT_MODEL_INPUT` attribute.
      timeout: Forwarded as `timeout=` to every `_common` action helper.

    Returns:
      The `execute` coroutine function that validates its arguments and
      dispatches to the matching `_common` helper.
    """

    async def execute(
        action: Action,
        text: str | None = None,
        coordinate: list[int] | None = None,
    ) -> ToolResult:
        """
        Use this tool to interact with a computer.

        Use a mouse and keyboard to interact with a computer's desktop GUI.

        Keep in mind that icons require double clicks to open while other UI affordances like menu items and buttons require a single click.

        Args:
          action (Action): The action to perform.
              - `key`: Press a key or key-combination on the keyboard.
                  - Example: execute(action="key", text="ctrl+s")
                  - Text can be any key name supported by xdotool's `key` such as:
                      "Return", "Escape", "alt+Tab", "BackSpace", "Tab", "alt+Tab", "ctrl+s", "Up", "KP_0" (for the numpad 0 key),
                      "Insert", "Delete", "Home", "End", "Prior", "Next", "Left", "Up", "Right", "Down",
                      "F1", "F2", "F3", "F4", "F5", "F6", "F7", "F8", "F9", "F10", "F11", "F12",
                      "Shift_L", "Shift_R", "Control_L", "Control_R", "Alt_L", "Alt_R", "Scroll_Lock", "Num_Lock", "Caps_Lock", "Pause",
                      "KP_Multiply", "KP_Home", "KP_Up", "KP_Prior", "KP_Subtract", "KP_Left", "KP_Begin", "KP_Right", "KP_Add", "KP_End","KP_Down",
                      "KP_Next", "KP_Insert", "KP_Delete", "KP_Enter", "KP_Divide", "KP_Equal", "KP_Decimal",
              - `type`: Type a string of text on the keyboard. If the text contains spaces, enclose it in quotes.
                  - Example: execute(action="type", text="The crux of the biscuit is the apostrophe!")
              - `cursor_position`: Get the current (x, y) pixel coordinate of the cursor on the screen.
              - `mouse_move`: Move the cursor to a specified (x, y) pixel coordinate on the screen.
                  - Example: execute(action="mouse_move", coordinate=(100, 200))
              - `left_click`: Click the left mouse button.
              - `left_click_drag`: Click and drag the cursor to a specified (x, y) pixel coordinate on the screen.
                  - Example: execute(action="left_click_drag", coordinate=(150, 250))
              - `right_click`: Click the right mouse button.
              - `middle_click`: Click the middle mouse button.
              - `double_click`: Double-click the left mouse button.
              - `screenshot`: Take a screenshot.
          text (str | None): The text to type or the key to press. Required when action is "key" or "type".
          coordinate (tuple[int, int] | None): The (x, y) pixel coordinate on the screen to which to move or drag. Required when action is "mouse_move" or "left_click_drag".

        Returns:
          The output of the command. Many commands will include a screenshot reflecting the result of the command in their output.
        """
        # Actions that take a coordinate and must not receive text.
        if action in ("mouse_move", "left_click_drag"):
            if coordinate is None:
                raise ToolParsingError(f"coordinate is required for {action}")
            if text is not None:
                raise ToolParsingError(f"text is not accepted for {action}")
            if not isinstance(coordinate, list) or len(coordinate) != 2:
                raise ToolParsingError(f"{coordinate} must be a tuple of length 2")
            if not all(isinstance(i, int) and i >= 0 for i in coordinate):
                raise ToolParsingError(
                    f"{coordinate} must be a tuple of non-negative ints"
                )

            if action == "mouse_move":
                return await common.mouse_move(
                    coordinate[0], coordinate[1], timeout=timeout
                )
            elif action == "left_click_drag":
                return await common.left_click_drag(
                    coordinate[0], coordinate[1], timeout=timeout
                )

        # Actions that take text and must not receive a coordinate.
        if action in ("key", "type"):
            if text is None:
                raise ToolParsingError(f"text is required for {action}")
            if coordinate is not None:
                raise ToolParsingError(f"coordinate is not accepted for {action}")
            if not isinstance(text, str):
                # Message is passed positionally, like every other
                # ToolParsingError raised in this function.
                raise ToolParsingError(f"{text} must be a string")

            if action == "key":
                return await common.press_key(text, timeout=timeout)
            elif action == "type":
                return await common.type(text, timeout=timeout)

        # Actions that accept neither text nor a coordinate.
        if action in (
            "left_click",
            "right_click",
            "double_click",
            "middle_click",
            "screenshot",
            "cursor_position",
        ):
            if text is not None:
                raise ToolParsingError(f"text is not accepted for {action}")
            if coordinate is not None:
                raise ToolParsingError(f"coordinate is not accepted for {action}")

            if action == "screenshot":
                return await common.screenshot(timeout=timeout)
            elif action == "cursor_position":
                return await common.cursor_position(timeout=timeout)
            elif action == "left_click":
                return await common.left_click(timeout=timeout)
            elif action == "right_click":
                return await common.right_click(timeout=timeout)
            elif action == "middle_click":
                return await common.middle_click(timeout=timeout)
            elif action == "double_click":
                return await common.double_click(timeout=timeout)

        # Anything that matched none of the groups above is not a known action.
        raise ToolParsingError(f"Invalid action: {action}")

    # if max_screenshots is specified then poke model input into where @tool can find it
    if max_screenshots is not None:
        setattr(execute, TOOL_INIT_MODEL_INPUT, _computer_model_input(max_screenshots))

    return execute
|
127
|
+
|
128
|
+
|
129
|
+
def _computer_model_input(max_screenshots: int) -> ToolCallModelInput:
    """Build a model-input filter that swaps older screenshots for text.

    The returned callable receives a message's index, the total number of
    messages, and that message's content. String content is returned as is.
    List content is returned as is when `message_total - message_index` is at
    most `max_screenshots`; otherwise every `ContentImage` item is replaced by
    a `ContentText` placeholder and all other items are kept.
    """

    def model_input(
        message_index: int, message_total: int, content: str | list[Content]
    ) -> str | list[Content]:
        # scalar content carries no images, so there is nothing to strip
        if isinstance(content, str):
            return content

        # still within the max_screenshots window: keep images untouched
        distance_from_end = message_total - message_index
        if distance_from_end <= max_screenshots:
            return content

        # outside the window: substitute a text placeholder for each image
        filtered: list[Content] = [
            ContentText(
                text="Screenshot removed to reduce size of input. Please consult the latest screenshots for the most up to date state of the screen."
            )
            if isinstance(item, ContentImage)
            else item
            for item in content
        ]
        return filtered

    return model_input
|
@@ -0,0 +1,198 @@
|
|
1
|
+
"""
|
2
|
+
This module provides the same functionality as the computer tool but via a list of per-action tools, e.g. computer_mouse_move(100, 100).
|
3
|
+
|
4
|
+
The split version is not publicly exported, but is retained until we decide if it performs better than the monolithic computer tool.
|
5
|
+
"""
|
6
|
+
|
7
|
+
from typing import Awaitable, Callable
|
8
|
+
|
9
|
+
from inspect_ai.tool import Tool, ToolResult, tool
|
10
|
+
|
11
|
+
from . import _common as common
|
12
|
+
|
13
|
+
ActionFunction = Callable[[str], ToolResult | Awaitable[ToolResult]]
|
14
|
+
|
15
|
+
|
16
|
+
def computer_split(timeout: int | None = None) -> list[Tool]:
    """
    Computer interaction tools.

    Args:
      timeout (int | None): Timeout (in seconds) for command. Forwarded to
        every per-action tool in the returned list.

    Returns:
       List of computer interaction tools.
    """
    # Pass the timeout through to each factory; previously it was accepted
    # here but dropped, so every tool ran with its own default of None.
    return [
        computer_cursor_position(timeout=timeout),
        computer_screenshot(timeout=timeout),
        computer_mouse_move(timeout=timeout),
        computer_left_click(timeout=timeout),
        computer_double_click(timeout=timeout),
        computer_left_click_drag(timeout=timeout),
        computer_right_click(timeout=timeout),
        computer_key(timeout=timeout),
        computer_type(timeout=timeout),
    ]
|
37
|
+
|
38
|
+
|
39
|
+
@tool()
def computer_cursor_position(timeout: int | None = None) -> Tool:
    # Factory for the cursor-position tool; `timeout` is handed to
    # `common.cursor_position` on every call.
    async def execute() -> ToolResult:
        """
        Get the current (x, y) pixel coordinate of the cursor on the screen.

        Args:
          None

        Returns:
          A `str` of the form "x y" where x and y are the current mouse coordinates.
        """
        position = await common.cursor_position(timeout=timeout)
        return position

    return execute
|
54
|
+
|
55
|
+
|
56
|
+
@tool()
def computer_screenshot(timeout: int | None = None) -> Tool:
    # Factory for the screenshot tool; `timeout` is handed to
    # `common.screenshot` on every call.
    async def execute() -> ToolResult:
        """
        Take a screenshot.

        Args:
          None

        Returns:
          A `list` with a single `ContentImage` of the screen.
        """
        capture = await common.screenshot(timeout=timeout)
        return capture

    return execute
|
71
|
+
|
72
|
+
|
73
|
+
@tool()
def computer_mouse_move(timeout: int | None = None) -> Tool:
    # Factory for the mouse-move tool; `timeout` is handed to
    # `common.mouse_move` on every call.
    async def execute(x: int, y: int) -> ToolResult:
        """
        Move the cursor to a specified (x, y) pixel coordinate on the screen.

        Args:
          x: X coordinate of the mouse destination.
          y: Y coordinate of the mouse destination.

        Returns:
          A `list` with a single `ContentImage` of the screen.
        """
        outcome = await common.mouse_move(x, y, timeout=timeout)
        return outcome

    return execute
|
89
|
+
|
90
|
+
|
91
|
+
@tool()
def computer_left_click(timeout: int | None = None) -> Tool:
    # Factory for the left-click tool; `timeout` is handed to
    # `common.left_click` on every call.
    async def execute() -> ToolResult:
        """
        Click the left mouse button.

        Args:
          None

        Returns:
          A `list` with a single `ContentImage` of the screen.
        """
        outcome = await common.left_click(timeout=timeout)
        return outcome

    return execute
|
106
|
+
|
107
|
+
|
108
|
+
@tool()
def computer_double_click(timeout: int | None = None) -> Tool:
    # Factory for the double-click tool; `timeout` is handed to
    # `common.double_click` on every call.
    async def execute() -> ToolResult:
        """
        Double-click the left mouse button.

        Args:
          None

        Returns:
          A `list` with a single `ContentImage` of the screen.
        """
        outcome = await common.double_click(timeout=timeout)
        return outcome

    return execute
|
123
|
+
|
124
|
+
|
125
|
+
@tool()
def computer_left_click_drag(timeout: int | None = None) -> Tool:
    # Factory for the click-and-drag tool; `timeout` is handed to
    # `common.left_click_drag` on every call.
    async def execute(x: int, y: int) -> ToolResult:
        """
        Click and drag the cursor to a specified (x, y) pixel coordinate on the screen.

        Args:
          x: X coordinate of the mouse destination.
          y: Y coordinate of the mouse destination.

        Returns:
          A `list` with a single `ContentImage` of the screen.
        """
        outcome = await common.left_click_drag(x, y, timeout=timeout)
        return outcome

    return execute
|
141
|
+
|
142
|
+
|
143
|
+
@tool()
def computer_right_click(timeout: int | None = None) -> Tool:
    # Factory for the right-click tool; `timeout` is handed to
    # `common.right_click` on every call.
    async def execute() -> ToolResult:
        """
        Click the right mouse button.

        Args:
          None

        Returns:
          A `list` with a single `ContentImage` of the screen.
        """
        outcome = await common.right_click(timeout=timeout)
        return outcome

    return execute
|
158
|
+
|
159
|
+
|
160
|
+
# keysym list is from https://gist.github.com/rvaiya/be31f42049a4b5ad46666a8e120d9843
|
161
|
+
@tool()
def computer_key(timeout: int | None = None) -> Tool:
    # Factory for the key-press tool; `timeout` is handed to
    # `common.press_key` on every call.
    async def execute(key: str) -> ToolResult:
        """
        Press a key or key-combination on the keyboard.

        Args:
          key: The key or key-combination to press. Can be any key name supported by xdotool's `key` such as:
            "Return", "Escape", "alt+Tab", "BackSpace", "Tab", "alt+Tab", "ctrl+s", "Up", "KP_0" (for the numpad 0 key),
            "Insert", "Delete", "Home", "End", "Prior", "Next", "Left", "Up", "Right", "Down",
            "F1", "F2", "F3", "F4", "F5", "F6", "F7", "F8", "F9", "F10", "F11", "F12",
            "Shift_L", "Shift_R", "Control_L", "Control_R", "Alt_L", "Alt_R", "Scroll_Lock", "Num_Lock", "Caps_Lock", "Pause",
            "KP_Multiply", "KP_Home", "KP_Up", "KP_Prior", "KP_Subtract", "KP_Left", "KP_Begin", "KP_Right", "KP_Add", "KP_End","KP_Down",
            "KP_Next", "KP_Insert", "KP_Delete", "KP_Enter", "KP_Divide", "KP_Equal", "KP_Decimal"

        Returns:
          A `list` with a single `ContentImage` of the screen.
        """
        outcome = await common.press_key(key, timeout=timeout)
        return outcome

    return execute
|
182
|
+
|
183
|
+
|
184
|
+
@tool()
def computer_type(timeout: int | None = None) -> Tool:
    # Factory for the typing tool; `timeout` is handed to `common.type` on
    # every call.
    async def execute(text: str) -> ToolResult:
        """
        Type a string of text on the keyboard.

        Args:
          text: The text to type. If the text contains spaces, enclose it in quotes.

        Returns:
          A `list` with a single `ContentImage` of the screen.
        """
        outcome = await common.type(text, timeout=timeout)
        return outcome

    return execute
|
@@ -0,0 +1,100 @@
|
|
1
|
+
# Image for the computer tool: an Ubuntu desktop (Xvfb + xfce4) reachable over
# VNC/noVNC, with the tool scripts and entrypoint scripts under /opt/inspect.
FROM docker.io/ubuntu:22.04

# Keep apt from prompting during the build.
ENV DEBIAN_FRONTEND=noninteractive
ENV DEBIAN_PRIORITY=high

# Core/system layer
RUN apt-get update && \
    apt-get -y upgrade && \
    apt-get -y install \
    # A virtual framebuffer for running GUI applications without a physical display.
    xvfb \
    # A lightweight desktop environment for UNIX-like operating systems.
    xfce4 \
    # The terminal emulator for the xfce4 desktop environment.
    xfce4-terminal\
    # A VNC server for sharing X11 desktops.
    x11vnc \
    # A web based VNC client
    novnc \
    # A WebSocket to TCP proxy/bridge for noVNC
    websockify \
    # The Python programming language interpreter.
    python3 \
    # The package installer for Python.
    python3-pip \
    # A command-line tool for automating X11 applications (e.g., simulating keyboard/mouse inputs).
    xdotool \
    # A command-line tool for taking screenshots.
    scrot \
    # A suite for image manipulation — needed for scaling images.
    imagemagick && \
    apt-get clean

# Userland apt-get'able apps
RUN apt-get install -y --no-install-recommends \
    # A simple paint (image editing) program.
    xpaint \
    # A calculator application.
    galculator && \
    apt-get clean

# install Firefox (firefox-esr from the mozillateam PPA)
RUN apt-get install -y software-properties-common && \
    add-apt-repository ppa:mozillateam/ppa && \
    apt-get update && \
    apt-get install -y --no-install-recommends firefox-esr && \
    apt-get clean

# install VS Code: register Microsoft's signing key and apt repository, then
# install the `code` package from it
RUN apt-get install -y \
    gpg \
    wget \
    apt-transport-https \
    software-properties-common && \
    wget -qO- https://packages.microsoft.com/keys/microsoft.asc | gpg --dearmor > packages.microsoft.gpg && \
    install -D -o root -g root -m 644 packages.microsoft.gpg /etc/apt/keyrings/packages.microsoft.gpg && \
    sh -c 'echo "deb [arch=amd64,arm64 signed-by=/etc/apt/keyrings/packages.microsoft.gpg] https://packages.microsoft.com/repos/code stable main" > /etc/apt/sources.list.d/vscode.list' && \
    apt-get update && \
    apt-get install -y code && \
    apt-get clean

# configure noVNC: serve vnc.html as the index page of the noVNC web root
RUN ln -s /usr/share/novnc/vnc.html /usr/share/novnc/index.html

# We copy requirements.txt by itself so that changes to the scripts will be in a later layer
# and we only pip install if requirements.txt changes
COPY tool/requirements.txt /opt/inspect/tool/requirements.txt
RUN cd /opt/inspect/tool && pip3 install --no-cache-dir -r requirements.txt

# Tool and entrypoint scripts; made readable/executable for all users.
COPY tool/ /opt/inspect/tool
COPY entrypoint/ /opt/inspect/entrypoint
RUN chmod -R 755 /opt/inspect

# setup user: a non-root account with a passwordless sudoers entry; everything
# below this point runs as that user
ENV USERNAME=user
ENV HOME=/home/$USERNAME
RUN useradd -m -s /bin/bash -d $HOME $USERNAME
RUN echo "${USERNAME} ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers
USER ${USERNAME}
WORKDIR $HOME
COPY --chown=$USERNAME:$USERNAME image_home_dir/ $HOME

# configure Firefox to skip all 'first run' UI
RUN mkdir -p $HOME/.mozilla/firefox-esr/profile.default && \
    echo 'user_pref("browser.startup.homepage_override.mstone", "ignore");' >> $HOME/.mozilla/firefox-esr/profile.default/user.js && \
    echo 'user_pref("browser.aboutwelcome.enabled", false);' >> $HOME/.mozilla/firefox-esr/profile.default/user.js && \
    echo 'user_pref("datareporting.policy.firstRunURL", "");' >> $HOME/.mozilla/firefox-esr/profile.default/user.js

# 5900 is the port x11vnc_startup.sh passes as -rfbport; 6080 is the port
# novnc_startup.sh has websockify listen on.
EXPOSE 5900
EXPOSE 6080

# Build-time display settings, re-exported as environment variables so the
# entrypoint scripts can read DISPLAY_NUM, DISPLAY, WIDTH and HEIGHT.
ARG DISPLAY_NUM=1
ARG WIDTH=1920
ARG HEIGHT=1080
ENV DISPLAY_NUM=$DISPLAY_NUM
ENV DISPLAY=:${DISPLAY_NUM}
ENV HEIGHT=$HEIGHT
ENV WIDTH=$WIDTH

ENTRYPOINT [ "/opt/inspect/entrypoint/entrypoint.sh" ]
|
@@ -0,0 +1,30 @@
|
|
1
|
+
# About This Image
|
2
|
+
|
3
|
+
This image was inspired by Anthropic's Computer Use Demo [here](https://github.com/anthropics/anthropic-quickstarts/tree/main/computer-use-demo/image).
|
4
|
+
|
5
|
+
Its goal is to provide the minimum infrastructure to support the use of Inspect's `computer_tool` to interact with the computer via X11 and `xdotool`, while also providing observability and interaction via VNC and noVNC.
|
6
|
+
|
7
|
+
The image extends this minimal functionality by adding a few basic applications — VS Code, Firefox, XPaint, and galculator.
|
8
|
+
|
9
|
+
## Entrypoint Directory
|
10
|
+
|
11
|
+
1. **Xvfb (X Virtual Framebuffer)**
|
12
|
+
- **Script:** `xvfb_startup.sh`
|
13
|
+
- **Description:** Xvfb is a display server that implements the X11 display server protocol. It runs in memory and does not require a physical display, useful for running graphical applications in a headless environment.
|
14
|
+
|
15
|
+
1. **xfce4**
|
16
|
+
- **Script:** `xfce_startup.sh`
|
17
|
+
- **Description:** xfce4 is a lightweight desktop environment for UNIX-like operating systems. It aims to be fast, low on system resources, and user-friendly.
|
18
|
+
|
19
|
+
1. **x11vnc**
|
20
|
+
- **Script:** `x11vnc_startup.sh`
|
21
|
+
- **Description:** x11vnc is a VNC server that allows remote access to the X11 display. It enables users to connect to the virtual display environment from a remote machine using a VNC client.
|
22
|
+
|
23
|
+
1. **noVNC**
|
24
|
+
- **Script:** `novnc_startup.sh`
|
25
|
+
- **Description:** noVNC is a VNC client that runs in a web browser. It allows users to access the virtual display environment through a web interface without needing a separate VNC client application.
|
26
|
+
|
27
|
+
## Desktop Directory
|
28
|
+
|
29
|
+
The `Desktop` directory contains launchers for VS Code, Firefox and XPaint.
|
30
|
+
|
@@ -0,0 +1,18 @@
|
|
1
|
+
#!/bin/bash
# Container entrypoint: clears stale marker files, runs the four startup
# scripts in order (Xvfb, xfce, x11vnc, noVNC), then either replaces itself
# with the supplied CMD or idles forever to keep the container alive.
set -e

# remove marker files
rm -f "/tmp/.X${DISPLAY_NUM}-lock"
rm -f /tmp/xfce_started

# Because of `set -e`, a startup script that exits non-zero aborts the
# entrypoint before the later scripts run.
/opt/inspect/entrypoint/xvfb_startup.sh
/opt/inspect/entrypoint/xfce_startup.sh
/opt/inspect/entrypoint/x11vnc_startup.sh
/opt/inspect/entrypoint/novnc_startup.sh

# Run CMD if provided
if [ "$#" -gt 0 ]; then
  # "$*" joins the arguments for logging; "$@" keeps them separate for exec.
  echo "Executing CMD from derived Dockerfile: $*"
  exec "$@"
fi

# Keep the container running (only reached when no CMD was given)
tail -f /dev/null
|
@@ -0,0 +1,20 @@
|
|
1
|
+
#!/bin/bash
# Starts websockify serving the noVNC web client on port 6080 and proxying to
# the VNC server on localhost:5900, then waits up to ~10 seconds for the port
# to be listening.
echo "starting noVNC"

# Start noVNC with explicit websocket settings
websockify \
  --web=/usr/share/novnc/ \
  6080 localhost:5900 \
  > /tmp/novnc.log 2>&1 &

# Wait for noVNC to start
timeout=10
while [ "$timeout" -gt 0 ]; do
  if netstat -tuln | grep -q ":6080 "; then
    break
  fi
  sleep 1
  timeout=$((timeout - 1))
done

# The loop only reaches zero when the port never appeared; report that
# instead of claiming success (mirrors the check in x11vnc_startup.sh).
if [ "$timeout" -eq 0 ]; then
  echo "noVNC failed to start, log output:" >&2
  cat /tmp/novnc.log >&2
  exit 1
fi

echo "noVNC started successfully"
|
@@ -0,0 +1,48 @@
|
|
1
|
+
#!/bin/bash
# Starts x11vnc against $DISPLAY on port 5900, waits up to ~10 seconds for the
# port to be listening, then leaves a background monitor that re-runs this
# script if the x11vnc job disappears.
echo "starting vnc"

# stderr goes to a log file so it can be shown if startup fails or the
# process later dies.
(x11vnc -display "$DISPLAY" \
  -forever \
  -shared \
  -wait 50 \
  -cursor most \
  -cursor arrow \
  -rfbport 5900 \
  -nopw \
  2>/tmp/x11vnc_stderr.log) &

# PID of the background subshell wrapping x11vnc
x11vnc_pid=$!

# Wait for x11vnc to start
timeout=10
while [ "$timeout" -gt 0 ]; do
  if netstat -tuln | grep -q ":5900 "; then
    break
  fi
  sleep 1
  timeout=$((timeout - 1))
done

if [ "$timeout" -eq 0 ]; then
  echo "x11vnc failed to start, stderr output:" >&2
  cat /tmp/x11vnc_stderr.log >&2
  exit 1
fi

# Startup succeeded: discard whatever was logged so far
: > /tmp/x11vnc_stderr.log

# Monitor x11vnc process in the background
(
  while true; do
    # kill -0 only tests whether the PID still exists
    if ! kill -0 "$x11vnc_pid" 2>/dev/null; then
      echo "x11vnc process crashed, restarting..." >&2
      if [ -f /tmp/x11vnc_stderr.log ]; then
        echo "x11vnc stderr output:" >&2
        cat /tmp/x11vnc_stderr.log >&2
        rm /tmp/x11vnc_stderr.log
      fi
      # Replace this monitor subshell with a fresh run of the script
      exec "$0"
    fi
    sleep 5
  done
) &
|
@@ -0,0 +1,48 @@
|
|
1
|
+
#!/bin/bash
# Starts Xvfb on $DISPLAY at ${WIDTH}x${HEIGHT}x24 unless its lock file
# already exists, then waits for the display to answer xdpyinfo.
# Reads: DISPLAY, DISPLAY_NUM, WIDTH, HEIGHT.
set -e # Exit on error

DPI=96
RES_AND_DEPTH="${WIDTH}x${HEIGHT}x24"

# Function to check if Xvfb is already running
# Returns 0 when the X lock file for DISPLAY_NUM exists, 1 otherwise.
check_xvfb_running() {
  if [ -e "/tmp/.X${DISPLAY_NUM}-lock" ]; then
    return 0 # Xvfb is already running
  else
    return 1 # Xvfb is not running
  fi
}

# Function to check if Xvfb is ready
# Polls xdpyinfo every 0.1s; returns 1 once more than 10 seconds have elapsed.
wait_for_xvfb() {
  local timeout=10
  # Declared separately so a failing `date` is not masked by `local`.
  local start_time
  start_time=$(date +%s)
  while ! xdpyinfo >/dev/null 2>&1; do
    if [ $(($(date +%s) - start_time)) -gt "$timeout" ]; then
      echo "Xvfb failed to start within $timeout seconds" >&2
      return 1
    fi
    sleep 0.1
  done
  return 0
}

# Check if Xvfb is already running
if check_xvfb_running; then
  echo "Xvfb is already running on display ${DISPLAY}"
  exit 0
fi

# Start Xvfb
Xvfb "$DISPLAY" -ac -screen 0 "$RES_AND_DEPTH" -retro -dpi "$DPI" -nolisten tcp -nolisten unix &
XVFB_PID=$!

# Wait for Xvfb to start
if wait_for_xvfb; then
  echo "Xvfb started successfully on display ${DISPLAY}"
  echo "Xvfb PID: $XVFB_PID"
else
  echo "Xvfb failed to start" >&2
  # The process may already be gone; don't let a failed kill under `set -e`
  # pre-empt the explicit exit below.
  kill "$XVFB_PID" 2>/dev/null || true
  exit 1
fi
|
File without changes
|