inspect-ai 0.3.58__py3-none-any.whl → 0.3.60__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/common.py +3 -1
- inspect_ai/_cli/eval.py +15 -9
- inspect_ai/_display/core/active.py +4 -1
- inspect_ai/_display/core/config.py +3 -3
- inspect_ai/_display/core/panel.py +7 -3
- inspect_ai/_display/plain/__init__.py +0 -0
- inspect_ai/_display/plain/display.py +203 -0
- inspect_ai/_display/rich/display.py +0 -5
- inspect_ai/_display/textual/widgets/port_mappings.py +110 -0
- inspect_ai/_display/textual/widgets/samples.py +79 -12
- inspect_ai/_display/textual/widgets/sandbox.py +37 -0
- inspect_ai/_eval/eval.py +10 -1
- inspect_ai/_eval/loader.py +79 -19
- inspect_ai/_eval/registry.py +6 -0
- inspect_ai/_eval/score.py +3 -1
- inspect_ai/_eval/task/results.py +51 -22
- inspect_ai/_eval/task/run.py +47 -13
- inspect_ai/_eval/task/sandbox.py +10 -5
- inspect_ai/_util/constants.py +1 -0
- inspect_ai/_util/port_names.py +61 -0
- inspect_ai/_util/text.py +23 -0
- inspect_ai/_view/www/App.css +31 -1
- inspect_ai/_view/www/dist/assets/index.css +31 -1
- inspect_ai/_view/www/dist/assets/index.js +25498 -2044
- inspect_ai/_view/www/log-schema.json +32 -2
- inspect_ai/_view/www/package.json +2 -0
- inspect_ai/_view/www/src/App.mjs +14 -16
- inspect_ai/_view/www/src/Types.mjs +1 -2
- inspect_ai/_view/www/src/api/Types.ts +133 -0
- inspect_ai/_view/www/src/api/{api-browser.mjs → api-browser.ts} +25 -13
- inspect_ai/_view/www/src/api/api-http.ts +219 -0
- inspect_ai/_view/www/src/api/api-shared.ts +47 -0
- inspect_ai/_view/www/src/api/{api-vscode.mjs → api-vscode.ts} +22 -19
- inspect_ai/_view/www/src/api/{client-api.mjs → client-api.ts} +93 -53
- inspect_ai/_view/www/src/api/index.ts +51 -0
- inspect_ai/_view/www/src/api/jsonrpc.ts +225 -0
- inspect_ai/_view/www/src/components/ChatView.mjs +133 -43
- inspect_ai/_view/www/src/components/DownloadButton.mjs +1 -1
- inspect_ai/_view/www/src/components/ExpandablePanel.mjs +0 -4
- inspect_ai/_view/www/src/components/LargeModal.mjs +19 -20
- inspect_ai/_view/www/src/components/TabSet.mjs +3 -1
- inspect_ai/_view/www/src/components/VirtualList.mjs +266 -84
- inspect_ai/_view/www/src/index.js +77 -4
- inspect_ai/_view/www/src/log/{remoteLogFile.mjs → remoteLogFile.ts} +62 -46
- inspect_ai/_view/www/src/navbar/Navbar.mjs +4 -1
- inspect_ai/_view/www/src/navbar/SecondaryBar.mjs +19 -10
- inspect_ai/_view/www/src/samples/SampleDialog.mjs +5 -1
- inspect_ai/_view/www/src/samples/SampleDisplay.mjs +23 -15
- inspect_ai/_view/www/src/samples/SampleList.mjs +19 -49
- inspect_ai/_view/www/src/samples/SampleScores.mjs +1 -1
- inspect_ai/_view/www/src/samples/SampleTranscript.mjs +8 -3
- inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +38 -26
- inspect_ai/_view/www/src/samples/SamplesTab.mjs +14 -11
- inspect_ai/_view/www/src/samples/SamplesTools.mjs +8 -8
- inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +712 -89
- inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +2 -2
- inspect_ai/_view/www/src/samples/tools/filters.mjs +260 -87
- inspect_ai/_view/www/src/samples/transcript/ErrorEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/EventPanel.mjs +29 -24
- inspect_ai/_view/www/src/samples/transcript/EventRow.mjs +1 -1
- inspect_ai/_view/www/src/samples/transcript/InfoEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/InputEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +31 -10
- inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.mjs +23 -2
- inspect_ai/_view/www/src/samples/transcript/ScoreEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/StepEventView.mjs +33 -3
- inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.mjs +25 -2
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +25 -2
- inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +193 -11
- inspect_ai/_view/www/src/samples/transcript/Types.mjs +10 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +26 -2
- inspect_ai/_view/www/src/types/log.d.ts +13 -2
- inspect_ai/_view/www/src/utils/Format.mjs +10 -3
- inspect_ai/_view/www/src/utils/{Json.mjs → json-worker.ts} +13 -9
- inspect_ai/_view/www/src/utils/vscode.ts +36 -0
- inspect_ai/_view/www/src/workspace/WorkSpace.mjs +11 -5
- inspect_ai/_view/www/vite.config.js +7 -0
- inspect_ai/_view/www/yarn.lock +116 -0
- inspect_ai/approval/_human/__init__.py +0 -0
- inspect_ai/approval/_human/manager.py +1 -1
- inspect_ai/approval/_policy.py +12 -6
- inspect_ai/log/_log.py +1 -1
- inspect_ai/log/_samples.py +16 -0
- inspect_ai/log/_transcript.py +4 -1
- inspect_ai/model/_call_tools.py +59 -0
- inspect_ai/model/_conversation.py +16 -7
- inspect_ai/model/_generate_config.py +12 -12
- inspect_ai/model/_model.py +117 -18
- inspect_ai/model/_model_output.py +22 -2
- inspect_ai/model/_openai.py +383 -0
- inspect_ai/model/_providers/anthropic.py +152 -55
- inspect_ai/model/_providers/azureai.py +21 -21
- inspect_ai/model/_providers/bedrock.py +37 -40
- inspect_ai/model/_providers/goodfire.py +248 -0
- inspect_ai/model/_providers/google.py +46 -54
- inspect_ai/model/_providers/groq.py +7 -3
- inspect_ai/model/_providers/hf.py +6 -0
- inspect_ai/model/_providers/mistral.py +13 -12
- inspect_ai/model/_providers/openai.py +51 -218
- inspect_ai/model/_providers/openai_o1.py +11 -12
- inspect_ai/model/_providers/providers.py +23 -1
- inspect_ai/model/_providers/together.py +12 -12
- inspect_ai/model/_providers/util/__init__.py +2 -3
- inspect_ai/model/_providers/util/hf_handler.py +1 -1
- inspect_ai/model/_providers/util/llama31.py +1 -1
- inspect_ai/model/_providers/util/util.py +0 -76
- inspect_ai/model/_providers/vertex.py +1 -4
- inspect_ai/scorer/_metric.py +3 -0
- inspect_ai/scorer/_reducer/reducer.py +1 -1
- inspect_ai/scorer/_scorer.py +4 -3
- inspect_ai/solver/__init__.py +4 -5
- inspect_ai/solver/_basic_agent.py +1 -1
- inspect_ai/solver/_bridge/__init__.py +3 -0
- inspect_ai/solver/_bridge/bridge.py +100 -0
- inspect_ai/solver/_bridge/patch.py +170 -0
- inspect_ai/solver/_prompt.py +35 -5
- inspect_ai/solver/_solver.py +6 -0
- inspect_ai/solver/_task_state.py +80 -38
- inspect_ai/tool/__init__.py +2 -0
- inspect_ai/tool/_tool.py +12 -1
- inspect_ai/tool/_tool_call.py +10 -0
- inspect_ai/tool/_tool_def.py +16 -5
- inspect_ai/tool/_tool_with.py +21 -4
- inspect_ai/tool/beta/__init__.py +5 -0
- inspect_ai/tool/beta/_computer/__init__.py +3 -0
- inspect_ai/tool/beta/_computer/_common.py +133 -0
- inspect_ai/tool/beta/_computer/_computer.py +155 -0
- inspect_ai/tool/beta/_computer/_computer_split.py +198 -0
- inspect_ai/tool/beta/_computer/_resources/Dockerfile +100 -0
- inspect_ai/tool/beta/_computer/_resources/README.md +30 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/entrypoint.sh +18 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/novnc_startup.sh +20 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/x11vnc_startup.sh +48 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/xfce_startup.sh +13 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/xvfb_startup.sh +48 -0
- inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +10 -0
- inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +10 -0
- inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/XPaint.desktop +10 -0
- inspect_ai/tool/beta/_computer/_resources/tool/__init__.py +0 -0
- inspect_ai/tool/beta/_computer/_resources/tool/_logger.py +22 -0
- inspect_ai/tool/beta/_computer/_resources/tool/_run.py +42 -0
- inspect_ai/tool/beta/_computer/_resources/tool/_tool_result.py +33 -0
- inspect_ai/tool/beta/_computer/_resources/tool/_x11_client.py +262 -0
- inspect_ai/tool/beta/_computer/_resources/tool/computer_tool.py +85 -0
- inspect_ai/tool/beta/_computer/_resources/tool/requirements.txt +0 -0
- inspect_ai/util/__init__.py +2 -0
- inspect_ai/util/_display.py +5 -0
- inspect_ai/util/_limit.py +26 -0
- inspect_ai/util/_sandbox/docker/docker.py +64 -1
- inspect_ai/util/_sandbox/docker/internal.py +3 -1
- inspect_ai/util/_sandbox/docker/prereqs.py +1 -1
- inspect_ai/util/_sandbox/environment.py +14 -0
- {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/METADATA +3 -2
- {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/RECORD +159 -126
- inspect_ai/_view/www/src/api/Types.mjs +0 -117
- inspect_ai/_view/www/src/api/api-http.mjs +0 -300
- inspect_ai/_view/www/src/api/api-shared.mjs +0 -10
- inspect_ai/_view/www/src/api/index.mjs +0 -49
- inspect_ai/_view/www/src/api/jsonrpc.mjs +0 -208
- inspect_ai/_view/www/src/samples/transcript/TranscriptState.mjs +0 -70
- inspect_ai/_view/www/src/utils/vscode.mjs +0 -16
- {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/LICENSE +0 -0
- {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,198 @@
|
|
1
|
+
"""
|
2
|
+
This module provides the same functionality as the computer tool but via a list of per-action tools, e.g. computer_mouse_move(100, 100).
|
3
|
+
|
4
|
+
The split version is not publicly exported, but is retained until we decide if it performs better than the monolithic computer tool.
|
5
|
+
"""
|
6
|
+
|
7
|
+
from typing import Awaitable, Callable
|
8
|
+
|
9
|
+
from inspect_ai.tool import Tool, ToolResult, tool
|
10
|
+
|
11
|
+
from . import _common as common
|
12
|
+
|
13
|
+
ActionFunction = Callable[[str], ToolResult | Awaitable[ToolResult]]
|
14
|
+
|
15
|
+
|
16
|
+
def computer_split(timeout: int | None = None) -> list[Tool]:
    """
    Computer interaction tools.

    Builds one tool per computer action (cursor position, screenshot,
    mouse move, clicks, drag, key press, typing).

    Args:
      timeout (int | None): Timeout (in seconds) for command. The value is
        forwarded to every per-action tool so that it is actually applied.

    Returns:
      List of computer interaction tools.
    """
    # Every factory below accepts the same `timeout` keyword; pass it through
    # so the caller's timeout is not silently dropped.
    return [
        computer_cursor_position(timeout=timeout),
        computer_screenshot(timeout=timeout),
        computer_mouse_move(timeout=timeout),
        computer_left_click(timeout=timeout),
        computer_double_click(timeout=timeout),
        computer_left_click_drag(timeout=timeout),
        computer_right_click(timeout=timeout),
        computer_key(timeout=timeout),
        computer_type(timeout=timeout),
    ]
|
37
|
+
|
38
|
+
|
39
|
+
# Tool factory: the returned coroutine delegates to `common.cursor_position`,
# forwarding the `timeout` captured from this factory call.
# NOTE(review): the inner docstring is presumably consumed by `@tool` as the
# tool description — confirm before rewording it.
@tool()
def computer_cursor_position(timeout: int | None = None) -> Tool:
    async def execute() -> ToolResult:
        """
        Get the current (x, y) pixel coordinate of the cursor on the screen.

        Args:
          None

        Returns:
          A `str` of the form "x y" where x and y are the current mouse coordinates.
        """
        return await common.cursor_position(timeout=timeout)

    return execute
|
54
|
+
|
55
|
+
|
56
|
+
# Tool factory: the returned coroutine delegates to `common.screenshot`,
# forwarding the `timeout` captured from this factory call.
@tool()
def computer_screenshot(timeout: int | None = None) -> Tool:
    async def execute() -> ToolResult:
        """
        Take a screenshot.

        Args:
          None

        Returns:
          A `list` with a single `ContentImage` of the screen.
        """
        return await common.screenshot(timeout=timeout)

    return execute
|
71
|
+
|
72
|
+
|
73
|
+
# Tool factory: the returned coroutine delegates to `common.mouse_move` with
# the target coordinates, forwarding the `timeout` captured from this factory call.
@tool()
def computer_mouse_move(timeout: int | None = None) -> Tool:
    async def execute(x: int, y: int) -> ToolResult:
        """
        Move the cursor to a specified (x, y) pixel coordinate on the screen.

        Args:
          x: X coordinate of the mouse destination.
          y: Y coordinate of the mouse destination.

        Returns:
          A `list` with a single `ContentImage` of the screen.
        """
        return await common.mouse_move(x, y, timeout=timeout)

    return execute
|
89
|
+
|
90
|
+
|
91
|
+
# Tool factory: the returned coroutine delegates to `common.left_click`,
# forwarding the `timeout` captured from this factory call.
@tool()
def computer_left_click(timeout: int | None = None) -> Tool:
    async def execute() -> ToolResult:
        """
        Click the left mouse button.

        Args:
          None

        Returns:
          A `list` with a single `ContentImage` of the screen.
        """
        return await common.left_click(timeout=timeout)

    return execute
|
106
|
+
|
107
|
+
|
108
|
+
# Tool factory: the returned coroutine delegates to `common.double_click`,
# forwarding the `timeout` captured from this factory call.
@tool()
def computer_double_click(timeout: int | None = None) -> Tool:
    async def execute() -> ToolResult:
        """
        Double-click the left mouse button.

        Args:
          None

        Returns:
          A `list` with a single `ContentImage` of the screen.
        """
        return await common.double_click(timeout=timeout)

    return execute
|
123
|
+
|
124
|
+
|
125
|
+
# Tool factory: the returned coroutine delegates to `common.left_click_drag`
# with the destination coordinates, forwarding the `timeout` captured from
# this factory call.
@tool()
def computer_left_click_drag(timeout: int | None = None) -> Tool:
    async def execute(x: int, y: int) -> ToolResult:
        """
        Click and drag the cursor to a specified (x, y) pixel coordinate on the screen.

        Args:
          x: X coordinate of the mouse destination.
          y: Y coordinate of the mouse destination.

        Returns:
          A `list` with a single `ContentImage` of the screen.
        """
        return await common.left_click_drag(x, y, timeout=timeout)

    return execute
|
141
|
+
|
142
|
+
|
143
|
+
# Tool factory: the returned coroutine delegates to `common.right_click`,
# forwarding the `timeout` captured from this factory call.
@tool()
def computer_right_click(timeout: int | None = None) -> Tool:
    async def execute() -> ToolResult:
        """
        Click the right mouse button.

        Args:
          None

        Returns:
          A `list` with a single `ContentImage` of the screen.
        """
        return await common.right_click(timeout=timeout)

    return execute
|
158
|
+
|
159
|
+
|
160
|
+
# keysym list is from https://gist.github.com/rvaiya/be31f42049a4b5ad46666a8e120d9843
# Tool factory: the returned coroutine delegates to `common.press_key` with
# the requested key name, forwarding the `timeout` captured from this factory call.
@tool()
def computer_key(timeout: int | None = None) -> Tool:
    async def execute(key: str) -> ToolResult:
        """
        Press a key or key-combination on the keyboard.

        Args:
          key: The key or key-combination to press. Can be any key name supported by xdotool's `key` such as:
            "Return", "Escape", "alt+Tab", "BackSpace", "Tab", "alt+Tab", "ctrl+s", "Up", "KP_0" (for the numpad 0 key),
            "Insert", "Delete", "Home", "End", "Prior", "Next", "Left", "Up", "Right", "Down",
            "F1", "F2", "F3", "F4", "F5", "F6", "F7", "F8", "F9", "F10", "F11", "F12",
            "Shift_L", "Shift_R", "Control_L", "Control_R", "Alt_L", "Alt_R", "Scroll_Lock", "Num_Lock", "Caps_Lock", "Pause",
            "KP_Multiply", "KP_Home", "KP_Up", "KP_Prior", "KP_Subtract", "KP_Left", "KP_Begin", "KP_Right", "KP_Add", "KP_End","KP_Down",
            "KP_Next", "KP_Insert", "KP_Delete", "KP_Enter", "KP_Divide", "KP_Equal", "KP_Decimal"

        Returns:
          A `list` with a single `ContentImage` of the screen.
        """
        return await common.press_key(key, timeout=timeout)

    return execute
|
182
|
+
|
183
|
+
|
184
|
+
# Tool factory: the returned coroutine delegates to `common.type` with the
# text to enter, forwarding the `timeout` captured from this factory call.
@tool()
def computer_type(timeout: int | None = None) -> Tool:
    async def execute(text: str) -> ToolResult:
        """
        Type a string of text on the keyboard.

        Args:
          text: The text to type. If the text contains spaces, enclose it in quotes.

        Returns:
          A `list` with a single `ContentImage` of the screen.
        """
        return await common.type(text, timeout=timeout)

    return execute
|
@@ -0,0 +1,100 @@
|
|
1
|
+
# Desktop image for the computer tool: an Ubuntu base with a virtual X display,
# the xfce4 desktop, VNC/noVNC access, and a few GUI applications.
FROM docker.io/ubuntu:22.04

# Keep apt from prompting during the image build.
ENV DEBIAN_FRONTEND=noninteractive
ENV DEBIAN_PRIORITY=high

# Core/system layer
RUN apt-get update && \
    apt-get -y upgrade && \
    apt-get -y install \
    # A virtual framebuffer for running GUI applications without a physical display.
    xvfb \
    # A lightweight desktop environment for UNIX-like operating systems.
    xfce4 \
    # The terminal emulator for the xfce4 desktop environment.
    xfce4-terminal\
    # A VNC server for sharing X11 desktops.
    x11vnc \
    # A web based VNC client
    novnc \
    # A WebSocket to TCP proxy/bridge for noVNC
    websockify \
    # The Python programming language interpreter.
    python3 \
    # The package installer for Python.
    python3-pip \
    # A command-line tool for automating X11 applications (e.g., simulating keyboard/mouse inputs).
    xdotool \
    # A command-line tool for taking screenshots.
    scrot \
    # A suite for image manipulation — needed for scaling images.
    imagemagick && \
    apt-get clean

# Userland apt-get'able apps
# NOTE(review): this layer relies on the package lists fetched by the
# `apt-get update` in the layer above (no update is run here).
RUN apt-get install -y --no-install-recommends \
    # A simple paint / image editing program.
    xpaint \
    # A calculator application.
    galculator && \
    apt-get clean

# install Firefox
RUN apt-get install -y software-properties-common && \
    add-apt-repository ppa:mozillateam/ppa && \
    apt-get update && \
    apt-get install -y --no-install-recommends firefox-esr && \
    apt-get clean

# install VS Code
# The Microsoft signing key is de-armored into the current directory and then
# installed under /etc/apt/keyrings, which the apt source entry references.
# NOTE(review): the intermediate packages.microsoft.gpg file is not removed
# afterwards — confirm whether it should be cleaned up.
RUN apt-get install -y \
    gpg \
    wget \
    apt-transport-https \
    software-properties-common && \
    wget -qO- https://packages.microsoft.com/keys/microsoft.asc | gpg --dearmor > packages.microsoft.gpg && \
    install -D -o root -g root -m 644 packages.microsoft.gpg /etc/apt/keyrings/packages.microsoft.gpg && \
    sh -c 'echo "deb [arch=amd64,arm64 signed-by=/etc/apt/keyrings/packages.microsoft.gpg] https://packages.microsoft.com/repos/code stable main" > /etc/apt/sources.list.d/vscode.list' && \
    apt-get update && \
    apt-get install -y code && \
    apt-get clean

# configure noVNC
# Expose vnc.html as index.html in the directory websockify serves.
RUN ln -s /usr/share/novnc/vnc.html /usr/share/novnc/index.html

# We copy requirements.txt by itself so that changes to the scripts will be in a later layer
# and we only pip install if requirements.txt changes
COPY tool/requirements.txt /opt/inspect/tool/requirements.txt
RUN cd /opt/inspect/tool && pip3 install --no-cache-dir -r requirements.txt

COPY tool/ /opt/inspect/tool
COPY entrypoint/ /opt/inspect/entrypoint
RUN chmod -R 755 /opt/inspect

# setup user
ENV USERNAME=user
ENV HOME=/home/$USERNAME
RUN useradd -m -s /bin/bash -d $HOME $USERNAME
# NOTE(review): `sudo` is not in the explicit install lists above; this entry
# only takes effect if the package is present — verify.
RUN echo "${USERNAME} ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers
USER ${USERNAME}
WORKDIR $HOME
COPY --chown=$USERNAME:$USERNAME image_home_dir/ $HOME

# configure Firefox to skip all 'first run' UI
RUN mkdir -p $HOME/.mozilla/firefox-esr/profile.default && \
    echo 'user_pref("browser.startup.homepage_override.mstone", "ignore");' >> $HOME/.mozilla/firefox-esr/profile.default/user.js && \
    echo 'user_pref("browser.aboutwelcome.enabled", false);' >> $HOME/.mozilla/firefox-esr/profile.default/user.js && \
    echo 'user_pref("datareporting.policy.firstRunURL", "");' >> $HOME/.mozilla/firefox-esr/profile.default/user.js

# 5900 is the x11vnc RFB port and 6080 the websockify/noVNC port used by the
# entrypoint scripts.
EXPOSE 5900
EXPOSE 6080

# Build-time display settings, re-exported as environment variables so the
# entrypoint scripts can read them at runtime.
ARG DISPLAY_NUM=1
ARG WIDTH=1920
ARG HEIGHT=1080
ENV DISPLAY_NUM=$DISPLAY_NUM
ENV DISPLAY=:${DISPLAY_NUM}
ENV HEIGHT=$HEIGHT
ENV WIDTH=$WIDTH

ENTRYPOINT [ "/opt/inspect/entrypoint/entrypoint.sh" ]
|
@@ -0,0 +1,30 @@
|
|
1
|
+
# About This Image
|
2
|
+
|
3
|
+
This image was inspired by Anthropic's Computer Use Demo [here](https://github.com/anthropics/anthropic-quickstarts/tree/main/computer-use-demo/image).
|
4
|
+
|
5
|
+
Its goal is to provide the minimum infrastructure to support the use of Inspect's `computer_tool` to interact with the computer via X11 and `xdotool`, while also providing observability and interaction via VNC and noVNC.
|
6
|
+
|
7
|
+
The image extends this minimal functionality by adding a few basic applications — VS Code, Firefox, XPaint, and galculator.
|
8
|
+
|
9
|
+
## Entrypoint Directory
|
10
|
+
|
11
|
+
1. **Xvfb (X Virtual Framebuffer)**
|
12
|
+
- **Script:** `xvfb_startup.sh`
|
13
|
+
- **Description:** Xvfb is a display server that implements the X11 display server protocol. It runs in memory and does not require a physical display, useful for running graphical applications in a headless environment.
|
14
|
+
|
15
|
+
1. **xfce4**
|
16
|
+
- **Script:** `xfce_startup.sh`
|
17
|
+
- **Description:** xfce4 is a lightweight desktop environment for UNIX-like operating systems. It aims to be fast, low on system resources, and user-friendly.
|
18
|
+
|
19
|
+
1. **x11vnc**
|
20
|
+
- **Script:** `x11vnc_startup.sh`
|
21
|
+
- **Description:** x11vnc is a VNC server that allows remote access to the X11 display. It enables users to connect to the virtual display environment from a remote machine using a VNC client.
|
22
|
+
|
23
|
+
1. **noVNC**
|
24
|
+
- **Script:** `novnc_startup.sh`
|
25
|
+
- **Description:** noVNC is a VNC client that runs in a web browser. It allows users to access the virtual display environment through a web interface without needing a separate VNC client application.
|
26
|
+
|
27
|
+
## Desktop Directory
|
28
|
+
|
29
|
+
The `Desktop` directory contains launchers for VS Code, Firefox and XPaint.
|
30
|
+
|
@@ -0,0 +1,18 @@
|
|
1
|
+
#!/bin/bash
# Container entrypoint: starts the display stack (Xvfb, xfce, x11vnc, noVNC)
# and then either hands control to the provided CMD or idles forever.
set -e

# remove marker files left over from a previous run of this container
rm -f "/tmp/.X${DISPLAY_NUM}-lock"
rm -f /tmp/xfce_started

# Start services in dependency order; with `set -e`, a failing startup
# script aborts the entrypoint.
/opt/inspect/entrypoint/xvfb_startup.sh
/opt/inspect/entrypoint/xfce_startup.sh
/opt/inspect/entrypoint/x11vnc_startup.sh
/opt/inspect/entrypoint/novnc_startup.sh

# Run CMD if provided. `exec` replaces this shell, so nothing below runs in
# that case. "$*" is used in the message because "$@" inside a larger string
# expands to several words.
if [ "$#" -gt 0 ]; then
    echo "Executing CMD from derived Dockerfile: $*"
    exec "$@"
fi

# No CMD was provided: keep the container running
tail -f /dev/null
|
@@ -0,0 +1,20 @@
|
|
1
|
+
#!/bin/bash
# Starts websockify serving the noVNC web client on port 6080, proxying to the
# VNC server on localhost:5900, and waits until the port is listening.
echo "starting noVNC"

# Start noVNC with explicit websocket settings
websockify \
    --web=/usr/share/novnc/ \
    6080 localhost:5900 \
    > /tmp/novnc.log 2>&1 &

# Wait for noVNC to start (poll once per second, up to 10 attempts)
timeout=10
while [ $timeout -gt 0 ]; do
    if netstat -tuln | grep -q ":6080 "; then
        break
    fi
    sleep 1
    ((timeout--))
done

# The counter only reaches 0 when the port never came up; report that instead
# of claiming success (mirrors the check in x11vnc_startup.sh).
if [ $timeout -eq 0 ]; then
    echo "noVNC failed to start, log output:" >&2
    cat /tmp/novnc.log >&2
    exit 1
fi

echo "noVNC started successfully"
|
@@ -0,0 +1,48 @@
|
|
1
|
+
#!/bin/bash
# Starts x11vnc against $DISPLAY on port 5900, waits for the port to listen,
# then leaves a background watchdog that re-runs this script if the server dies.
echo "starting vnc"

# Run x11vnc in a backgrounded subshell, capturing its stderr to a log file.
# NOTE(review): -cursor is passed twice ("most" then "arrow"); presumably the
# later value takes effect — confirm against the x11vnc documentation.
(x11vnc -display $DISPLAY \
    -forever \
    -shared \
    -wait 50 \
    -cursor most \
    -cursor arrow \
    -rfbport 5900 \
    -nopw \
    2>/tmp/x11vnc_stderr.log) &

# PID of the background job started above; used by the watchdog below.
x11vnc_pid=$!

# Wait for x11vnc to start (poll once per second, up to 10 attempts)
timeout=10
while [ $timeout -gt 0 ]; do
    if netstat -tuln | grep -q ":5900 "; then
        break
    fi
    sleep 1
    ((timeout--))
done

# The counter only reaches 0 when the port never came up.
if [ $timeout -eq 0 ]; then
    echo "x11vnc failed to start, stderr output:" >&2
    cat /tmp/x11vnc_stderr.log >&2
    exit 1
fi

# Truncate the startup log so a later crash report shows only new output.
: > /tmp/x11vnc_stderr.log

# Monitor x11vnc process in the background
(
    while true; do
        # `kill -0` sends no signal; it only tests whether the PID still exists.
        if ! kill -0 $x11vnc_pid 2>/dev/null; then
            echo "x11vnc process crashed, restarting..." >&2
            if [ -f /tmp/x11vnc_stderr.log ]; then
                echo "x11vnc stderr output:" >&2
                cat /tmp/x11vnc_stderr.log >&2
                rm /tmp/x11vnc_stderr.log
            fi
            # Replace this watchdog subshell with a fresh run of this script,
            # which starts x11vnc again and installs a new watchdog.
            exec "$0"
        fi
        sleep 5
    done
) &
|
@@ -0,0 +1,48 @@
|
|
1
|
+
#!/bin/bash
# Starts Xvfb on $DISPLAY at ${WIDTH}x${HEIGHT}x24 unless a lock file for
# display ${DISPLAY_NUM} already exists, then waits until the display answers.
set -e  # Exit on error

DPI=96
# Screen geometry passed to Xvfb's -screen option: width x height x depth.
RES_AND_DEPTH=${WIDTH}x${HEIGHT}x24

# Function to check if Xvfb is already running
# (detected by the presence of the display's lock file under /tmp)
check_xvfb_running() {
    if [ -e /tmp/.X${DISPLAY_NUM}-lock ]; then
        return 0  # Xvfb is already running
    else
        return 1  # Xvfb is not running
    fi
}

# Function to check if Xvfb is ready
# Polls `xdpyinfo` every 0.1s; returns 1 once more than $timeout seconds
# have elapsed without a successful query.
wait_for_xvfb() {
    local timeout=10
    local start_time=$(date +%s)
    while ! xdpyinfo >/dev/null 2>&1; do
        if [ $(($(date +%s) - start_time)) -gt $timeout ]; then
            echo "Xvfb failed to start within $timeout seconds" >&2
            return 1
        fi
        sleep 0.1
    done
    return 0
}

# Check if Xvfb is already running
if check_xvfb_running; then
    echo "Xvfb is already running on display ${DISPLAY}"
    exit 0
fi

# Start Xvfb
Xvfb $DISPLAY -ac -screen 0 $RES_AND_DEPTH -retro -dpi $DPI -nolisten tcp -nolisten unix &
XVFB_PID=$!

# Wait for Xvfb to start
# (called in an `if` condition, so a non-zero return does not trip `set -e`)
if wait_for_xvfb; then
    echo "Xvfb started successfully on display ${DISPLAY}"
    echo "Xvfb PID: $XVFB_PID"
else
    echo "Xvfb failed to start"
    kill $XVFB_PID
    exit 1
fi
|
File without changes
|
@@ -0,0 +1,22 @@
|
|
1
|
+
import logging
|
2
|
+
|
3
|
+
|
4
|
+
def setup_logger(level=logging.INFO):
    """
    This logger emits all of its output to PID 1's stdout.

    This makes it so that logging from invocations of the computer_tool cli show up in `docker logs` output.

    Args:
        level: Logging level applied to the logger and, when it is created,
            to the handler.

    Returns:
        The shared "computer_tool" logger.
    """
    new_logger = logging.getLogger("computer_tool")
    new_logger.setLevel(level)

    # Only open /proc/1/fd/1 when the logger has no handler yet. Creating the
    # FileHandler unconditionally would open (and then leak) a new file handle
    # on every repeated call, even though the handler is never attached.
    if not new_logger.handlers:
        stdout_handler = logging.FileHandler("/proc/1/fd/1", mode="w")
        stdout_handler.setLevel(level)
        stdout_handler.setFormatter(
            logging.Formatter("%(name)s(pid=%(process)d) - %(levelname)s - %(message)s")
        )
        new_logger.addHandler(stdout_handler)

    return new_logger
|
@@ -0,0 +1,42 @@
|
|
1
|
+
"""Utility to run shell commands asynchronously with a timeout."""
|
2
|
+
|
3
|
+
import asyncio
|
4
|
+
|
5
|
+
TRUNCATED_MESSAGE: str = "<response clipped><NOTE>To save on context only part of this file has been shown to you. You should retry this tool after you have searched inside the file with `grep -n` in order to find the line numbers of what you are looking for.</NOTE>"
|
6
|
+
MAX_RESPONSE_LEN: int = 16000
|
7
|
+
|
8
|
+
|
9
|
+
def maybe_truncate(content: str, truncate_after: int | None = MAX_RESPONSE_LEN):
|
10
|
+
"""Truncate content and append a notice if content exceeds the specified length."""
|
11
|
+
return (
|
12
|
+
content
|
13
|
+
if not truncate_after or len(content) <= truncate_after
|
14
|
+
else content[:truncate_after] + TRUNCATED_MESSAGE
|
15
|
+
)
|
16
|
+
|
17
|
+
|
18
|
+
async def run(
|
19
|
+
cmd: str,
|
20
|
+
timeout: float | None = 120.0, # seconds
|
21
|
+
truncate_after: int | None = MAX_RESPONSE_LEN,
|
22
|
+
):
|
23
|
+
"""Run a shell command asynchronously with a timeout."""
|
24
|
+
process = await asyncio.create_subprocess_shell(
|
25
|
+
cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
|
26
|
+
)
|
27
|
+
|
28
|
+
try:
|
29
|
+
stdout, stderr = await asyncio.wait_for(process.communicate(), timeout=timeout)
|
30
|
+
return (
|
31
|
+
process.returncode or 0,
|
32
|
+
maybe_truncate(stdout.decode(), truncate_after=truncate_after),
|
33
|
+
maybe_truncate(stderr.decode(), truncate_after=truncate_after),
|
34
|
+
)
|
35
|
+
except asyncio.TimeoutError as exc:
|
36
|
+
try:
|
37
|
+
process.kill()
|
38
|
+
except ProcessLookupError:
|
39
|
+
pass
|
40
|
+
raise TimeoutError(
|
41
|
+
f"Command '{cmd}' timed out after {timeout} seconds"
|
42
|
+
) from exc
|
@@ -0,0 +1,33 @@
|
|
1
|
+
from dataclasses import dataclass, fields, replace
|
2
|
+
|
3
|
+
|
4
|
+
@dataclass(kw_only=True, frozen=True)
|
5
|
+
class ToolResult:
|
6
|
+
"""Represents the result of a tool execution."""
|
7
|
+
|
8
|
+
output: str | None = None
|
9
|
+
error: str | None = None
|
10
|
+
base64_image: str | None = None
|
11
|
+
|
12
|
+
def __bool__(self):
|
13
|
+
return any(getattr(self, field.name) for field in fields(self))
|
14
|
+
|
15
|
+
def __add__(self, other: "ToolResult"):
|
16
|
+
def combine_fields(
|
17
|
+
field: str | None, other_field: str | None, concatenate: bool = True
|
18
|
+
):
|
19
|
+
if field and other_field:
|
20
|
+
if concatenate:
|
21
|
+
return field + other_field
|
22
|
+
raise ValueError("Cannot combine tool results")
|
23
|
+
return field or other_field
|
24
|
+
|
25
|
+
return ToolResult(
|
26
|
+
output=combine_fields(self.output, other.output),
|
27
|
+
error=combine_fields(self.error, other.error),
|
28
|
+
base64_image=combine_fields(self.base64_image, other.base64_image, False),
|
29
|
+
)
|
30
|
+
|
31
|
+
def replace(self, **kwargs):
|
32
|
+
"""Returns a new ToolResult with the given fields replaced."""
|
33
|
+
return replace(self, **kwargs)
|