inspect-ai 0.3.58__py3-none-any.whl → 0.3.60__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (166) hide show
  1. inspect_ai/_cli/common.py +3 -1
  2. inspect_ai/_cli/eval.py +15 -9
  3. inspect_ai/_display/core/active.py +4 -1
  4. inspect_ai/_display/core/config.py +3 -3
  5. inspect_ai/_display/core/panel.py +7 -3
  6. inspect_ai/_display/plain/__init__.py +0 -0
  7. inspect_ai/_display/plain/display.py +203 -0
  8. inspect_ai/_display/rich/display.py +0 -5
  9. inspect_ai/_display/textual/widgets/port_mappings.py +110 -0
  10. inspect_ai/_display/textual/widgets/samples.py +79 -12
  11. inspect_ai/_display/textual/widgets/sandbox.py +37 -0
  12. inspect_ai/_eval/eval.py +10 -1
  13. inspect_ai/_eval/loader.py +79 -19
  14. inspect_ai/_eval/registry.py +6 -0
  15. inspect_ai/_eval/score.py +3 -1
  16. inspect_ai/_eval/task/results.py +51 -22
  17. inspect_ai/_eval/task/run.py +47 -13
  18. inspect_ai/_eval/task/sandbox.py +10 -5
  19. inspect_ai/_util/constants.py +1 -0
  20. inspect_ai/_util/port_names.py +61 -0
  21. inspect_ai/_util/text.py +23 -0
  22. inspect_ai/_view/www/App.css +31 -1
  23. inspect_ai/_view/www/dist/assets/index.css +31 -1
  24. inspect_ai/_view/www/dist/assets/index.js +25498 -2044
  25. inspect_ai/_view/www/log-schema.json +32 -2
  26. inspect_ai/_view/www/package.json +2 -0
  27. inspect_ai/_view/www/src/App.mjs +14 -16
  28. inspect_ai/_view/www/src/Types.mjs +1 -2
  29. inspect_ai/_view/www/src/api/Types.ts +133 -0
  30. inspect_ai/_view/www/src/api/{api-browser.mjs → api-browser.ts} +25 -13
  31. inspect_ai/_view/www/src/api/api-http.ts +219 -0
  32. inspect_ai/_view/www/src/api/api-shared.ts +47 -0
  33. inspect_ai/_view/www/src/api/{api-vscode.mjs → api-vscode.ts} +22 -19
  34. inspect_ai/_view/www/src/api/{client-api.mjs → client-api.ts} +93 -53
  35. inspect_ai/_view/www/src/api/index.ts +51 -0
  36. inspect_ai/_view/www/src/api/jsonrpc.ts +225 -0
  37. inspect_ai/_view/www/src/components/ChatView.mjs +133 -43
  38. inspect_ai/_view/www/src/components/DownloadButton.mjs +1 -1
  39. inspect_ai/_view/www/src/components/ExpandablePanel.mjs +0 -4
  40. inspect_ai/_view/www/src/components/LargeModal.mjs +19 -20
  41. inspect_ai/_view/www/src/components/TabSet.mjs +3 -1
  42. inspect_ai/_view/www/src/components/VirtualList.mjs +266 -84
  43. inspect_ai/_view/www/src/index.js +77 -4
  44. inspect_ai/_view/www/src/log/{remoteLogFile.mjs → remoteLogFile.ts} +62 -46
  45. inspect_ai/_view/www/src/navbar/Navbar.mjs +4 -1
  46. inspect_ai/_view/www/src/navbar/SecondaryBar.mjs +19 -10
  47. inspect_ai/_view/www/src/samples/SampleDialog.mjs +5 -1
  48. inspect_ai/_view/www/src/samples/SampleDisplay.mjs +23 -15
  49. inspect_ai/_view/www/src/samples/SampleList.mjs +19 -49
  50. inspect_ai/_view/www/src/samples/SampleScores.mjs +1 -1
  51. inspect_ai/_view/www/src/samples/SampleTranscript.mjs +8 -3
  52. inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +38 -26
  53. inspect_ai/_view/www/src/samples/SamplesTab.mjs +14 -11
  54. inspect_ai/_view/www/src/samples/SamplesTools.mjs +8 -8
  55. inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +712 -89
  56. inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +2 -2
  57. inspect_ai/_view/www/src/samples/tools/filters.mjs +260 -87
  58. inspect_ai/_view/www/src/samples/transcript/ErrorEventView.mjs +24 -2
  59. inspect_ai/_view/www/src/samples/transcript/EventPanel.mjs +29 -24
  60. inspect_ai/_view/www/src/samples/transcript/EventRow.mjs +1 -1
  61. inspect_ai/_view/www/src/samples/transcript/InfoEventView.mjs +24 -2
  62. inspect_ai/_view/www/src/samples/transcript/InputEventView.mjs +24 -2
  63. inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +31 -10
  64. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.mjs +24 -2
  65. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.mjs +23 -2
  66. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.mjs +24 -2
  67. inspect_ai/_view/www/src/samples/transcript/StepEventView.mjs +33 -3
  68. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.mjs +25 -2
  69. inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +25 -2
  70. inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +193 -11
  71. inspect_ai/_view/www/src/samples/transcript/Types.mjs +10 -0
  72. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +26 -2
  73. inspect_ai/_view/www/src/types/log.d.ts +13 -2
  74. inspect_ai/_view/www/src/utils/Format.mjs +10 -3
  75. inspect_ai/_view/www/src/utils/{Json.mjs → json-worker.ts} +13 -9
  76. inspect_ai/_view/www/src/utils/vscode.ts +36 -0
  77. inspect_ai/_view/www/src/workspace/WorkSpace.mjs +11 -5
  78. inspect_ai/_view/www/vite.config.js +7 -0
  79. inspect_ai/_view/www/yarn.lock +116 -0
  80. inspect_ai/approval/_human/__init__.py +0 -0
  81. inspect_ai/approval/_human/manager.py +1 -1
  82. inspect_ai/approval/_policy.py +12 -6
  83. inspect_ai/log/_log.py +1 -1
  84. inspect_ai/log/_samples.py +16 -0
  85. inspect_ai/log/_transcript.py +4 -1
  86. inspect_ai/model/_call_tools.py +59 -0
  87. inspect_ai/model/_conversation.py +16 -7
  88. inspect_ai/model/_generate_config.py +12 -12
  89. inspect_ai/model/_model.py +117 -18
  90. inspect_ai/model/_model_output.py +22 -2
  91. inspect_ai/model/_openai.py +383 -0
  92. inspect_ai/model/_providers/anthropic.py +152 -55
  93. inspect_ai/model/_providers/azureai.py +21 -21
  94. inspect_ai/model/_providers/bedrock.py +37 -40
  95. inspect_ai/model/_providers/goodfire.py +248 -0
  96. inspect_ai/model/_providers/google.py +46 -54
  97. inspect_ai/model/_providers/groq.py +7 -3
  98. inspect_ai/model/_providers/hf.py +6 -0
  99. inspect_ai/model/_providers/mistral.py +13 -12
  100. inspect_ai/model/_providers/openai.py +51 -218
  101. inspect_ai/model/_providers/openai_o1.py +11 -12
  102. inspect_ai/model/_providers/providers.py +23 -1
  103. inspect_ai/model/_providers/together.py +12 -12
  104. inspect_ai/model/_providers/util/__init__.py +2 -3
  105. inspect_ai/model/_providers/util/hf_handler.py +1 -1
  106. inspect_ai/model/_providers/util/llama31.py +1 -1
  107. inspect_ai/model/_providers/util/util.py +0 -76
  108. inspect_ai/model/_providers/vertex.py +1 -4
  109. inspect_ai/scorer/_metric.py +3 -0
  110. inspect_ai/scorer/_reducer/reducer.py +1 -1
  111. inspect_ai/scorer/_scorer.py +4 -3
  112. inspect_ai/solver/__init__.py +4 -5
  113. inspect_ai/solver/_basic_agent.py +1 -1
  114. inspect_ai/solver/_bridge/__init__.py +3 -0
  115. inspect_ai/solver/_bridge/bridge.py +100 -0
  116. inspect_ai/solver/_bridge/patch.py +170 -0
  117. inspect_ai/solver/_prompt.py +35 -5
  118. inspect_ai/solver/_solver.py +6 -0
  119. inspect_ai/solver/_task_state.py +80 -38
  120. inspect_ai/tool/__init__.py +2 -0
  121. inspect_ai/tool/_tool.py +12 -1
  122. inspect_ai/tool/_tool_call.py +10 -0
  123. inspect_ai/tool/_tool_def.py +16 -5
  124. inspect_ai/tool/_tool_with.py +21 -4
  125. inspect_ai/tool/beta/__init__.py +5 -0
  126. inspect_ai/tool/beta/_computer/__init__.py +3 -0
  127. inspect_ai/tool/beta/_computer/_common.py +133 -0
  128. inspect_ai/tool/beta/_computer/_computer.py +155 -0
  129. inspect_ai/tool/beta/_computer/_computer_split.py +198 -0
  130. inspect_ai/tool/beta/_computer/_resources/Dockerfile +100 -0
  131. inspect_ai/tool/beta/_computer/_resources/README.md +30 -0
  132. inspect_ai/tool/beta/_computer/_resources/entrypoint/entrypoint.sh +18 -0
  133. inspect_ai/tool/beta/_computer/_resources/entrypoint/novnc_startup.sh +20 -0
  134. inspect_ai/tool/beta/_computer/_resources/entrypoint/x11vnc_startup.sh +48 -0
  135. inspect_ai/tool/beta/_computer/_resources/entrypoint/xfce_startup.sh +13 -0
  136. inspect_ai/tool/beta/_computer/_resources/entrypoint/xvfb_startup.sh +48 -0
  137. inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +10 -0
  138. inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +10 -0
  139. inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/XPaint.desktop +10 -0
  140. inspect_ai/tool/beta/_computer/_resources/tool/__init__.py +0 -0
  141. inspect_ai/tool/beta/_computer/_resources/tool/_logger.py +22 -0
  142. inspect_ai/tool/beta/_computer/_resources/tool/_run.py +42 -0
  143. inspect_ai/tool/beta/_computer/_resources/tool/_tool_result.py +33 -0
  144. inspect_ai/tool/beta/_computer/_resources/tool/_x11_client.py +262 -0
  145. inspect_ai/tool/beta/_computer/_resources/tool/computer_tool.py +85 -0
  146. inspect_ai/tool/beta/_computer/_resources/tool/requirements.txt +0 -0
  147. inspect_ai/util/__init__.py +2 -0
  148. inspect_ai/util/_display.py +5 -0
  149. inspect_ai/util/_limit.py +26 -0
  150. inspect_ai/util/_sandbox/docker/docker.py +64 -1
  151. inspect_ai/util/_sandbox/docker/internal.py +3 -1
  152. inspect_ai/util/_sandbox/docker/prereqs.py +1 -1
  153. inspect_ai/util/_sandbox/environment.py +14 -0
  154. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/METADATA +3 -2
  155. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/RECORD +159 -126
  156. inspect_ai/_view/www/src/api/Types.mjs +0 -117
  157. inspect_ai/_view/www/src/api/api-http.mjs +0 -300
  158. inspect_ai/_view/www/src/api/api-shared.mjs +0 -10
  159. inspect_ai/_view/www/src/api/index.mjs +0 -49
  160. inspect_ai/_view/www/src/api/jsonrpc.mjs +0 -208
  161. inspect_ai/_view/www/src/samples/transcript/TranscriptState.mjs +0 -70
  162. inspect_ai/_view/www/src/utils/vscode.mjs +0 -16
  163. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/LICENSE +0 -0
  164. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/WHEEL +0 -0
  165. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/entry_points.txt +0 -0
  166. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,198 @@
1
+ """
2
+ This module provides the same functionality as the computer tool but via a list of per-action tools, e.g. computer_mouse_move(100, 100).
3
+
4
+ The split version is not publicly exported, but is retained until we decide if it performs better than the monolithic computer tool.
5
+ """
6
+
7
+ from typing import Awaitable, Callable
8
+
9
+ from inspect_ai.tool import Tool, ToolResult, tool
10
+
11
+ from . import _common as common
12
+
13
+ ActionFunction = Callable[[str], ToolResult | Awaitable[ToolResult]]
14
+
15
+
16
def computer_split(timeout: int | None = None) -> list[Tool]:
    """
    Computer interaction tools.

    Args:
      timeout (int | None): Timeout (in seconds) for command. The value is
        forwarded to every per-action tool in the returned list.

    Returns:
      List of computer interaction tools.
    """
    # Forward `timeout` to each factory; previously the argument was accepted
    # but silently dropped, so every tool ran with its own default (None).
    return [
        computer_cursor_position(timeout=timeout),
        computer_screenshot(timeout=timeout),
        computer_mouse_move(timeout=timeout),
        computer_left_click(timeout=timeout),
        computer_double_click(timeout=timeout),
        computer_left_click_drag(timeout=timeout),
        computer_right_click(timeout=timeout),
        computer_key(timeout=timeout),
        computer_type(timeout=timeout),
    ]
37
+
38
+
39
@tool()
def computer_cursor_position(timeout: int | None = None) -> Tool:
    """Build the per-action tool that reports the cursor location.

    Args:
      timeout: Timeout (in seconds) passed through to `common.cursor_position`.
    """

    async def execute() -> ToolResult:
        """
        Get the current (x, y) pixel coordinate of the cursor on the screen.

        Args:
          None

        Returns:
          A `str` of the form "x y" where x and y are the current mouse coordinates.
        """
        position = await common.cursor_position(timeout=timeout)
        return position

    return execute
54
+
55
+
56
@tool()
def computer_screenshot(timeout: int | None = None) -> Tool:
    """Build the per-action tool that captures the screen.

    Args:
      timeout: Timeout (in seconds) passed through to `common.screenshot`.
    """

    async def execute() -> ToolResult:
        """
        Take a screenshot.

        Args:
          None

        Returns:
          A `list` with a single `ContentImage` of the screen.
        """
        capture = await common.screenshot(timeout=timeout)
        return capture

    return execute
71
+
72
+
73
@tool()
def computer_mouse_move(timeout: int | None = None) -> Tool:
    """Build the per-action tool that moves the cursor.

    Args:
      timeout: Timeout (in seconds) passed through to `common.mouse_move`.
    """

    async def execute(x: int, y: int) -> ToolResult:
        """
        Move the cursor to a specified (x, y) pixel coordinate on the screen.

        Args:
          x: X coordinate of the mouse destination.
          y: Y coordinate of the mouse destination.

        Returns:
          A `list` with a single `ContentImage` of the screen.
        """
        outcome = await common.mouse_move(x, y, timeout=timeout)
        return outcome

    return execute
89
+
90
+
91
@tool()
def computer_left_click(timeout: int | None = None) -> Tool:
    """Build the per-action tool that performs a left click.

    Args:
      timeout: Timeout (in seconds) passed through to `common.left_click`.
    """

    async def execute() -> ToolResult:
        """
        Click the left mouse button.

        Args:
          None

        Returns:
          A `list` with a single `ContentImage` of the screen.
        """
        outcome = await common.left_click(timeout=timeout)
        return outcome

    return execute
106
+
107
+
108
@tool()
def computer_double_click(timeout: int | None = None) -> Tool:
    """Build the per-action tool that performs a double click.

    Args:
      timeout: Timeout (in seconds) passed through to `common.double_click`.
    """

    async def execute() -> ToolResult:
        """
        Double-click the left mouse button.

        Args:
          None

        Returns:
          A `list` with a single `ContentImage` of the screen.
        """
        outcome = await common.double_click(timeout=timeout)
        return outcome

    return execute
123
+
124
+
125
@tool()
def computer_left_click_drag(timeout: int | None = None) -> Tool:
    """Build the per-action tool that performs a click-and-drag.

    Args:
      timeout: Timeout (in seconds) passed through to `common.left_click_drag`.
    """

    async def execute(x: int, y: int) -> ToolResult:
        """
        Click and drag the cursor to a specified (x, y) pixel coordinate on the screen.

        Args:
          x: X coordinate of the mouse destination.
          y: Y coordinate of the mouse destination.

        Returns:
          A `list` with a single `ContentImage` of the screen.
        """
        outcome = await common.left_click_drag(x, y, timeout=timeout)
        return outcome

    return execute
141
+
142
+
143
@tool()
def computer_right_click(timeout: int | None = None) -> Tool:
    """Build the per-action tool that performs a right click.

    Args:
      timeout: Timeout (in seconds) passed through to `common.right_click`.
    """

    async def execute() -> ToolResult:
        """
        Click the right mouse button.

        Args:
          None

        Returns:
          A `list` with a single `ContentImage` of the screen.
        """
        outcome = await common.right_click(timeout=timeout)
        return outcome

    return execute
158
+
159
+
160
+ # keysym list is from https://gist.github.com/rvaiya/be31f42049a4b5ad46666a8e120d9843
161
@tool()
def computer_key(timeout: int | None = None) -> Tool:
    """Build the per-action tool that presses a key or key-combination.

    Args:
      timeout: Timeout (in seconds) passed through to `common.press_key`.
    """

    async def execute(key: str) -> ToolResult:
        """
        Press a key or key-combination on the keyboard.

        Args:
          key: The key or key-combination to press. Can be any key name supported by xdotool's `key` such as:
            "Return", "Escape", "alt+Tab", "BackSpace", "Tab", "alt+Tab", "ctrl+s", "Up", "KP_0" (for the numpad 0 key),
            "Insert", "Delete", "Home", "End", "Prior", "Next", "Left", "Up", "Right", "Down",
            "F1", "F2", "F3", "F4", "F5", "F6", "F7", "F8", "F9", "F10", "F11", "F12",
            "Shift_L", "Shift_R", "Control_L", "Control_R", "Alt_L", "Alt_R", "Scroll_Lock", "Num_Lock", "Caps_Lock", "Pause",
            "KP_Multiply", "KP_Home", "KP_Up", "KP_Prior", "KP_Subtract", "KP_Left", "KP_Begin", "KP_Right", "KP_Add", "KP_End","KP_Down",
            "KP_Next", "KP_Insert", "KP_Delete", "KP_Enter", "KP_Divide", "KP_Equal", "KP_Decimal"

        Returns:
          A `list` with a single `ContentImage` of the screen.
        """
        outcome = await common.press_key(key, timeout=timeout)
        return outcome

    return execute
182
+
183
+
184
@tool()
def computer_type(timeout: int | None = None) -> Tool:
    """Build the per-action tool that types text on the keyboard.

    Args:
      timeout: Timeout (in seconds) passed through to `common.type`.
    """

    async def execute(text: str) -> ToolResult:
        """
        Type a string of text on the keyboard.

        Args:
          text: The text to type. If the text contains spaces, enclose it in quotes.

        Returns:
          A `list` with a single `ContentImage` of the screen.
        """
        outcome = await common.type(text, timeout=timeout)
        return outcome

    return execute
@@ -0,0 +1,100 @@
1
FROM docker.io/ubuntu:22.04

ENV DEBIAN_FRONTEND=noninteractive
ENV DEBIAN_PRIORITY=high

# Core/system layer
RUN apt-get update && \
    apt-get -y upgrade && \
    apt-get -y install \
    # A virtual framebuffer for running GUI applications without a physical display.
    xvfb \
    # A lightweight desktop environment for UNIX-like operating systems.
    xfce4 \
    # The terminal emulator for the xfce4 desktop environment.
    xfce4-terminal \
    # A VNC server for sharing X11 desktops.
    x11vnc \
    # A web based VNC client
    novnc \
    # A WebSocket to TCP proxy/bridge for noVNC
    websockify \
    # The Python programming language interpreter.
    python3 \
    # The package installer for Python.
    python3-pip \
    # A command-line tool for automating X11 applications (e.g., simulating keyboard/mouse inputs).
    xdotool \
    # A command-line tool for taking screenshots.
    scrot \
    # A suite for image manipulation — needed for scaling images.
    imagemagick && \
    apt-get clean

# Userland apt-get'able apps
# NOTE: every install layer runs its own `apt-get update` so that a cached
# earlier layer with stale package lists cannot break a later rebuild.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
    # A simple paint program.
    xpaint \
    # A calculator application.
    galculator && \
    apt-get clean

# install Firefox
RUN apt-get update && \
    apt-get install -y software-properties-common && \
    add-apt-repository -y ppa:mozillateam/ppa && \
    apt-get update && \
    apt-get install -y --no-install-recommends firefox-esr && \
    apt-get clean

# install VS Code
RUN apt-get update && \
    apt-get install -y \
    gpg \
    wget \
    apt-transport-https \
    software-properties-common && \
    wget -qO- https://packages.microsoft.com/keys/microsoft.asc | gpg --dearmor > packages.microsoft.gpg && \
    install -D -o root -g root -m 644 packages.microsoft.gpg /etc/apt/keyrings/packages.microsoft.gpg && \
    # the key has been installed into /etc/apt/keyrings; drop the temporary copy
    rm -f packages.microsoft.gpg && \
    sh -c 'echo "deb [arch=amd64,arm64 signed-by=/etc/apt/keyrings/packages.microsoft.gpg] https://packages.microsoft.com/repos/code stable main" > /etc/apt/sources.list.d/vscode.list' && \
    apt-get update && \
    apt-get install -y code && \
    apt-get clean

# configure noVNC
RUN ln -s /usr/share/novnc/vnc.html /usr/share/novnc/index.html

# We copy requirements.txt by itself so that changes to the scripts will be in a later layer
# and we only pip install if requirements.txt changes
COPY tool/requirements.txt /opt/inspect/tool/requirements.txt
RUN cd /opt/inspect/tool && pip3 install --no-cache-dir -r requirements.txt

COPY tool/ /opt/inspect/tool
COPY entrypoint/ /opt/inspect/entrypoint
RUN chmod -R 755 /opt/inspect

# setup user
ENV USERNAME=user
ENV HOME=/home/$USERNAME
RUN useradd -m -s /bin/bash -d $HOME $USERNAME
RUN echo "${USERNAME} ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers
USER ${USERNAME}
WORKDIR $HOME
COPY --chown=$USERNAME:$USERNAME image_home_dir/ $HOME

# configure Firefox to skip all 'first run' UI
RUN mkdir -p $HOME/.mozilla/firefox-esr/profile.default && \
    echo 'user_pref("browser.startup.homepage_override.mstone", "ignore");' >> $HOME/.mozilla/firefox-esr/profile.default/user.js && \
    echo 'user_pref("browser.aboutwelcome.enabled", false);' >> $HOME/.mozilla/firefox-esr/profile.default/user.js && \
    echo 'user_pref("datareporting.policy.firstRunURL", "");' >> $HOME/.mozilla/firefox-esr/profile.default/user.js

# VNC (5900) and noVNC web client (6080)
EXPOSE 5900
EXPOSE 6080

# Build-time display settings, re-exported so the entrypoint scripts can read them
ARG DISPLAY_NUM=1
ARG WIDTH=1920
ARG HEIGHT=1080
ENV DISPLAY_NUM=$DISPLAY_NUM
ENV DISPLAY=:${DISPLAY_NUM}
ENV HEIGHT=$HEIGHT
ENV WIDTH=$WIDTH

ENTRYPOINT [ "/opt/inspect/entrypoint/entrypoint.sh" ]
@@ -0,0 +1,30 @@
1
+ # About This Image
2
+
3
+ This image was inspired by Anthropic's Computer Use Demo [here](https://github.com/anthropics/anthropic-quickstarts/tree/main/computer-use-demo/image).
4
+
5
+ Its goal is to provide the minimum infrastructure to support the use of Inspect's `computer_tool` to interact with the computer via X11 and `xdotool`, while also providing observability and interaction via VNC and noVNC.
6
+
7
+ The image extends this minimal functionality by adding a few basic applications — VS Code, Firefox, XPaint, and galculator.
8
+
9
+ ## Entrypoint Directory
10
+
11
+ 1. **Xvfb (X Virtual Framebuffer)**
12
+ - **Script:** `xvfb_startup.sh`
13
+ - **Description:** Xvfb is a display server that implements the X11 display server protocol. It runs in memory and does not require a physical display, useful for running graphical applications in a headless environment.
14
+
15
+ 1. **xfce4**
16
+ - **Script:** `xfce_startup.sh`
17
+ - **Description:** xfce4 is a lightweight desktop environment for UNIX-like operating systems. It aims to be fast, low on system resources, and user-friendly.
18
+
19
+ 1. **x11vnc**
20
+ - **Script:** `x11vnc_startup.sh`
21
+ - **Description:** x11vnc is a VNC server that allows remote access to the X11 display. It enables users to connect to the virtual display environment from a remote machine using a VNC client.
22
+
23
+ 1. **noVNC**
24
+ - **Script:** `novnc_startup.sh`
25
+ - **Description:** noVNC is a VNC client that runs in a web browser. It allows users to access the virtual display environment through a web interface without needing a separate VNC client application.
26
+
27
+ ## Desktop Directory
28
+
29
+ The `Desktop` directory contains launchers for VS Code, Firefox and XPaint.
30
+
@@ -0,0 +1,18 @@
1
#!/bin/bash
set -e

# Clear marker files left over from a previous run of this container.
rm -f /tmp/.X${DISPLAY_NUM}-lock
rm -f /tmp/xfce_started

# Bring the desktop stack up in dependency order; `set -e` aborts on the
# first startup script that exits non-zero.
for stage in xvfb xfce x11vnc novnc; do
    /opt/inspect/entrypoint/${stage}_startup.sh
done

# Run CMD if provided
echo "Executing CMD from derived Dockerfile: $@"
if [ "$#" -gt 0 ]; then
    # Replaces this shell, so nothing below runs when a CMD was supplied.
    exec "$@"
fi

# No CMD was given: keep the container running
tail -f /dev/null
@@ -0,0 +1,20 @@
1
#!/bin/bash
echo "starting noVNC"

# Start noVNC with explicit websocket settings
websockify \
    --web=/usr/share/novnc/ \
    6080 localhost:5900 \
    > /tmp/novnc.log 2>&1 &

# Wait for noVNC to start: poll once per second, at most 10 times,
# until something is listening on port 6080.
timeout=10
while [ $timeout -gt 0 ]; do
    if netstat -tuln | grep -q ":6080 "; then
        break
    fi
    sleep 1
    ((timeout--))
done

# The loop only reaches zero when the port never came up. Report the failure
# (same convention as x11vnc_startup.sh) instead of claiming success.
if [ $timeout -eq 0 ]; then
    echo "noVNC failed to start, log output:" >&2
    cat /tmp/novnc.log >&2
    exit 1
fi

echo "noVNC started successfully"
@@ -0,0 +1,48 @@
1
#!/bin/bash
echo "starting vnc"

# Options handed to x11vnc, kept in their original order.
vnc_args=(
    -display $DISPLAY
    -forever
    -shared
    -wait 50
    -cursor most
    -cursor arrow
    -rfbport 5900
    -nopw
)

# Launch in a background subshell, capturing stderr for diagnostics.
(x11vnc "${vnc_args[@]}" 2>/tmp/x11vnc_stderr.log) &

x11vnc_pid=$!

# Wait for x11vnc to start: poll once per second until port 5900 is
# listening or the budget of 10 polls is used up.
remaining=10
until [ $remaining -le 0 ] || netstat -tuln | grep -q ":5900 "; do
    sleep 1
    remaining=$((remaining - 1))
done

if [ $remaining -eq 0 ]; then
    echo "x11vnc failed to start, stderr output:" >&2
    cat /tmp/x11vnc_stderr.log >&2
    exit 1
fi

# Startup succeeded; discard whatever was logged so far.
: > /tmp/x11vnc_stderr.log

# Monitor x11vnc process in the background
(
    # Check every 5 seconds whether the process is still alive.
    while kill -0 $x11vnc_pid 2>/dev/null; do
        sleep 5
    done

    echo "x11vnc process crashed, restarting..." >&2
    if [ -f /tmp/x11vnc_stderr.log ]; then
        echo "x11vnc stderr output:" >&2
        cat /tmp/x11vnc_stderr.log >&2
        rm /tmp/x11vnc_stderr.log
    fi
    # Re-run this script from the top to start a fresh x11vnc.
    exec "$0"
) &
@@ -0,0 +1,13 @@
1
#!/bin/bash

echo "starting XFCE4"
startxfce4 &

# Poll once per second until the session process exists.
until pgrep -x "xfce4-session" > /dev/null; do
    echo "Waiting for XFCE4 to start..."
    sleep 1
done

echo "XFCE4 is fully started!"
# Marker file signalling that the desktop session is up.
touch /tmp/xfce_started
13
+
@@ -0,0 +1,48 @@
1
#!/bin/bash
set -e  # Exit on error

DPI=96
RES_AND_DEPTH=${WIDTH}x${HEIGHT}x24

# Succeeds when the X lock file for this display exists, which is taken to
# mean Xvfb is already running.
check_xvfb_running() {
    [ -e /tmp/.X${DISPLAY_NUM}-lock ]
}

# Polls xdpyinfo every 0.1s until it succeeds; gives up (returns 1) once
# more than 10 seconds have elapsed.
wait_for_xvfb() {
    local timeout=10
    local began=$(date +%s)
    until xdpyinfo >/dev/null 2>&1; do
        if [ $(($(date +%s) - began)) -gt $timeout ]; then
            echo "Xvfb failed to start within $timeout seconds" >&2
            return 1
        fi
        sleep 0.1
    done
    return 0
}

# Nothing to do if Xvfb is already running
if check_xvfb_running; then
    echo "Xvfb is already running on display ${DISPLAY}"
    exit 0
fi

# Start Xvfb
Xvfb $DISPLAY -ac -screen 0 $RES_AND_DEPTH -retro -dpi $DPI -nolisten tcp -nolisten unix &
XVFB_PID=$!

# Wait for Xvfb to start; on failure stop the process we launched and bail out
if ! wait_for_xvfb; then
    echo "Xvfb failed to start"
    kill $XVFB_PID
    exit 1
fi

echo "Xvfb started successfully on display ${DISPLAY}"
echo "Xvfb PID: $XVFB_PID"
@@ -0,0 +1,10 @@
1
+ [Desktop Entry]
2
+ Version=1.0
3
+ Type=Application
4
+ Name=Firefox Web Browser
5
+ Comment=Browse the World Wide Web
6
+ Exec=firefox-esr %u
7
+ Icon=firefox-esr
8
+ Path=
9
+ Terminal=false
10
+ StartupNotify=true
@@ -0,0 +1,10 @@
1
+ [Desktop Entry]
2
+ Version=1.0
3
+ Type=Application
4
+ Name=Visual Studio Code
5
+ Comment=Code Editing. Redefined.
6
+ Exec=/usr/share/code/code %F
7
+ Icon=vscode
8
+ Path=
9
+ Terminal=false
10
+ StartupNotify=false
@@ -0,0 +1,10 @@
1
+ [Desktop Entry]
2
+ Version=1.0
3
+ Type=Application
4
+ Name=XPaint
5
+ Comment=Xpaint painting application
6
+ Exec=xpaint
7
+ Icon=xpaint
8
+ Path=
9
+ Terminal=false
10
+ StartupNotify=false
@@ -0,0 +1,22 @@
1
+ import logging
2
+
3
+
4
def setup_logger(level: int = logging.INFO) -> logging.Logger:
    """
    This logger emits all of its output to PID 1's stdout.

    This makes it so that logging from invocations of the computer_tool cli show up in `docker logs` output.

    Args:
        level: Logging level applied to the "computer_tool" logger and, when
            it is created here, to its handler.

    Returns:
        The shared "computer_tool" logger.
    """
    new_logger = logging.getLogger("computer_tool")
    new_logger.setLevel(level)

    # Build the handler only when the logger has none. Constructing it
    # unconditionally opened /proc/1/fd/1 on every call and then discarded
    # the handler (and its open file) whenever one was already attached.
    if not new_logger.handlers:
        stdout_handler = logging.FileHandler("/proc/1/fd/1", mode="w")
        stdout_handler.setLevel(level)
        stdout_handler.setFormatter(
            logging.Formatter("%(name)s(pid=%(process)d) - %(levelname)s - %(message)s")
        )
        new_logger.addHandler(stdout_handler)

    return new_logger
@@ -0,0 +1,42 @@
1
+ """Utility to run shell commands asynchronously with a timeout."""
2
+
3
+ import asyncio
4
+
5
+ TRUNCATED_MESSAGE: str = "<response clipped><NOTE>To save on context only part of this file has been shown to you. You should retry this tool after you have searched inside the file with `grep -n` in order to find the line numbers of what you are looking for.</NOTE>"
6
+ MAX_RESPONSE_LEN: int = 16000
7
+
8
+
9
+ def maybe_truncate(content: str, truncate_after: int | None = MAX_RESPONSE_LEN):
10
+ """Truncate content and append a notice if content exceeds the specified length."""
11
+ return (
12
+ content
13
+ if not truncate_after or len(content) <= truncate_after
14
+ else content[:truncate_after] + TRUNCATED_MESSAGE
15
+ )
16
+
17
+
18
async def run(
    cmd: str,
    timeout: float | None = 120.0,  # seconds
    truncate_after: int | None = MAX_RESPONSE_LEN,
) -> tuple[int, str, str]:
    """Run a shell command asynchronously with a timeout.

    Args:
        cmd: Command line executed through the shell.
        timeout: Seconds to wait for the command to finish.
        truncate_after: Length limit applied separately to stdout and stderr
            via `maybe_truncate`.

    Returns:
        A `(returncode, stdout, stderr)` tuple; a missing return code is
        reported as 0.

    Raises:
        TimeoutError: If the command does not finish within `timeout`; the
            process is killed before the error is raised.
    """
    process = await asyncio.create_subprocess_shell(
        cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
    )

    try:
        stdout, stderr = await asyncio.wait_for(process.communicate(), timeout=timeout)
    except asyncio.TimeoutError as exc:
        try:
            process.kill()
        except ProcessLookupError:
            # The process exited on its own between the timeout and the kill.
            pass
        raise TimeoutError(
            f"Command '{cmd}' timed out after {timeout} seconds"
        ) from exc

    # Decode leniently: a command that emits bytes which are not valid UTF-8
    # should still yield its output rather than raise UnicodeDecodeError.
    return (
        process.returncode or 0,
        maybe_truncate(stdout.decode(errors="replace"), truncate_after=truncate_after),
        maybe_truncate(stderr.decode(errors="replace"), truncate_after=truncate_after),
    )
@@ -0,0 +1,33 @@
1
+ from dataclasses import dataclass, fields, replace
2
+
3
+
4
+ @dataclass(kw_only=True, frozen=True)
5
+ class ToolResult:
6
+ """Represents the result of a tool execution."""
7
+
8
+ output: str | None = None
9
+ error: str | None = None
10
+ base64_image: str | None = None
11
+
12
+ def __bool__(self):
13
+ return any(getattr(self, field.name) for field in fields(self))
14
+
15
+ def __add__(self, other: "ToolResult"):
16
+ def combine_fields(
17
+ field: str | None, other_field: str | None, concatenate: bool = True
18
+ ):
19
+ if field and other_field:
20
+ if concatenate:
21
+ return field + other_field
22
+ raise ValueError("Cannot combine tool results")
23
+ return field or other_field
24
+
25
+ return ToolResult(
26
+ output=combine_fields(self.output, other.output),
27
+ error=combine_fields(self.error, other.error),
28
+ base64_image=combine_fields(self.base64_image, other.base64_image, False),
29
+ )
30
+
31
+ def replace(self, **kwargs):
32
+ """Returns a new ToolResult with the given fields replaced."""
33
+ return replace(self, **kwargs)