inspect-ai 0.3.58__py3-none-any.whl → 0.3.59__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127) hide show
  1. inspect_ai/_cli/common.py +3 -1
  2. inspect_ai/_cli/eval.py +15 -2
  3. inspect_ai/_display/core/active.py +4 -1
  4. inspect_ai/_display/core/config.py +3 -3
  5. inspect_ai/_display/core/panel.py +7 -3
  6. inspect_ai/_display/plain/__init__.py +0 -0
  7. inspect_ai/_display/plain/display.py +203 -0
  8. inspect_ai/_display/rich/display.py +0 -5
  9. inspect_ai/_display/textual/widgets/port_mappings.py +110 -0
  10. inspect_ai/_display/textual/widgets/samples.py +78 -11
  11. inspect_ai/_display/textual/widgets/sandbox.py +37 -0
  12. inspect_ai/_eval/score.py +1 -0
  13. inspect_ai/_eval/task/results.py +50 -22
  14. inspect_ai/_eval/task/run.py +41 -7
  15. inspect_ai/_eval/task/sandbox.py +10 -5
  16. inspect_ai/_util/constants.py +1 -0
  17. inspect_ai/_util/port_names.py +61 -0
  18. inspect_ai/_util/text.py +23 -0
  19. inspect_ai/_view/www/App.css +31 -1
  20. inspect_ai/_view/www/dist/assets/index.css +31 -1
  21. inspect_ai/_view/www/dist/assets/index.js +25344 -1849
  22. inspect_ai/_view/www/log-schema.json +32 -2
  23. inspect_ai/_view/www/package.json +2 -0
  24. inspect_ai/_view/www/src/App.mjs +8 -10
  25. inspect_ai/_view/www/src/Types.mjs +0 -1
  26. inspect_ai/_view/www/src/components/ChatView.mjs +133 -43
  27. inspect_ai/_view/www/src/components/ExpandablePanel.mjs +0 -4
  28. inspect_ai/_view/www/src/components/LargeModal.mjs +19 -20
  29. inspect_ai/_view/www/src/components/TabSet.mjs +3 -1
  30. inspect_ai/_view/www/src/components/VirtualList.mjs +266 -84
  31. inspect_ai/_view/www/src/index.js +75 -2
  32. inspect_ai/_view/www/src/navbar/Navbar.mjs +3 -0
  33. inspect_ai/_view/www/src/navbar/SecondaryBar.mjs +18 -9
  34. inspect_ai/_view/www/src/samples/SampleDialog.mjs +5 -1
  35. inspect_ai/_view/www/src/samples/SampleDisplay.mjs +23 -15
  36. inspect_ai/_view/www/src/samples/SampleList.mjs +18 -48
  37. inspect_ai/_view/www/src/samples/SampleTranscript.mjs +8 -3
  38. inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +24 -12
  39. inspect_ai/_view/www/src/samples/SamplesTab.mjs +4 -1
  40. inspect_ai/_view/www/src/samples/SamplesTools.mjs +8 -8
  41. inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +712 -89
  42. inspect_ai/_view/www/src/samples/tools/filters.mjs +260 -87
  43. inspect_ai/_view/www/src/samples/transcript/ErrorEventView.mjs +24 -2
  44. inspect_ai/_view/www/src/samples/transcript/EventPanel.mjs +29 -24
  45. inspect_ai/_view/www/src/samples/transcript/EventRow.mjs +1 -1
  46. inspect_ai/_view/www/src/samples/transcript/InfoEventView.mjs +24 -2
  47. inspect_ai/_view/www/src/samples/transcript/InputEventView.mjs +24 -2
  48. inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +31 -10
  49. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.mjs +24 -2
  50. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.mjs +23 -2
  51. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.mjs +24 -2
  52. inspect_ai/_view/www/src/samples/transcript/StepEventView.mjs +33 -3
  53. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.mjs +25 -2
  54. inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +25 -2
  55. inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +193 -11
  56. inspect_ai/_view/www/src/samples/transcript/Types.mjs +10 -0
  57. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +26 -2
  58. inspect_ai/_view/www/src/types/log.d.ts +13 -2
  59. inspect_ai/_view/www/src/utils/Format.mjs +10 -3
  60. inspect_ai/_view/www/src/utils/Json.mjs +12 -6
  61. inspect_ai/_view/www/src/workspace/WorkSpace.mjs +10 -4
  62. inspect_ai/_view/www/vite.config.js +7 -0
  63. inspect_ai/_view/www/yarn.lock +116 -0
  64. inspect_ai/approval/_human/__init__.py +0 -0
  65. inspect_ai/approval/_policy.py +12 -6
  66. inspect_ai/log/_log.py +1 -1
  67. inspect_ai/log/_samples.py +16 -0
  68. inspect_ai/log/_transcript.py +4 -1
  69. inspect_ai/model/_call_tools.py +4 -0
  70. inspect_ai/model/_conversation.py +20 -8
  71. inspect_ai/model/_generate_config.py +10 -4
  72. inspect_ai/model/_model.py +117 -18
  73. inspect_ai/model/_model_output.py +7 -2
  74. inspect_ai/model/_providers/anthropic.py +100 -44
  75. inspect_ai/model/_providers/azureai.py +20 -20
  76. inspect_ai/model/_providers/bedrock.py +37 -40
  77. inspect_ai/model/_providers/google.py +46 -54
  78. inspect_ai/model/_providers/mistral.py +11 -11
  79. inspect_ai/model/_providers/openai.py +15 -16
  80. inspect_ai/model/_providers/openai_o1.py +9 -8
  81. inspect_ai/model/_providers/providers.py +1 -1
  82. inspect_ai/model/_providers/together.py +8 -8
  83. inspect_ai/model/_providers/vertex.py +1 -4
  84. inspect_ai/scorer/_reducer/reducer.py +1 -1
  85. inspect_ai/scorer/_scorer.py +2 -2
  86. inspect_ai/solver/__init__.py +2 -5
  87. inspect_ai/solver/_prompt.py +35 -5
  88. inspect_ai/solver/_task_state.py +80 -38
  89. inspect_ai/tool/__init__.py +2 -0
  90. inspect_ai/tool/_tool.py +12 -1
  91. inspect_ai/tool/_tool_call.py +10 -0
  92. inspect_ai/tool/_tool_def.py +16 -5
  93. inspect_ai/tool/_tool_with.py +21 -4
  94. inspect_ai/tool/beta/__init__.py +5 -0
  95. inspect_ai/tool/beta/_computer/__init__.py +3 -0
  96. inspect_ai/tool/beta/_computer/_common.py +133 -0
  97. inspect_ai/tool/beta/_computer/_computer.py +155 -0
  98. inspect_ai/tool/beta/_computer/_computer_split.py +198 -0
  99. inspect_ai/tool/beta/_computer/_resources/Dockerfile +100 -0
  100. inspect_ai/tool/beta/_computer/_resources/README.md +30 -0
  101. inspect_ai/tool/beta/_computer/_resources/entrypoint/entrypoint.sh +18 -0
  102. inspect_ai/tool/beta/_computer/_resources/entrypoint/novnc_startup.sh +20 -0
  103. inspect_ai/tool/beta/_computer/_resources/entrypoint/x11vnc_startup.sh +48 -0
  104. inspect_ai/tool/beta/_computer/_resources/entrypoint/xfce_startup.sh +13 -0
  105. inspect_ai/tool/beta/_computer/_resources/entrypoint/xvfb_startup.sh +48 -0
  106. inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +10 -0
  107. inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +10 -0
  108. inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/XPaint.desktop +10 -0
  109. inspect_ai/tool/beta/_computer/_resources/tool/__init__.py +0 -0
  110. inspect_ai/tool/beta/_computer/_resources/tool/_logger.py +22 -0
  111. inspect_ai/tool/beta/_computer/_resources/tool/_run.py +42 -0
  112. inspect_ai/tool/beta/_computer/_resources/tool/_tool_result.py +33 -0
  113. inspect_ai/tool/beta/_computer/_resources/tool/_x11_client.py +262 -0
  114. inspect_ai/tool/beta/_computer/_resources/tool/computer_tool.py +85 -0
  115. inspect_ai/tool/beta/_computer/_resources/tool/requirements.txt +0 -0
  116. inspect_ai/util/__init__.py +2 -0
  117. inspect_ai/util/_limit.py +26 -0
  118. inspect_ai/util/_sandbox/docker/docker.py +64 -1
  119. inspect_ai/util/_sandbox/docker/internal.py +3 -1
  120. inspect_ai/util/_sandbox/environment.py +14 -0
  121. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.59.dist-info}/METADATA +2 -2
  122. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.59.dist-info}/RECORD +126 -98
  123. inspect_ai/_view/www/src/samples/transcript/TranscriptState.mjs +0 -70
  124. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.59.dist-info}/LICENSE +0 -0
  125. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.59.dist-info}/WHEEL +0 -0
  126. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.59.dist-info}/entry_points.txt +0 -0
  127. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.59.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,155 @@
1
+ from typing import Awaitable, Callable
2
+
3
+ from inspect_ai._util.content import Content, ContentImage, ContentText
4
+ from inspect_ai.tool import Tool, ToolResult, tool
5
+ from inspect_ai.tool._tool import (
6
+ TOOL_INIT_MODEL_INPUT,
7
+ ToolParsingError,
8
+ )
9
+ from inspect_ai.tool._tool_call import ToolCallModelInput
10
+
11
+ from . import _common as common
12
+ from ._common import Action
13
+
14
+ ActionFunction = Callable[[str], ToolResult | Awaitable[ToolResult]]
15
+
16
+
17
@tool
def computer(max_screenshots: int | None = 1, timeout: int | None = 180) -> Tool:
    """Computer interaction tool driven by a single `action` argument.

    Args:
      max_screenshots (int | None): When not `None`, the model-input filter
        built by `_computer_model_input(max_screenshots)` is attached to the
        tool under `TOOL_INIT_MODEL_INPUT`. `None` attaches no filter.
      timeout (int | None): Timeout (in seconds) forwarded to every action.

    Returns:
      Tool that validates its arguments and dispatches to the matching
      action helper in `_common`.
    """

    async def execute(
        action: Action,
        text: str | None = None,
        coordinate: list[int] | None = None,
    ) -> ToolResult:
        """
        Use this tool to interact with a computer.

        Use a mouse and keyboard to interact with a computer's desktop GUI.

        Keep in mind that icons require double clicks to open while other UI affordances like menu items and buttons require a single click.

        Args:
          action (Action): The action to perform.
              - `key`: Press a key or key-combination on the keyboard.
                  - Example: execute(action="key", text="ctrl+s")
                  - Text can be any key name supported by xdotool's `key` such as:
                      "Return", "Escape", "alt+Tab", "BackSpace", "Tab", "alt+Tab", "ctrl+s", "Up", "KP_0" (for the numpad 0 key),
                      "Insert", "Delete", "Home", "End", "Prior", "Next", "Left", "Up", "Right", "Down",
                      "F1", "F2", "F3", "F4", "F5", "F6", "F7", "F8", "F9", "F10", "F11", "F12",
                      "Shift_L", "Shift_R", "Control_L", "Control_R", "Alt_L", "Alt_R", "Scroll_Lock", "Num_Lock", "Caps_Lock", "Pause",
                      "KP_Multiply", "KP_Home", "KP_Up", "KP_Prior", "KP_Subtract", "KP_Left", "KP_Begin", "KP_Right", "KP_Add", "KP_End","KP_Down",
                      "KP_Next", "KP_Insert", "KP_Delete", "KP_Enter", "KP_Divide", "KP_Equal", "KP_Decimal",
              - `type`: Type a string of text on the keyboard. If the text contains spaces, enclose it in quotes.
                  - Example: execute(action="type", text="The crux of the biscuit is the apostrophe!")
              - `cursor_position`: Get the current (x, y) pixel coordinate of the cursor on the screen.
              - `mouse_move`: Move the cursor to a specified (x, y) pixel coordinate on the screen.
                  - Example: execute(action="mouse_move", coordinate=(100, 200))
              - `left_click`: Click the left mouse button.
              - `left_click_drag`: Click and drag the cursor to a specified (x, y) pixel coordinate on the screen.
                  - Example: execute(action="left_click_drag", coordinate=(150, 250))
              - `right_click`: Click the right mouse button.
              - `middle_click`: Click the middle mouse button.
              - `double_click`: Double-click the left mouse button.
              - `screenshot`: Take a screenshot.
          text (str | None): The text to type or the key to press. Required when action is "key" or "type".
          coordinate (tuple[int, int] | None): The (x, y) pixel coordinate on the screen to which to move or drag. Required when action is "mouse_move" or "left_click_drag".

        Returns:
          The output of the command. Many commands will include a screenshot reflecting the result of the command in their output.
        """
        # actions that take a coordinate and nothing else
        if action in ("mouse_move", "left_click_drag"):
            if coordinate is None:
                raise ToolParsingError(f"coordinate is required for {action}")
            if text is not None:
                raise ToolParsingError(f"text is not accepted for {action}")
            if not isinstance(coordinate, list) or len(coordinate) != 2:
                raise ToolParsingError(f"{coordinate} must be a tuple of length 2")
            if not all(isinstance(i, int) and i >= 0 for i in coordinate):
                raise ToolParsingError(
                    f"{coordinate} must be a tuple of non-negative ints"
                )

            if action == "mouse_move":
                return await common.mouse_move(
                    coordinate[0], coordinate[1], timeout=timeout
                )
            elif action == "left_click_drag":
                return await common.left_click_drag(
                    coordinate[0], coordinate[1], timeout=timeout
                )

        # actions that take text and nothing else
        if action in ("key", "type"):
            if text is None:
                raise ToolParsingError(f"text is required for {action}")
            if coordinate is not None:
                raise ToolParsingError(f"coordinate is not accepted for {action}")
            if not isinstance(text, str):
                # message is passed positionally, like every other
                # ToolParsingError raised in this function
                raise ToolParsingError(f"{text} must be a string")

            if action == "key":
                return await common.press_key(text, timeout=timeout)
            elif action == "type":
                return await common.type(text, timeout=timeout)

        # actions that take no arguments at all
        if action in (
            "left_click",
            "right_click",
            "double_click",
            "middle_click",
            "screenshot",
            "cursor_position",
        ):
            if text is not None:
                raise ToolParsingError(f"text is not accepted for {action}")
            if coordinate is not None:
                raise ToolParsingError(f"coordinate is not accepted for {action}")

            if action == "screenshot":
                return await common.screenshot(timeout=timeout)
            elif action == "cursor_position":
                return await common.cursor_position(timeout=timeout)
            elif action == "left_click":
                return await common.left_click(timeout=timeout)
            elif action == "right_click":
                return await common.right_click(timeout=timeout)
            elif action == "middle_click":
                return await common.middle_click(timeout=timeout)
            elif action == "double_click":
                return await common.double_click(timeout=timeout)

        raise ToolParsingError(f"Invalid action: {action}")

    # if max_screenshots is specified then poke model input into where @tool can find it
    if max_screenshots is not None:
        setattr(execute, TOOL_INIT_MODEL_INPUT, _computer_model_input(max_screenshots))

    return execute
127
+
128
+
129
def _computer_model_input(max_screenshots: int) -> ToolCallModelInput:
    """Build the model-input filter that swaps older screenshots for text.

    Args:
      max_screenshots (int): Content is passed through untouched when
        `message_total - message_index` is at most this value.

    Returns:
      Function that returns string content and sufficiently recent content
      unchanged, and otherwise returns a new list in which every
      `ContentImage` is replaced by a `ContentText` placeholder.
    """

    def model_input(
        message_index: int, message_total: int, content: str | list[Content]
    ) -> str | list[Content]:
        # plain strings carry no images, so there is nothing to strip
        if isinstance(content, str):
            return content

        # still within the max_screenshots window: keep images as they are
        distance_from_end = message_total - message_index
        if distance_from_end <= max_screenshots:
            return content

        # outside the window: swap each image for a text placeholder
        filtered: list[Content] = [
            ContentText(
                text="Screenshot removed to reduce size of input. Please consult the latest screenshots for the most up to date state of the screen."
            )
            if isinstance(item, ContentImage)
            else item
            for item in content
        ]
        return filtered

    return model_input
@@ -0,0 +1,198 @@
1
+ """
2
+ This module provides the same functionality as the computer tool, but via a list of per-action tools, e.g. computer_mouse_move(100, 100).
3
+
4
+ The split version is not publicly exported, but is retained until we decide if it performs better than the monolithic computer tool.
5
+ """
6
+
7
+ from typing import Awaitable, Callable
8
+
9
+ from inspect_ai.tool import Tool, ToolResult, tool
10
+
11
+ from . import _common as common
12
+
13
+ ActionFunction = Callable[[str], ToolResult | Awaitable[ToolResult]]
14
+
15
+
16
def computer_split(timeout: int | None = None) -> list[Tool]:
    """
    Computer interaction tools.

    Args:
      timeout (int | None): Timeout (in seconds) for command. Forwarded to
        every tool in the returned list.

    Returns:
       List of computer interaction tools.
    """
    # NOTE(review): this module defines no per-action middle-click tool, so
    # none is included here — confirm whether that omission is intentional.
    factories = (
        computer_cursor_position,
        computer_screenshot,
        computer_mouse_move,
        computer_left_click,
        computer_double_click,
        computer_left_click_drag,
        computer_right_click,
        computer_key,
        computer_type,
    )
    # pass the caller's timeout through; previously it was accepted but unused
    return [factory(timeout=timeout) for factory in factories]
37
+
38
+
39
@tool()
def computer_cursor_position(timeout: int | None = None) -> Tool:
    """Create the cursor-position tool.

    Args:
      timeout (int | None): Passed through to `common.cursor_position`.
    """

    async def execute() -> ToolResult:
        """
        Get the current (x, y) pixel coordinate of the cursor on the screen.

        Args:
          None

        Returns:
          A `str` of the form "x y" where x and y are the current mouse coordinates.
        """
        position = await common.cursor_position(timeout=timeout)
        return position

    return execute
54
+
55
+
56
@tool()
def computer_screenshot(timeout: int | None = None) -> Tool:
    """Create the screenshot tool.

    Args:
      timeout (int | None): Passed through to `common.screenshot`.
    """

    async def execute() -> ToolResult:
        """
        Take a screenshot.

        Args:
          None

        Returns:
          A `list` with a single `ContentImage` of the screen.
        """
        capture = await common.screenshot(timeout=timeout)
        return capture

    return execute
71
+
72
+
73
@tool()
def computer_mouse_move(timeout: int | None = None) -> Tool:
    """Create the mouse-move tool.

    Args:
      timeout (int | None): Passed through to `common.mouse_move`.
    """

    async def execute(x: int, y: int) -> ToolResult:
        """
        Move the cursor to a specified (x, y) pixel coordinate on the screen.

        Args:
          x: X coordinate of the mouse destination.
          y: Y coordinate of the mouse destination.

        Returns:
          A `list` with a single `ContentImage` of the screen.
        """
        outcome = await common.mouse_move(x, y, timeout=timeout)
        return outcome

    return execute
89
+
90
+
91
@tool()
def computer_left_click(timeout: int | None = None) -> Tool:
    """Create the left-click tool.

    Args:
      timeout (int | None): Passed through to `common.left_click`.
    """

    async def execute() -> ToolResult:
        """
        Click the left mouse button.

        Args:
          None

        Returns:
          A `list` with a single `ContentImage` of the screen.
        """
        outcome = await common.left_click(timeout=timeout)
        return outcome

    return execute
106
+
107
+
108
@tool()
def computer_double_click(timeout: int | None = None) -> Tool:
    """Create the double-click tool.

    Args:
      timeout (int | None): Passed through to `common.double_click`.
    """

    async def execute() -> ToolResult:
        """
        Double-click the left mouse button.

        Args:
          None

        Returns:
          A `list` with a single `ContentImage` of the screen.
        """
        outcome = await common.double_click(timeout=timeout)
        return outcome

    return execute
123
+
124
+
125
@tool()
def computer_left_click_drag(timeout: int | None = None) -> Tool:
    """Create the click-and-drag tool.

    Args:
      timeout (int | None): Passed through to `common.left_click_drag`.
    """

    async def execute(x: int, y: int) -> ToolResult:
        """
        Click and drag the cursor to a specified (x, y) pixel coordinate on the screen.

        Args:
          x: X coordinate of the mouse destination.
          y: Y coordinate of the mouse destination.

        Returns:
          A `list` with a single `ContentImage` of the screen.
        """
        outcome = await common.left_click_drag(x, y, timeout=timeout)
        return outcome

    return execute
141
+
142
+
143
@tool()
def computer_right_click(timeout: int | None = None) -> Tool:
    """Create the right-click tool.

    Args:
      timeout (int | None): Passed through to `common.right_click`.
    """

    async def execute() -> ToolResult:
        """
        Click the right mouse button.

        Args:
          None

        Returns:
          A `list` with a single `ContentImage` of the screen.
        """
        outcome = await common.right_click(timeout=timeout)
        return outcome

    return execute
158
+
159
+
160
+ # keysym list is from https://gist.github.com/rvaiya/be31f42049a4b5ad46666a8e120d9843
161
@tool()
def computer_key(timeout: int | None = None) -> Tool:
    """Create the key-press tool.

    Args:
      timeout (int | None): Passed through to `common.press_key`.
    """

    async def execute(key: str) -> ToolResult:
        """
        Press a key or key-combination on the keyboard.

        Args:
          key: The key or key-combination to press. Can be any key name supported by xdotool's `key` such as:
            "Return", "Escape", "alt+Tab", "BackSpace", "Tab", "alt+Tab", "ctrl+s", "Up", "KP_0" (for the numpad 0 key),
            "Insert", "Delete", "Home", "End", "Prior", "Next", "Left", "Up", "Right", "Down",
            "F1", "F2", "F3", "F4", "F5", "F6", "F7", "F8", "F9", "F10", "F11", "F12",
            "Shift_L", "Shift_R", "Control_L", "Control_R", "Alt_L", "Alt_R", "Scroll_Lock", "Num_Lock", "Caps_Lock", "Pause",
            "KP_Multiply", "KP_Home", "KP_Up", "KP_Prior", "KP_Subtract", "KP_Left", "KP_Begin", "KP_Right", "KP_Add", "KP_End","KP_Down",
            "KP_Next", "KP_Insert", "KP_Delete", "KP_Enter", "KP_Divide", "KP_Equal", "KP_Decimal"

        Returns:
          A `list` with a single `ContentImage` of the screen.
        """
        outcome = await common.press_key(key, timeout=timeout)
        return outcome

    return execute
182
+
183
+
184
@tool()
def computer_type(timeout: int | None = None) -> Tool:
    """Create the text-typing tool.

    Args:
      timeout (int | None): Passed through to `common.type`.
    """

    async def execute(text: str) -> ToolResult:
        """
        Type a string of text on the keyboard.

        Args:
          text: The text to type. If the text contains spaces, enclose it in quotes.

        Returns:
          A `list` with a single `ContentImage` of the screen.
        """
        outcome = await common.type(text, timeout=timeout)
        return outcome

    return execute
@@ -0,0 +1,100 @@
1
+ FROM docker.io/ubuntu:22.04
2
+
3
+ ENV DEBIAN_FRONTEND=noninteractive
4
+ ENV DEBIAN_PRIORITY=high
5
+
6
+ # Core/system layer
7
+ RUN apt-get update && \
8
+ apt-get -y upgrade && \
9
+ apt-get -y install \
10
+ # A virtual framebuffer for running GUI applications without a physical display.
11
+ xvfb \
12
+ # A lightweight desktop environment for UNIX-like operating systems.
13
+ xfce4 \
14
+ # The terminal emulator for the xfce4 desktop environment.
15
+ xfce4-terminal\
16
+ # A VNC server for sharing X11 desktops.
17
+ x11vnc \
18
+ # A web based VNC client
19
+ novnc \
20
+ # A WebSocket to TCP proxy/bridge for noVNC
21
+ websockify \
22
+ # The Python programming language interpreter.
23
+ python3 \
24
+ # The package installer for Python.
25
+ python3-pip \
26
+ # A command-line tool for automating X11 applications (e.g., simulating keyboard/mouse inputs).
27
+ xdotool \
28
+ # A command-line tool for taking screenshots.
29
+ scrot \
30
+ # A suite for image manipulation — needed for scaling images.
31
+ imagemagick && \
32
+ apt-get clean
33
+
34
+ # Userland apt-get'able apps
35
+ RUN apt-get install -y --no-install-recommends \
36
+ # A simple paint program.
37
+ xpaint \
38
+ # A calculator application.
39
+ galculator && \
40
+ apt-get clean
41
+
42
+ # install Firefox
43
+ RUN apt-get install -y software-properties-common && \
44
+ add-apt-repository ppa:mozillateam/ppa && \
45
+ apt-get update && \
46
+ apt-get install -y --no-install-recommends firefox-esr && \
47
+ apt-get clean
48
+
49
+ # install VS Code
50
+ RUN apt-get install -y \
51
+ gpg \
52
+ wget \
53
+ apt-transport-https \
54
+ software-properties-common && \
55
+ wget -qO- https://packages.microsoft.com/keys/microsoft.asc | gpg --dearmor > packages.microsoft.gpg && \
56
+ install -D -o root -g root -m 644 packages.microsoft.gpg /etc/apt/keyrings/packages.microsoft.gpg && \
57
+ sh -c 'echo "deb [arch=amd64,arm64 signed-by=/etc/apt/keyrings/packages.microsoft.gpg] https://packages.microsoft.com/repos/code stable main" > /etc/apt/sources.list.d/vscode.list' && \
58
+ apt-get update && \
59
+ apt-get install -y code && \
60
+ apt-get clean
61
+
62
+ # configure noVNC
63
+ RUN ln -s /usr/share/novnc/vnc.html /usr/share/novnc/index.html
64
+
65
+ # We copy requirements.txt by itself so that changes to the scripts will be in a later layer
66
+ # and we only pip install if requirements.txt changes
67
+ COPY tool/requirements.txt /opt/inspect/tool/requirements.txt
68
+ RUN cd /opt/inspect/tool && pip3 install --no-cache-dir -r requirements.txt
69
+
70
+ COPY tool/ /opt/inspect/tool
71
+ COPY entrypoint/ /opt/inspect/entrypoint
72
+ RUN chmod -R 755 /opt/inspect
73
+
74
+ # setup user
75
+ ENV USERNAME=user
76
+ ENV HOME=/home/$USERNAME
77
+ RUN useradd -m -s /bin/bash -d $HOME $USERNAME
78
+ RUN echo "${USERNAME} ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers
79
+ USER ${USERNAME}
80
+ WORKDIR $HOME
81
+ COPY --chown=$USERNAME:$USERNAME image_home_dir/ $HOME
82
+
83
+ # configure Firefox to skip all 'first run' UI
84
+ RUN mkdir -p $HOME/.mozilla/firefox-esr/profile.default && \
85
+ echo 'user_pref("browser.startup.homepage_override.mstone", "ignore");' >> $HOME/.mozilla/firefox-esr/profile.default/user.js && \
86
+ echo 'user_pref("browser.aboutwelcome.enabled", false);' >> $HOME/.mozilla/firefox-esr/profile.default/user.js && \
87
+ echo 'user_pref("datareporting.policy.firstRunURL", "");' >> $HOME/.mozilla/firefox-esr/profile.default/user.js
88
+
89
+ EXPOSE 5900
90
+ EXPOSE 6080
91
+
92
+ ARG DISPLAY_NUM=1
93
+ ARG WIDTH=1920
94
+ ARG HEIGHT=1080
95
+ ENV DISPLAY_NUM=$DISPLAY_NUM
96
+ ENV DISPLAY=:${DISPLAY_NUM}
97
+ ENV HEIGHT=$HEIGHT
98
+ ENV WIDTH=$WIDTH
99
+
100
+ ENTRYPOINT [ "/opt/inspect/entrypoint/entrypoint.sh" ]
@@ -0,0 +1,30 @@
1
+ # About This Image
2
+
3
+ This image was inspired by Anthropic's Computer Use Demo [here](https://github.com/anthropics/anthropic-quickstarts/tree/main/computer-use-demo/image).
4
+
5
+ Its goal is to provide the minimum infrastructure to support the use of Inspect's `computer_tool` to interact with the computer via X11 and `xdotool`, while also providing observability and interaction via VNC and noVNC.
6
+
7
+ The image extends this minimal functionality by adding a few basic applications — VS Code, Firefox, XPaint, and galculator.
8
+
9
+ ## Entrypoint Directory
10
+
11
+ 1. **Xvfb (X Virtual Framebuffer)**
12
+ - **Script:** `xvfb_startup.sh`
13
+ - **Description:** Xvfb is a display server that implements the X11 display server protocol. It runs in memory and does not require a physical display, useful for running graphical applications in a headless environment.
14
+
15
+ 1. **xfce4**
16
+ - **Script:** `xfce4_startup.sh`
17
+ - **Description:** xfce4 is a lightweight desktop environment for UNIX-like operating systems. It aims to be fast, low on system resources, and user-friendly.
18
+
19
+ 1. **x11vnc**
20
+ - **Script:** `x11vnc_startup.sh`
21
+ - **Description:** x11vnc is a VNC server that allows remote access to the X11 display. It enables users to connect to the virtual display environment from a remote machine using a VNC client.
22
+
23
+ 1. **noVNC**
24
+ - **Script:** `novnc_startup.sh`
25
+ - **Description:** noVNC is a VNC client that runs in a web browser. It allows users to access the virtual display environment through a web interface without needing a separate VNC client application.
26
+
27
+ ## Desktop Directory
28
+
29
+ The `Desktop` directory contains launchers for VS Code, Firefox and XPaint.
30
+
@@ -0,0 +1,18 @@
1
#!/bin/bash
# Container entrypoint: starts the display stack (Xvfb, XFCE, x11vnc, noVNC)
# and then either hands control to the supplied CMD or idles forever.
set -e

# remove marker files (xvfb_startup.sh treats the X lock file as
# "Xvfb is already running", so a leftover one would skip the start)
rm -f "/tmp/.X${DISPLAY_NUM}-lock"
rm -f /tmp/xfce_started

/opt/inspect/entrypoint/xvfb_startup.sh
/opt/inspect/entrypoint/xfce_startup.sh
/opt/inspect/entrypoint/x11vnc_startup.sh
/opt/inspect/entrypoint/novnc_startup.sh

# Run CMD if provided. exec replaces this shell, so nothing below runs
# in that case.
if [ "$#" -gt 0 ]; then
    echo "Executing CMD from derived Dockerfile: $*"
    exec "$@"
fi

# No CMD was provided: keep the container running
tail -f /dev/null
@@ -0,0 +1,20 @@
1
#!/bin/bash
# Starts websockify serving the noVNC web client on port 6080, bridged to
# the VNC server on localhost:5900, then waits for the port to be listening.
echo "starting noVNC"

# Start noVNC with explicit websocket settings
websockify \
    --web=/usr/share/novnc/ \
    6080 localhost:5900 \
    > /tmp/novnc.log 2>&1 &

# Wait (up to 10 one-second polls) for noVNC to start
timeout=10
while [ $timeout -gt 0 ]; do
    if netstat -tuln | grep -q ":6080 "; then
        break
    fi
    sleep 1
    ((timeout--))
done

if [ $timeout -eq 0 ]; then
    # Report the timeout instead of claiming success. Left non-fatal so
    # that a slow noVNC start does not abort the calling entrypoint.
    echo "noVNC is not listening on port 6080 after 10 seconds, log output:" >&2
    cat /tmp/novnc.log >&2
else
    echo "noVNC started successfully"
fi
@@ -0,0 +1,48 @@
1
#!/bin/bash
# Starts x11vnc on port 5900 for the current $DISPLAY, waits for the port
# to be listening, and leaves a background watcher that re-runs this script
# if the x11vnc process goes away.
echo "starting vnc"

stderr_log=/tmp/x11vnc_stderr.log

(x11vnc -display $DISPLAY \
    -forever \
    -shared \
    -wait 50 \
    -cursor most \
    -cursor arrow \
    -rfbport 5900 \
    -nopw \
    2>"$stderr_log") &

x11vnc_pid=$!

# Poll once per second, at most 10 times, until port 5900 is listening
timeout=10
until [ $timeout -le 0 ] || netstat -tuln | grep -q ":5900 "; do
    sleep 1
    timeout=$((timeout - 1))
done

# The countdown ran out without the port appearing: give up
if [ $timeout -eq 0 ]; then
    echo "x11vnc failed to start, stderr output:" >&2
    cat "$stderr_log" >&2
    exit 1
fi

# Startup succeeded; discard anything logged so far
: > "$stderr_log"

# Monitor x11vnc process in the background
(
    # check every 5 seconds for as long as the process is alive
    while kill -0 $x11vnc_pid 2>/dev/null; do
        sleep 5
    done

    echo "x11vnc process crashed, restarting..." >&2
    if [ -f "$stderr_log" ]; then
        echo "x11vnc stderr output:" >&2
        cat "$stderr_log" >&2
        rm "$stderr_log"
    fi
    # re-run this script from the top
    exec "$0"
) &
@@ -0,0 +1,13 @@
1
#!/bin/bash
# Launches the XFCE4 desktop in the background and blocks until an
# xfce4-session process exists, then drops a marker file.

echo "starting XFCE4"
startxfce4 &

# poll once per second until the session process shows up
until pgrep -x "xfce4-session" > /dev/null; do
    echo "Waiting for XFCE4 to start..."
    sleep 1
done

echo "XFCE4 is fully started!"
touch /tmp/xfce_started
13
+
@@ -0,0 +1,48 @@
1
#!/bin/bash
# Starts Xvfb on $DISPLAY at ${WIDTH}x${HEIGHT}x24 unless its lock file
# already exists, and waits until xdpyinfo can reach the display.
set -e  # Exit on error

DPI=96
RES_AND_DEPTH=${WIDTH}x${HEIGHT}x24

# Succeeds when the X lock file for this display exists, which this script
# takes to mean Xvfb is already running.
check_xvfb_running() {
    [ -e /tmp/.X${DISPLAY_NUM}-lock ]
}

# Polls xdpyinfo every 0.1s; fails once more than $timeout seconds have passed.
wait_for_xvfb() {
    local timeout=10
    local start_time=$(date +%s)
    local deadline=$((start_time + timeout))
    until xdpyinfo >/dev/null 2>&1; do
        if [ $(date +%s) -gt $deadline ]; then
            echo "Xvfb failed to start within $timeout seconds" >&2
            return 1
        fi
        sleep 0.1
    done
    return 0
}

# Nothing to do when Xvfb is already up
if check_xvfb_running; then
    echo "Xvfb is already running on display ${DISPLAY}"
    exit 0
fi

# Start Xvfb
Xvfb $DISPLAY -ac -screen 0 $RES_AND_DEPTH -retro -dpi $DPI -nolisten tcp -nolisten unix &
XVFB_PID=$!

# Bail out (and stop the server process) if it never became reachable
if ! wait_for_xvfb; then
    echo "Xvfb failed to start"
    kill $XVFB_PID
    exit 1
fi

echo "Xvfb started successfully on display ${DISPLAY}"
echo "Xvfb PID: $XVFB_PID"
@@ -0,0 +1,10 @@
1
+ [Desktop Entry]
2
+ Version=1.0
3
+ Type=Application
4
+ Name=Firefox Web Browser
5
+ Comment=Browse the World Wide Web
6
+ Exec=firefox-esr %u
7
+ Icon=firefox-esr
8
+ Path=
9
+ Terminal=false
10
+ StartupNotify=true
@@ -0,0 +1,10 @@
1
+ [Desktop Entry]
2
+ Version=1.0
3
+ Type=Application
4
+ Name=Visual Studio Code
5
+ Comment=Code Editing. Redefined.
6
+ Exec=/usr/share/code/code %F
7
+ Icon=vscode
8
+ Path=
9
+ Terminal=false
10
+ StartupNotify=false
@@ -0,0 +1,10 @@
1
+ [Desktop Entry]
2
+ Version=1.0
3
+ Type=Application
4
+ Name=XPaint
5
+ Comment=Xpaint painting application
6
+ Exec=xpaint
7
+ Icon=xpaint
8
+ Path=
9
+ Terminal=false
10
+ StartupNotify=false