inspect-ai 0.3.57__py3-none-any.whl → 0.3.59__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (161) hide show
  1. inspect_ai/__init__.py +2 -1
  2. inspect_ai/_cli/common.py +7 -3
  3. inspect_ai/_cli/eval.py +17 -2
  4. inspect_ai/_cli/trace.py +21 -2
  5. inspect_ai/_display/core/active.py +4 -3
  6. inspect_ai/_display/core/config.py +3 -3
  7. inspect_ai/_display/core/panel.py +7 -3
  8. inspect_ai/_display/plain/__init__.py +0 -0
  9. inspect_ai/_display/plain/display.py +203 -0
  10. inspect_ai/_display/rich/display.py +4 -9
  11. inspect_ai/_display/textual/app.py +4 -1
  12. inspect_ai/_display/textual/widgets/port_mappings.py +110 -0
  13. inspect_ai/_display/textual/widgets/samples.py +119 -16
  14. inspect_ai/_display/textual/widgets/sandbox.py +37 -0
  15. inspect_ai/_eval/eval.py +32 -20
  16. inspect_ai/_eval/evalset.py +7 -5
  17. inspect_ai/_eval/score.py +1 -0
  18. inspect_ai/_eval/task/__init__.py +2 -2
  19. inspect_ai/_eval/task/images.py +40 -25
  20. inspect_ai/_eval/task/results.py +50 -22
  21. inspect_ai/_eval/task/run.py +180 -124
  22. inspect_ai/_eval/task/sandbox.py +10 -5
  23. inspect_ai/_eval/task/task.py +140 -25
  24. inspect_ai/_util/constants.py +2 -0
  25. inspect_ai/_util/content.py +23 -1
  26. inspect_ai/_util/images.py +20 -17
  27. inspect_ai/_util/kvstore.py +73 -0
  28. inspect_ai/_util/notgiven.py +18 -0
  29. inspect_ai/_util/port_names.py +61 -0
  30. inspect_ai/_util/text.py +23 -0
  31. inspect_ai/_util/thread.py +5 -0
  32. inspect_ai/_view/www/App.css +31 -1
  33. inspect_ai/_view/www/dist/assets/index.css +31 -1
  34. inspect_ai/_view/www/dist/assets/index.js +25375 -1846
  35. inspect_ai/_view/www/log-schema.json +129 -15
  36. inspect_ai/_view/www/package.json +2 -0
  37. inspect_ai/_view/www/src/App.mjs +8 -10
  38. inspect_ai/_view/www/src/Types.mjs +0 -1
  39. inspect_ai/_view/www/src/components/ChatView.mjs +133 -43
  40. inspect_ai/_view/www/src/components/ExpandablePanel.mjs +0 -4
  41. inspect_ai/_view/www/src/components/LargeModal.mjs +19 -20
  42. inspect_ai/_view/www/src/components/MessageBand.mjs +2 -2
  43. inspect_ai/_view/www/src/components/MessageContent.mjs +43 -1
  44. inspect_ai/_view/www/src/components/TabSet.mjs +3 -1
  45. inspect_ai/_view/www/src/components/VirtualList.mjs +266 -84
  46. inspect_ai/_view/www/src/index.js +75 -2
  47. inspect_ai/_view/www/src/navbar/Navbar.mjs +3 -0
  48. inspect_ai/_view/www/src/navbar/SecondaryBar.mjs +18 -9
  49. inspect_ai/_view/www/src/samples/SampleDialog.mjs +5 -1
  50. inspect_ai/_view/www/src/samples/SampleDisplay.mjs +23 -15
  51. inspect_ai/_view/www/src/samples/SampleList.mjs +18 -48
  52. inspect_ai/_view/www/src/samples/SampleTranscript.mjs +8 -3
  53. inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +29 -13
  54. inspect_ai/_view/www/src/samples/SamplesTab.mjs +4 -1
  55. inspect_ai/_view/www/src/samples/SamplesTools.mjs +8 -8
  56. inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +712 -89
  57. inspect_ai/_view/www/src/samples/tools/filters.mjs +260 -87
  58. inspect_ai/_view/www/src/samples/transcript/ErrorEventView.mjs +24 -2
  59. inspect_ai/_view/www/src/samples/transcript/EventPanel.mjs +29 -24
  60. inspect_ai/_view/www/src/samples/transcript/EventRow.mjs +1 -1
  61. inspect_ai/_view/www/src/samples/transcript/InfoEventView.mjs +24 -2
  62. inspect_ai/_view/www/src/samples/transcript/InputEventView.mjs +24 -2
  63. inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +31 -10
  64. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.mjs +24 -2
  65. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.mjs +23 -2
  66. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.mjs +24 -2
  67. inspect_ai/_view/www/src/samples/transcript/StepEventView.mjs +33 -3
  68. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.mjs +25 -2
  69. inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +25 -2
  70. inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +193 -11
  71. inspect_ai/_view/www/src/samples/transcript/Types.mjs +10 -0
  72. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +26 -2
  73. inspect_ai/_view/www/src/types/log.d.ts +62 -27
  74. inspect_ai/_view/www/src/utils/Format.mjs +10 -3
  75. inspect_ai/_view/www/src/utils/Json.mjs +12 -6
  76. inspect_ai/_view/www/src/workspace/WorkSpace.mjs +10 -4
  77. inspect_ai/_view/www/vite.config.js +7 -0
  78. inspect_ai/_view/www/yarn.lock +116 -0
  79. inspect_ai/approval/_human/__init__.py +0 -0
  80. inspect_ai/approval/_human/util.py +2 -2
  81. inspect_ai/approval/_policy.py +12 -6
  82. inspect_ai/dataset/_sources/csv.py +2 -1
  83. inspect_ai/dataset/_sources/json.py +2 -1
  84. inspect_ai/dataset/_sources/util.py +15 -7
  85. inspect_ai/log/_condense.py +11 -1
  86. inspect_ai/log/_log.py +3 -6
  87. inspect_ai/log/_recorders/eval.py +19 -8
  88. inspect_ai/log/_samples.py +26 -5
  89. inspect_ai/log/_transcript.py +32 -2
  90. inspect_ai/model/__init__.py +10 -2
  91. inspect_ai/model/_call_tools.py +59 -12
  92. inspect_ai/model/_chat_message.py +2 -4
  93. inspect_ai/model/_conversation.py +61 -0
  94. inspect_ai/model/_generate_config.py +10 -4
  95. inspect_ai/model/_model.py +117 -18
  96. inspect_ai/model/_model_output.py +7 -2
  97. inspect_ai/model/_providers/anthropic.py +109 -51
  98. inspect_ai/model/_providers/azureai.py +26 -24
  99. inspect_ai/model/_providers/bedrock.py +43 -44
  100. inspect_ai/model/_providers/google.py +121 -58
  101. inspect_ai/model/_providers/groq.py +7 -5
  102. inspect_ai/model/_providers/hf.py +11 -6
  103. inspect_ai/model/_providers/mistral.py +17 -20
  104. inspect_ai/model/_providers/openai.py +32 -21
  105. inspect_ai/model/_providers/openai_o1.py +9 -8
  106. inspect_ai/model/_providers/providers.py +1 -1
  107. inspect_ai/model/_providers/together.py +8 -8
  108. inspect_ai/model/_providers/vertex.py +18 -8
  109. inspect_ai/scorer/__init__.py +13 -2
  110. inspect_ai/scorer/_metrics/__init__.py +2 -2
  111. inspect_ai/scorer/_metrics/std.py +3 -3
  112. inspect_ai/scorer/_reducer/reducer.py +1 -1
  113. inspect_ai/scorer/_scorer.py +2 -2
  114. inspect_ai/solver/__init__.py +2 -5
  115. inspect_ai/solver/_prompt.py +35 -5
  116. inspect_ai/solver/_task_state.py +80 -38
  117. inspect_ai/tool/__init__.py +11 -1
  118. inspect_ai/tool/_tool.py +21 -3
  119. inspect_ai/tool/_tool_call.py +10 -0
  120. inspect_ai/tool/_tool_def.py +16 -5
  121. inspect_ai/tool/_tool_with.py +21 -4
  122. inspect_ai/tool/beta/__init__.py +5 -0
  123. inspect_ai/tool/beta/_computer/__init__.py +3 -0
  124. inspect_ai/tool/beta/_computer/_common.py +133 -0
  125. inspect_ai/tool/beta/_computer/_computer.py +155 -0
  126. inspect_ai/tool/beta/_computer/_computer_split.py +198 -0
  127. inspect_ai/tool/beta/_computer/_resources/Dockerfile +100 -0
  128. inspect_ai/tool/beta/_computer/_resources/README.md +30 -0
  129. inspect_ai/tool/beta/_computer/_resources/entrypoint/entrypoint.sh +18 -0
  130. inspect_ai/tool/beta/_computer/_resources/entrypoint/novnc_startup.sh +20 -0
  131. inspect_ai/tool/beta/_computer/_resources/entrypoint/x11vnc_startup.sh +48 -0
  132. inspect_ai/tool/beta/_computer/_resources/entrypoint/xfce_startup.sh +13 -0
  133. inspect_ai/tool/beta/_computer/_resources/entrypoint/xvfb_startup.sh +48 -0
  134. inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +10 -0
  135. inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +10 -0
  136. inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/XPaint.desktop +10 -0
  137. inspect_ai/tool/beta/_computer/_resources/tool/__init__.py +0 -0
  138. inspect_ai/tool/beta/_computer/_resources/tool/_logger.py +22 -0
  139. inspect_ai/tool/beta/_computer/_resources/tool/_run.py +42 -0
  140. inspect_ai/tool/beta/_computer/_resources/tool/_tool_result.py +33 -0
  141. inspect_ai/tool/beta/_computer/_resources/tool/_x11_client.py +262 -0
  142. inspect_ai/tool/beta/_computer/_resources/tool/computer_tool.py +85 -0
  143. inspect_ai/tool/beta/_computer/_resources/tool/requirements.txt +0 -0
  144. inspect_ai/util/__init__.py +2 -3
  145. inspect_ai/util/{_trace.py → _conversation.py} +3 -17
  146. inspect_ai/util/_display.py +14 -4
  147. inspect_ai/util/_limit.py +26 -0
  148. inspect_ai/util/_sandbox/context.py +12 -13
  149. inspect_ai/util/_sandbox/docker/compose.py +24 -11
  150. inspect_ai/util/_sandbox/docker/docker.py +84 -14
  151. inspect_ai/util/_sandbox/docker/internal.py +3 -1
  152. inspect_ai/util/_sandbox/environment.py +27 -1
  153. inspect_ai/util/_sandbox/local.py +1 -0
  154. {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/METADATA +2 -2
  155. {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/RECORD +159 -128
  156. inspect_ai/_view/www/src/samples/transcript/TranscriptState.mjs +0 -70
  157. inspect_ai/model/_trace.py +0 -48
  158. {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/LICENSE +0 -0
  159. {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/WHEEL +0 -0
  160. {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/entry_points.txt +0 -0
  161. {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
1
- from copy import copy
1
+ from copy import deepcopy
2
2
 
3
3
  from inspect_ai._util.registry import (
4
4
  registry_info,
@@ -6,8 +6,9 @@ from inspect_ai._util.registry import (
6
6
  set_registry_info,
7
7
  set_registry_params,
8
8
  )
9
+ from inspect_ai.tool._tool_call import ToolCallModelInput, ToolCallViewer
9
10
 
10
- from ._tool import Tool
11
+ from ._tool import TOOL_MODEL_INPUT, TOOL_PARALLEL, TOOL_VIEWER, Tool
11
12
  from ._tool_description import ToolDescription, set_tool_description
12
13
  from ._tool_info import parse_tool_info
13
14
 
@@ -17,6 +18,9 @@ def tool_with(
17
18
  name: str | None = None,
18
19
  description: str | None = None,
19
20
  parameters: dict[str, str] | None = None,
21
+ parallel: bool | None = None,
22
+ viewer: ToolCallViewer | None = None,
23
+ model_input: ToolCallModelInput | None = None,
20
24
  ) -> Tool:
21
25
  """Tool with modifications to name and descriptions.
22
26
 
@@ -25,6 +29,11 @@ def tool_with(
25
29
  name (str | None): Tool name (optional).
26
30
  description (str | None): Tool description (optional).
27
31
  parameters (dict[str,str] | None): Parameter descriptions (optional)
32
+ parallel (bool | None): Does the tool support parallel execution
33
+ (keeps the original tool's setting if not specified)
34
+ viewer (ToolCallViewer | None): Optional tool call viewer implementation.
35
+ model_input (ToolCallModelInput | None): Optional function that determines how
36
+ tool call results are played back as model input.
28
37
 
29
38
  Returns:
30
39
  A copy of the passed tool with the specified descriptive information.
@@ -46,8 +55,16 @@ def tool_with(
46
55
  ]
47
56
 
48
57
  # copy the tool and set the descriptions on the new copy
49
- tool_copy = copy(tool)
50
- set_registry_info(tool_copy, registry_info(tool))
58
+ tool_copy = deepcopy(tool)
59
+ info = registry_info(tool).model_copy()
60
+ if parallel is not None:
61
+ info.metadata[TOOL_PARALLEL] = parallel
62
+ elif viewer is not None:
63
+ info.metadata[TOOL_VIEWER] = viewer
64
+ elif model_input is not None:
65
+ info.metadata[TOOL_MODEL_INPUT] = model_input
66
+
67
+ set_registry_info(tool_copy, info)
51
68
  set_registry_params(tool_copy, registry_params(tool))
52
69
  set_tool_description(
53
70
  tool_copy,
@@ -0,0 +1,5 @@
1
+ from ._computer import computer
2
+
3
+ __all__ = [
4
+ "computer",
5
+ ]
@@ -0,0 +1,3 @@
1
+ from ._computer import computer
2
+
3
+ __all__ = ["computer"]
@@ -0,0 +1,133 @@
1
+ import json
2
+ from textwrap import dedent
3
+ from typing import Literal
4
+
5
+ from pydantic import BaseModel, Field
6
+
7
+ from inspect_ai._util.content import ContentText
8
+ from inspect_ai._util.error import PrerequisiteError
9
+ from inspect_ai.model import ContentImage
10
+ from inspect_ai.tool import ToolError, ToolResult
11
+ from inspect_ai.util._sandbox.context import sandbox_with
12
+ from inspect_ai.util._sandbox.environment import SandboxEnvironment
13
+
14
+ Action = Literal[
15
+ "key",
16
+ "type",
17
+ "mouse_move",
18
+ "left_click",
19
+ "left_click_drag",
20
+ "right_click",
21
+ "middle_click",
22
+ "double_click",
23
+ "screenshot",
24
+ "cursor_position",
25
+ ]
26
+
27
+
28
+ class ToolExecResult(BaseModel):
29
+ output: str | None = Field(default=None)
30
+ error: str | None = Field(default=None)
31
+ base64_image: str | None = Field(default=None)
32
+
33
+
34
+ async def _send_cmd(cmdTail: list[str], timeout: int | None = None) -> ToolResult:
35
+ from inspect_ai.log._samples import sample_active
36
+
37
+ sample = sample_active()
38
+ assert sample
39
+ sample_id = sample.sample.id
40
+ assert sample_id
41
+
42
+ cmd = ["python3", "/opt/inspect/tool/computer_tool.py", "--action"] + cmdTail
43
+
44
+ raw_exec_result = await (await computer_sandbox()).exec(cmd, timeout=timeout)
45
+
46
+ if not raw_exec_result.success:
47
+ raise RuntimeError(
48
+ f"Failure executing command: ${cmd} {raw_exec_result.stderr}"
49
+ )
50
+
51
+ result = ToolExecResult(**json.loads(raw_exec_result.stdout))
52
+
53
+ if result.error:
54
+ raise ToolError(result.error)
55
+
56
+ image = (
57
+ ContentImage(image=f"data:image/png;base64,{result.base64_image}")
58
+ if result.base64_image
59
+ else None
60
+ )
61
+ text = result.output if result.output and len(result.output) > 0 else None
62
+
63
+ if text is not None and image is not None:
64
+ return [ContentText(text=text), image]
65
+
66
+ if text is not None:
67
+ return text
68
+
69
+ if image is not None:
70
+ return [image]
71
+
72
+ return "OK"
73
+
74
+
75
+ async def cursor_position(timeout: int | None = None) -> ToolResult:
76
+ return await _send_cmd(["cursor_position"], timeout=timeout)
77
+
78
+
79
+ async def screenshot(timeout: int | None = None) -> ToolResult:
80
+ return await _send_cmd(["screenshot"], timeout=timeout)
81
+
82
+
83
+ async def mouse_move(x: int, y: int, timeout: int | None = None) -> ToolResult:
84
+ return await _send_cmd(
85
+ ["mouse_move", "--coordinate", f"{x}", f"{y}"], timeout=timeout
86
+ )
87
+
88
+
89
+ async def left_click(timeout: int | None = None) -> ToolResult:
90
+ return await _send_cmd(["left_click"], timeout=timeout)
91
+
92
+
93
+ async def left_click_drag(x: int, y: int, timeout: int | None = None) -> ToolResult:
94
+ return await _send_cmd(
95
+ ["left_click_drag", "--coordinate", f"{x}", f"{y}"], timeout=timeout
96
+ )
97
+
98
+
99
+ async def right_click(timeout: int | None = None) -> ToolResult:
100
+ return await _send_cmd(["right_click"], timeout=timeout)
101
+
102
+
103
+ async def middle_click(timeout: int | None = None) -> ToolResult:
104
+ return await _send_cmd(["middle_click"], timeout=timeout)
105
+
106
+
107
+ async def double_click(timeout: int | None = None) -> ToolResult:
108
+ return await _send_cmd(["double_click"], timeout=timeout)
109
+
110
+
111
+ async def press_key(key: str, timeout: int | None = None) -> ToolResult:
112
+ return await _send_cmd(["key", "--text", key], timeout=timeout)
113
+
114
+
115
+ async def type(text: str, timeout: int | None = None) -> ToolResult:
116
+ return await _send_cmd(["type", "--text", text], timeout=timeout)
117
+
118
+
119
+ async def computer_sandbox() -> SandboxEnvironment:
120
+ sb = await sandbox_with("/opt/inspect/tool/computer_tool.py")
121
+ if sb:
122
+ return sb
123
+ else:
124
+ raise PrerequisiteError(
125
+ dedent("""
126
+ The computer tool service was not found in any of the sandboxes for this sample. Please add the computer tool service to your configuration. For example, the following Docker compose file uses the aisiuk/inspect-computer-tool:latest-beta image as its default sandbox:
127
+
128
+ services:
129
+ default:
130
+ image: "aisiuk/inspect-computer-tool:latest-beta"
131
+ init: true
132
+ """).strip()
133
+ )
@@ -0,0 +1,155 @@
1
+ from typing import Awaitable, Callable
2
+
3
+ from inspect_ai._util.content import Content, ContentImage, ContentText
4
+ from inspect_ai.tool import Tool, ToolResult, tool
5
+ from inspect_ai.tool._tool import (
6
+ TOOL_INIT_MODEL_INPUT,
7
+ ToolParsingError,
8
+ )
9
+ from inspect_ai.tool._tool_call import ToolCallModelInput
10
+
11
+ from . import _common as common
12
+ from ._common import Action
13
+
14
+ ActionFunction = Callable[[str], ToolResult | Awaitable[ToolResult]]
15
+
16
+
17
+ @tool
18
+ def computer(max_screenshots: int | None = 1, timeout: int | None = 180) -> Tool:
19
+ async def execute(
20
+ action: Action,
21
+ text: str | None = None,
22
+ coordinate: list[int] | None = None,
23
+ ) -> ToolResult:
24
+ """
25
+ Use this tool to interact with a computer.
26
+
27
+ Use a mouse and keyboard to interact with a computer's desktop GUI.
28
+
29
+ Keep in mind that icons require double clicks to open while other UI affordances like menu items and buttons require a single click.
30
+
31
+ Args:
32
+ action (Action): The action to perform.
33
+ - `key`: Press a key or key-combination on the keyboard.
34
+ - Example: execute(action="key", text="ctrl+s")
35
+ - Text can be any key name supported by xdotool's `key` such as:
36
+ "Return", "Escape", "alt+Tab", "BackSpace", "Tab", "alt+Tab", "ctrl+s", "Up", "KP_0" (for the numpad 0 key),
37
+ "Insert", "Delete", "Home", "End", "Prior", "Next", "Left", "Up", "Right", "Down",
38
+ "F1", "F2", "F3", "F4", "F5", "F6", "F7", "F8", "F9", "F10", "F11", "F12",
39
+ "Shift_L", "Shift_R", "Control_L", "Control_R", "Alt_L", "Alt_R", "Scroll_Lock", "Num_Lock", "Caps_Lock", "Pause",
40
+ "KP_Multiply", "KP_Home", "KP_Up", "KP_Prior", "KP_Subtract", "KP_Left", "KP_Begin", "KP_Right", "KP_Add", "KP_End","KP_Down",
41
+ "KP_Next", "KP_Insert", "KP_Delete", "KP_Enter", "KP_Divide", "KP_Equal", "KP_Decimal",
42
+ - `type`: Type a string of text on the keyboard. If the text contains spaces, enclose it in quotes.
43
+ - Example: execute(action="type", text="The crux of the biscuit is the apostrophe!")
44
+ - `cursor_position`: Get the current (x, y) pixel coordinate of the cursor on the screen.
45
+ - `mouse_move`: Move the cursor to a specified (x, y) pixel coordinate on the screen.
46
+ - Example: execute(action="mouse_move", coordinate=(100, 200))
47
+ - `left_click`: Click the left mouse button.
48
+ - `left_click_drag`: Click and drag the cursor to a specified (x, y) pixel coordinate on the screen.
49
+ - Example: execute(action="left_click_drag", coordinate=(150, 250))
50
+ - `right_click`: Click the right mouse button.
51
+ - `middle_click`: Click the middle mouse button.
52
+ - `double_click`: Double-click the left mouse button.
53
+ - `screenshot`: Take a screenshot.
54
+ text (str | None): The text to type or the key to press. Required when action is "key" or "type".
55
+ coordinate (tuple[int, int] | None): The (x, y) pixel coordinate on the screen to which to move or drag. Required when action is "mouse_move" or "left_click_drag".
56
+
57
+ Returns:
58
+ The output of the command. Many commands will include a screenshot reflecting the result of the command in their output.
59
+ """
60
+ if action in ("mouse_move", "left_click_drag"):
61
+ if coordinate is None:
62
+ raise ToolParsingError(f"coordinate is required for {action}")
63
+ if text is not None:
64
+ raise ToolParsingError(f"text is not accepted for {action}")
65
+ if not isinstance(coordinate, list) or len(coordinate) != 2:
66
+ raise ToolParsingError(f"{coordinate} must be a tuple of length 2")
67
+ if not all(isinstance(i, int) and i >= 0 for i in coordinate):
68
+ raise ToolParsingError(
69
+ f"{coordinate} must be a tuple of non-negative ints"
70
+ )
71
+
72
+ if action == "mouse_move":
73
+ return await common.mouse_move(
74
+ coordinate[0], coordinate[1], timeout=timeout
75
+ )
76
+ elif action == "left_click_drag":
77
+ return await common.left_click_drag(
78
+ coordinate[0], coordinate[1], timeout=timeout
79
+ )
80
+
81
+ if action in ("key", "type"):
82
+ if text is None:
83
+ raise ToolParsingError(f"text is required for {action}")
84
+ if coordinate is not None:
85
+ raise ToolParsingError(f"coordinate is not accepted for {action}")
86
+ if not isinstance(text, str):
87
+ raise ToolParsingError(output=f"{text} must be a string")
88
+
89
+ if action == "key":
90
+ return await common.press_key(text, timeout=timeout)
91
+ elif action == "type":
92
+ return await common.type(text, timeout=timeout)
93
+
94
+ if action in (
95
+ "left_click",
96
+ "right_click",
97
+ "double_click",
98
+ "middle_click",
99
+ "screenshot",
100
+ "cursor_position",
101
+ ):
102
+ if text is not None:
103
+ raise ToolParsingError(f"text is not accepted for {action}")
104
+ if coordinate is not None:
105
+ raise ToolParsingError(f"coordinate is not accepted for {action}")
106
+
107
+ if action == "screenshot":
108
+ return await common.screenshot(timeout=timeout)
109
+ elif action == "cursor_position":
110
+ return await common.cursor_position(timeout=timeout)
111
+ elif action == "left_click":
112
+ return await common.left_click(timeout=timeout)
113
+ elif action == "right_click":
114
+ return await common.right_click(timeout=timeout)
115
+ elif action == "middle_click":
116
+ return await common.middle_click(timeout=timeout)
117
+ elif action == "double_click":
118
+ return await common.double_click(timeout=timeout)
119
+
120
+ raise ToolParsingError(f"Invalid action: {action}")
121
+
122
+ # if max_screenshots is specified then poke model input into where @tool can find it
123
+ if max_screenshots is not None:
124
+ setattr(execute, TOOL_INIT_MODEL_INPUT, _computer_model_input(max_screenshots))
125
+
126
+ return execute
127
+
128
+
129
+ def _computer_model_input(max_screenshots: int) -> ToolCallModelInput:
130
+ def model_input(
131
+ message_index: int, message_total: int, content: str | list[Content]
132
+ ) -> str | list[Content]:
133
+ # nothing to do for scalars
134
+ if isinstance(content, str):
135
+ return content
136
+
137
+ # if we are inside max_screenshots then return as is
138
+ elif (message_total - message_index) <= max_screenshots:
139
+ return content
140
+
141
+ # otherwise convert images to text placeholders
142
+ else:
143
+ input_content: list[Content] = []
144
+ for c in content:
145
+ if isinstance(c, ContentImage):
146
+ input_content.append(
147
+ ContentText(
148
+ text="Screenshot removed to reduce size of input. Please consult the latest screenshots for the most up to date state of the screen."
149
+ )
150
+ )
151
+ else:
152
+ input_content.append(c)
153
+ return input_content
154
+
155
+ return model_input
@@ -0,0 +1,198 @@
1
+ """
2
+ This module provides the same functionality as the computer tool but via a list of per-action tools, e.g. computer_mouse_move(100, 100).
3
+
4
+ The split version is not publicly exported, but is retained until we decide if it performs better than the monolithic computer tool.
5
+ """
6
+
7
+ from typing import Awaitable, Callable
8
+
9
+ from inspect_ai.tool import Tool, ToolResult, tool
10
+
11
+ from . import _common as common
12
+
13
+ ActionFunction = Callable[[str], ToolResult | Awaitable[ToolResult]]
14
+
15
+
16
+ def computer_split(timeout: int | None = None) -> list[Tool]:
17
+ """
18
+ Computer interaction tools.
19
+
20
+ Args:
21
+ timeout (int | None): Timeout (in seconds) for command.
22
+
23
+ Returns:
24
+ List of computer interaction tools.
25
+ """
26
+ return [
27
+ computer_cursor_position(),
28
+ computer_screenshot(),
29
+ computer_mouse_move(),
30
+ computer_left_click(),
31
+ computer_double_click(),
32
+ computer_left_click_drag(),
33
+ computer_right_click(),
34
+ computer_key(),
35
+ computer_type(),
36
+ ]
37
+
38
+
39
+ @tool()
40
+ def computer_cursor_position(timeout: int | None = None) -> Tool:
41
+ async def execute() -> ToolResult:
42
+ """
43
+ Get the current (x, y) pixel coordinate of the cursor on the screen.
44
+
45
+ Args:
46
+ None
47
+
48
+ Returns:
49
+ A `str` of the form "x y" where x and y are the current mouse coordinates.
50
+ """
51
+ return await common.cursor_position(timeout=timeout)
52
+
53
+ return execute
54
+
55
+
56
+ @tool()
57
+ def computer_screenshot(timeout: int | None = None) -> Tool:
58
+ async def execute() -> ToolResult:
59
+ """
60
+ Take a screenshot.
61
+
62
+ Args:
63
+ None
64
+
65
+ Returns:
66
+ A `list` with a single `ContentImage` of the screen.
67
+ """
68
+ return await common.screenshot(timeout=timeout)
69
+
70
+ return execute
71
+
72
+
73
+ @tool()
74
+ def computer_mouse_move(timeout: int | None = None) -> Tool:
75
+ async def execute(x: int, y: int) -> ToolResult:
76
+ """
77
+ Move the cursor to a specified (x, y) pixel coordinate on the screen.
78
+
79
+ Args:
80
+ x: X coordinate of the mouse destination.
81
+ y: Y coordinate of the mouse destination.
82
+
83
+ Returns:
84
+ A `list` with a single `ContentImage` of the screen.
85
+ """
86
+ return await common.mouse_move(x, y, timeout=timeout)
87
+
88
+ return execute
89
+
90
+
91
+ @tool()
92
+ def computer_left_click(timeout: int | None = None) -> Tool:
93
+ async def execute() -> ToolResult:
94
+ """
95
+ Click the left mouse button.
96
+
97
+ Args:
98
+ None
99
+
100
+ Returns:
101
+ A `list` with a single `ContentImage` of the screen.
102
+ """
103
+ return await common.left_click(timeout=timeout)
104
+
105
+ return execute
106
+
107
+
108
+ @tool()
109
+ def computer_double_click(timeout: int | None = None) -> Tool:
110
+ async def execute() -> ToolResult:
111
+ """
112
+ Double-click the left mouse button.
113
+
114
+ Args:
115
+ None
116
+
117
+ Returns:
118
+ A `list` with a single `ContentImage` of the screen.
119
+ """
120
+ return await common.double_click(timeout=timeout)
121
+
122
+ return execute
123
+
124
+
125
+ @tool()
126
+ def computer_left_click_drag(timeout: int | None = None) -> Tool:
127
+ async def execute(x: int, y: int) -> ToolResult:
128
+ """
129
+ Click and drag the cursor to a specified (x, y) pixel coordinate on the screen.
130
+
131
+ Args:
132
+ x: X coordinate of the mouse destination.
133
+ y: Y coordinate of the mouse destination.
134
+
135
+ Returns:
136
+ A `list` with a single `ContentImage` of the screen.
137
+ """
138
+ return await common.left_click_drag(x, y, timeout=timeout)
139
+
140
+ return execute
141
+
142
+
143
+ @tool()
144
+ def computer_right_click(timeout: int | None = None) -> Tool:
145
+ async def execute() -> ToolResult:
146
+ """
147
+ Click the right mouse button.
148
+
149
+ Args:
150
+ None
151
+
152
+ Returns:
153
+ A `list` with a single `ContentImage` of the screen.
154
+ """
155
+ return await common.right_click(timeout=timeout)
156
+
157
+ return execute
158
+
159
+
160
+ # keysym list is from https://gist.github.com/rvaiya/be31f42049a4b5ad46666a8e120d9843
161
+ @tool()
162
+ def computer_key(timeout: int | None = None) -> Tool:
163
+ async def execute(key: str) -> ToolResult:
164
+ """
165
+ Press a key or key-combination on the keyboard.
166
+
167
+ Args:
168
+ key: The key or key-combination to press. Can be any key name supported by xdotool's `key` such as:
169
+ "Return", "Escape", "alt+Tab", "BackSpace", "Tab", "alt+Tab", "ctrl+s", "Up", "KP_0" (for the numpad 0 key),
170
+ "Insert", "Delete", "Home", "End", "Prior", "Next", "Left", "Up", "Right", "Down",
171
+ "F1", "F2", "F3", "F4", "F5", "F6", "F7", "F8", "F9", "F10", "F11", "F12",
172
+ "Shift_L", "Shift_R", "Control_L", "Control_R", "Alt_L", "Alt_R", "Scroll_Lock", "Num_Lock", "Caps_Lock", "Pause",
173
+ "KP_Multiply", "KP_Home", "KP_Up", "KP_Prior", "KP_Subtract", "KP_Left", "KP_Begin", "KP_Right", "KP_Add", "KP_End","KP_Down",
174
+ "KP_Next", "KP_Insert", "KP_Delete", "KP_Enter", "KP_Divide", "KP_Equal", "KP_Decimal"
175
+
176
+ Returns:
177
+ A `list` with a single `ContentImage` of the screen.
178
+ """
179
+ return await common.press_key(key, timeout=timeout)
180
+
181
+ return execute
182
+
183
+
184
+ @tool()
185
+ def computer_type(timeout: int | None = None) -> Tool:
186
+ async def execute(text: str) -> ToolResult:
187
+ """
188
+ Type a string of text on the keyboard.
189
+
190
+ Args:
191
+ text: The text to type. If the text contains spaces, enclose it in quotes.
192
+
193
+ Returns:
194
+ A `list` with a single `ContentImage` of the screen.
195
+ """
196
+ return await common.type(text, timeout=timeout)
197
+
198
+ return execute
@@ -0,0 +1,100 @@
1
+ FROM docker.io/ubuntu:22.04
2
+
3
+ ENV DEBIAN_FRONTEND=noninteractive
4
+ ENV DEBIAN_PRIORITY=high
5
+
6
+ # Core/system layer
7
+ RUN apt-get update && \
8
+ apt-get -y upgrade && \
9
+ apt-get -y install \
10
+ # A virtual framebuffer for running GUI applications without a physical display.
11
+ xvfb \
12
+ # A lightweight desktop environment for UNIX-like operating systems.
13
+ xfce4 \
14
+ # The terminal emulator for the xfce4 desktop environment.
15
+ xfce4-terminal\
16
+ # A VNC server for sharing X11 desktops.
17
+ x11vnc \
18
+ # A web based VNC client
19
+ novnc \
20
+ # A WebSocket to TCP proxy/bridge for noVNC
21
+ websockify \
22
+ # The Python programming language interpreter.
23
+ python3 \
24
+ # The package installer for Python.
25
+ python3-pip \
26
+ # A command-line tool for automating X11 applications (e.g., simulating keyboard/mouse inputs).
27
+ xdotool \
28
+ # A command-line tool for taking screenshots.
29
+ scrot \
30
+ # A suite for image manipulation — needed for scaling images.
31
+ imagemagick && \
32
+ apt-get clean
33
+
34
+ # Userland apt-get'able apps
35
+ RUN apt-get install -y --no-install-recommends \
36
+ # A simple paint program.
37
+ xpaint \
38
+ # A calculator application.
39
+ galculator && \
40
+ apt-get clean
41
+
42
+ # install Firefox
43
+ RUN apt-get install -y software-properties-common && \
44
+ add-apt-repository ppa:mozillateam/ppa && \
45
+ apt-get update && \
46
+ apt-get install -y --no-install-recommends firefox-esr && \
47
+ apt-get clean
48
+
49
+ # install VS Code
50
+ RUN apt-get install -y \
51
+ gpg \
52
+ wget \
53
+ apt-transport-https \
54
+ software-properties-common && \
55
+ wget -qO- https://packages.microsoft.com/keys/microsoft.asc | gpg --dearmor > packages.microsoft.gpg && \
56
+ install -D -o root -g root -m 644 packages.microsoft.gpg /etc/apt/keyrings/packages.microsoft.gpg && \
57
+ sh -c 'echo "deb [arch=amd64,arm64 signed-by=/etc/apt/keyrings/packages.microsoft.gpg] https://packages.microsoft.com/repos/code stable main" > /etc/apt/sources.list.d/vscode.list' && \
58
+ apt-get update && \
59
+ apt-get install -y code && \
60
+ apt-get clean
61
+
62
+ # configure noVNC
63
+ RUN ln -s /usr/share/novnc/vnc.html /usr/share/novnc/index.html
64
+
65
+ # We copy requirements.txt by itself so that changes to the scripts will be in a later layer
66
+ # and we only pip install if requirements.txt changes
67
+ COPY tool/requirements.txt /opt/inspect/tool/requirements.txt
68
+ RUN cd /opt/inspect/tool && pip3 install --no-cache-dir -r requirements.txt
69
+
70
+ COPY tool/ /opt/inspect/tool
71
+ COPY entrypoint/ /opt/inspect/entrypoint
72
+ RUN chmod -R 755 /opt/inspect
73
+
74
+ # setup user
75
+ ENV USERNAME=user
76
+ ENV HOME=/home/$USERNAME
77
+ RUN useradd -m -s /bin/bash -d $HOME $USERNAME
78
+ RUN echo "${USERNAME} ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers
79
+ USER ${USERNAME}
80
+ WORKDIR $HOME
81
+ COPY --chown=$USERNAME:$USERNAME image_home_dir/ $HOME
82
+
83
+ # configure Firefox to skip all 'first run' UI
84
+ RUN mkdir -p $HOME/.mozilla/firefox-esr/profile.default && \
85
+ echo 'user_pref("browser.startup.homepage_override.mstone", "ignore");' >> $HOME/.mozilla/firefox-esr/profile.default/user.js && \
86
+ echo 'user_pref("browser.aboutwelcome.enabled", false);' >> $HOME/.mozilla/firefox-esr/profile.default/user.js && \
87
+ echo 'user_pref("datareporting.policy.firstRunURL", "");' >> $HOME/.mozilla/firefox-esr/profile.default/user.js
88
+
89
+ EXPOSE 5900
90
+ EXPOSE 6080
91
+
92
+ ARG DISPLAY_NUM=1
93
+ ARG WIDTH=1920
94
+ ARG HEIGHT=1080
95
+ ENV DISPLAY_NUM=$DISPLAY_NUM
96
+ ENV DISPLAY=:${DISPLAY_NUM}
97
+ ENV HEIGHT=$HEIGHT
98
+ ENV WIDTH=$WIDTH
99
+
100
+ ENTRYPOINT [ "/opt/inspect/entrypoint/entrypoint.sh" ]
@@ -0,0 +1,30 @@
1
+ # About This Image
2
+
3
+ This image was inspired by Anthropic's Computer Use Demo [here](https://github.com/anthropics/anthropic-quickstarts/tree/main/computer-use-demo/image).
4
+
5
+ Its goal is to provide the minimum infrastructure to support the use of Inspect's `computer_tool` to interact with the computer via X11 and `xdotool`, while also providing observability and interaction via VNC and noVNC.
6
+
7
+ The image extends this minimal functionality by adding a few basic applications — VS Code, Firefox, XPaint, and galculator.
8
+
9
+ ## Entrypoint Directory
10
+
11
+ 1. **Xvfb (X Virtual Framebuffer)**
12
+ - **Script:** `xvfb_startup.sh`
13
+ - **Description:** Xvfb is a display server that implements the X11 display server protocol. It runs in memory and does not require a physical display, useful for running graphical applications in a headless environment.
14
+
15
+ 1. **xfce4**
16
+ - **Script:** `xfce_startup.sh`
17
+ - **Description:** xfce4 is a lightweight desktop environment for UNIX-like operating systems. It aims to be fast, low on system resources, and user-friendly.
18
+
19
+ 1. **x11vnc**
20
+ - **Script:** `x11vnc_startup.sh`
21
+ - **Description:** x11vnc is a VNC server that allows remote access to the X11 display. It enables users to connect to the virtual display environment from a remote machine using a VNC client.
22
+
23
+ 1. **noVNC**
24
+ - **Script:** `novnc_startup.sh`
25
+ - **Description:** noVNC is a VNC client that runs in a web browser. It allows users to access the virtual display environment through a web interface without needing a separate VNC client application.
26
+
27
+ ## Desktop Directory
28
+
29
+ The `Desktop` directory contains launchers for VS Code, Firefox and XPaint.
30
+