inspect-ai 0.3.58__py3-none-any.whl → 0.3.60__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (166) hide show
  1. inspect_ai/_cli/common.py +3 -1
  2. inspect_ai/_cli/eval.py +15 -9
  3. inspect_ai/_display/core/active.py +4 -1
  4. inspect_ai/_display/core/config.py +3 -3
  5. inspect_ai/_display/core/panel.py +7 -3
  6. inspect_ai/_display/plain/__init__.py +0 -0
  7. inspect_ai/_display/plain/display.py +203 -0
  8. inspect_ai/_display/rich/display.py +0 -5
  9. inspect_ai/_display/textual/widgets/port_mappings.py +110 -0
  10. inspect_ai/_display/textual/widgets/samples.py +79 -12
  11. inspect_ai/_display/textual/widgets/sandbox.py +37 -0
  12. inspect_ai/_eval/eval.py +10 -1
  13. inspect_ai/_eval/loader.py +79 -19
  14. inspect_ai/_eval/registry.py +6 -0
  15. inspect_ai/_eval/score.py +3 -1
  16. inspect_ai/_eval/task/results.py +51 -22
  17. inspect_ai/_eval/task/run.py +47 -13
  18. inspect_ai/_eval/task/sandbox.py +10 -5
  19. inspect_ai/_util/constants.py +1 -0
  20. inspect_ai/_util/port_names.py +61 -0
  21. inspect_ai/_util/text.py +23 -0
  22. inspect_ai/_view/www/App.css +31 -1
  23. inspect_ai/_view/www/dist/assets/index.css +31 -1
  24. inspect_ai/_view/www/dist/assets/index.js +25498 -2044
  25. inspect_ai/_view/www/log-schema.json +32 -2
  26. inspect_ai/_view/www/package.json +2 -0
  27. inspect_ai/_view/www/src/App.mjs +14 -16
  28. inspect_ai/_view/www/src/Types.mjs +1 -2
  29. inspect_ai/_view/www/src/api/Types.ts +133 -0
  30. inspect_ai/_view/www/src/api/{api-browser.mjs → api-browser.ts} +25 -13
  31. inspect_ai/_view/www/src/api/api-http.ts +219 -0
  32. inspect_ai/_view/www/src/api/api-shared.ts +47 -0
  33. inspect_ai/_view/www/src/api/{api-vscode.mjs → api-vscode.ts} +22 -19
  34. inspect_ai/_view/www/src/api/{client-api.mjs → client-api.ts} +93 -53
  35. inspect_ai/_view/www/src/api/index.ts +51 -0
  36. inspect_ai/_view/www/src/api/jsonrpc.ts +225 -0
  37. inspect_ai/_view/www/src/components/ChatView.mjs +133 -43
  38. inspect_ai/_view/www/src/components/DownloadButton.mjs +1 -1
  39. inspect_ai/_view/www/src/components/ExpandablePanel.mjs +0 -4
  40. inspect_ai/_view/www/src/components/LargeModal.mjs +19 -20
  41. inspect_ai/_view/www/src/components/TabSet.mjs +3 -1
  42. inspect_ai/_view/www/src/components/VirtualList.mjs +266 -84
  43. inspect_ai/_view/www/src/index.js +77 -4
  44. inspect_ai/_view/www/src/log/{remoteLogFile.mjs → remoteLogFile.ts} +62 -46
  45. inspect_ai/_view/www/src/navbar/Navbar.mjs +4 -1
  46. inspect_ai/_view/www/src/navbar/SecondaryBar.mjs +19 -10
  47. inspect_ai/_view/www/src/samples/SampleDialog.mjs +5 -1
  48. inspect_ai/_view/www/src/samples/SampleDisplay.mjs +23 -15
  49. inspect_ai/_view/www/src/samples/SampleList.mjs +19 -49
  50. inspect_ai/_view/www/src/samples/SampleScores.mjs +1 -1
  51. inspect_ai/_view/www/src/samples/SampleTranscript.mjs +8 -3
  52. inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +38 -26
  53. inspect_ai/_view/www/src/samples/SamplesTab.mjs +14 -11
  54. inspect_ai/_view/www/src/samples/SamplesTools.mjs +8 -8
  55. inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +712 -89
  56. inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +2 -2
  57. inspect_ai/_view/www/src/samples/tools/filters.mjs +260 -87
  58. inspect_ai/_view/www/src/samples/transcript/ErrorEventView.mjs +24 -2
  59. inspect_ai/_view/www/src/samples/transcript/EventPanel.mjs +29 -24
  60. inspect_ai/_view/www/src/samples/transcript/EventRow.mjs +1 -1
  61. inspect_ai/_view/www/src/samples/transcript/InfoEventView.mjs +24 -2
  62. inspect_ai/_view/www/src/samples/transcript/InputEventView.mjs +24 -2
  63. inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +31 -10
  64. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.mjs +24 -2
  65. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.mjs +23 -2
  66. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.mjs +24 -2
  67. inspect_ai/_view/www/src/samples/transcript/StepEventView.mjs +33 -3
  68. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.mjs +25 -2
  69. inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +25 -2
  70. inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +193 -11
  71. inspect_ai/_view/www/src/samples/transcript/Types.mjs +10 -0
  72. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +26 -2
  73. inspect_ai/_view/www/src/types/log.d.ts +13 -2
  74. inspect_ai/_view/www/src/utils/Format.mjs +10 -3
  75. inspect_ai/_view/www/src/utils/{Json.mjs → json-worker.ts} +13 -9
  76. inspect_ai/_view/www/src/utils/vscode.ts +36 -0
  77. inspect_ai/_view/www/src/workspace/WorkSpace.mjs +11 -5
  78. inspect_ai/_view/www/vite.config.js +7 -0
  79. inspect_ai/_view/www/yarn.lock +116 -0
  80. inspect_ai/approval/_human/__init__.py +0 -0
  81. inspect_ai/approval/_human/manager.py +1 -1
  82. inspect_ai/approval/_policy.py +12 -6
  83. inspect_ai/log/_log.py +1 -1
  84. inspect_ai/log/_samples.py +16 -0
  85. inspect_ai/log/_transcript.py +4 -1
  86. inspect_ai/model/_call_tools.py +59 -0
  87. inspect_ai/model/_conversation.py +16 -7
  88. inspect_ai/model/_generate_config.py +12 -12
  89. inspect_ai/model/_model.py +117 -18
  90. inspect_ai/model/_model_output.py +22 -2
  91. inspect_ai/model/_openai.py +383 -0
  92. inspect_ai/model/_providers/anthropic.py +152 -55
  93. inspect_ai/model/_providers/azureai.py +21 -21
  94. inspect_ai/model/_providers/bedrock.py +37 -40
  95. inspect_ai/model/_providers/goodfire.py +248 -0
  96. inspect_ai/model/_providers/google.py +46 -54
  97. inspect_ai/model/_providers/groq.py +7 -3
  98. inspect_ai/model/_providers/hf.py +6 -0
  99. inspect_ai/model/_providers/mistral.py +13 -12
  100. inspect_ai/model/_providers/openai.py +51 -218
  101. inspect_ai/model/_providers/openai_o1.py +11 -12
  102. inspect_ai/model/_providers/providers.py +23 -1
  103. inspect_ai/model/_providers/together.py +12 -12
  104. inspect_ai/model/_providers/util/__init__.py +2 -3
  105. inspect_ai/model/_providers/util/hf_handler.py +1 -1
  106. inspect_ai/model/_providers/util/llama31.py +1 -1
  107. inspect_ai/model/_providers/util/util.py +0 -76
  108. inspect_ai/model/_providers/vertex.py +1 -4
  109. inspect_ai/scorer/_metric.py +3 -0
  110. inspect_ai/scorer/_reducer/reducer.py +1 -1
  111. inspect_ai/scorer/_scorer.py +4 -3
  112. inspect_ai/solver/__init__.py +4 -5
  113. inspect_ai/solver/_basic_agent.py +1 -1
  114. inspect_ai/solver/_bridge/__init__.py +3 -0
  115. inspect_ai/solver/_bridge/bridge.py +100 -0
  116. inspect_ai/solver/_bridge/patch.py +170 -0
  117. inspect_ai/solver/_prompt.py +35 -5
  118. inspect_ai/solver/_solver.py +6 -0
  119. inspect_ai/solver/_task_state.py +80 -38
  120. inspect_ai/tool/__init__.py +2 -0
  121. inspect_ai/tool/_tool.py +12 -1
  122. inspect_ai/tool/_tool_call.py +10 -0
  123. inspect_ai/tool/_tool_def.py +16 -5
  124. inspect_ai/tool/_tool_with.py +21 -4
  125. inspect_ai/tool/beta/__init__.py +5 -0
  126. inspect_ai/tool/beta/_computer/__init__.py +3 -0
  127. inspect_ai/tool/beta/_computer/_common.py +133 -0
  128. inspect_ai/tool/beta/_computer/_computer.py +155 -0
  129. inspect_ai/tool/beta/_computer/_computer_split.py +198 -0
  130. inspect_ai/tool/beta/_computer/_resources/Dockerfile +100 -0
  131. inspect_ai/tool/beta/_computer/_resources/README.md +30 -0
  132. inspect_ai/tool/beta/_computer/_resources/entrypoint/entrypoint.sh +18 -0
  133. inspect_ai/tool/beta/_computer/_resources/entrypoint/novnc_startup.sh +20 -0
  134. inspect_ai/tool/beta/_computer/_resources/entrypoint/x11vnc_startup.sh +48 -0
  135. inspect_ai/tool/beta/_computer/_resources/entrypoint/xfce_startup.sh +13 -0
  136. inspect_ai/tool/beta/_computer/_resources/entrypoint/xvfb_startup.sh +48 -0
  137. inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +10 -0
  138. inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +10 -0
  139. inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/XPaint.desktop +10 -0
  140. inspect_ai/tool/beta/_computer/_resources/tool/__init__.py +0 -0
  141. inspect_ai/tool/beta/_computer/_resources/tool/_logger.py +22 -0
  142. inspect_ai/tool/beta/_computer/_resources/tool/_run.py +42 -0
  143. inspect_ai/tool/beta/_computer/_resources/tool/_tool_result.py +33 -0
  144. inspect_ai/tool/beta/_computer/_resources/tool/_x11_client.py +262 -0
  145. inspect_ai/tool/beta/_computer/_resources/tool/computer_tool.py +85 -0
  146. inspect_ai/tool/beta/_computer/_resources/tool/requirements.txt +0 -0
  147. inspect_ai/util/__init__.py +2 -0
  148. inspect_ai/util/_display.py +5 -0
  149. inspect_ai/util/_limit.py +26 -0
  150. inspect_ai/util/_sandbox/docker/docker.py +64 -1
  151. inspect_ai/util/_sandbox/docker/internal.py +3 -1
  152. inspect_ai/util/_sandbox/docker/prereqs.py +1 -1
  153. inspect_ai/util/_sandbox/environment.py +14 -0
  154. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/METADATA +3 -2
  155. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/RECORD +159 -126
  156. inspect_ai/_view/www/src/api/Types.mjs +0 -117
  157. inspect_ai/_view/www/src/api/api-http.mjs +0 -300
  158. inspect_ai/_view/www/src/api/api-shared.mjs +0 -10
  159. inspect_ai/_view/www/src/api/index.mjs +0 -49
  160. inspect_ai/_view/www/src/api/jsonrpc.mjs +0 -208
  161. inspect_ai/_view/www/src/samples/transcript/TranscriptState.mjs +0 -70
  162. inspect_ai/_view/www/src/utils/vscode.mjs +0 -16
  163. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/LICENSE +0 -0
  164. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/WHEEL +0 -0
  165. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/entry_points.txt +0 -0
  166. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/top_level.txt +0 -0
inspect_ai/tool/_tool.py CHANGED
@@ -24,7 +24,7 @@ from inspect_ai._util.registry import (
24
24
  registry_tag,
25
25
  )
26
26
 
27
- from ._tool_call import ToolCallViewer
27
+ from ._tool_call import ToolCallModelInput, ToolCallViewer
28
28
 
29
29
  logger = getLogger(__name__)
30
30
 
@@ -112,6 +112,7 @@ def tool(
112
112
  *,
113
113
  name: str | None = None,
114
114
  viewer: ToolCallViewer | None = None,
115
+ model_input: ToolCallModelInput | None = None,
115
116
  parallel: bool = True,
116
117
  prompt: str | None = None,
117
118
  ) -> Callable[[Callable[P, Tool]], Callable[P, Tool]]: ...
@@ -122,6 +123,7 @@ def tool(
122
123
  *,
123
124
  name: str | None = None,
124
125
  viewer: ToolCallViewer | None = None,
126
+ model_input: ToolCallModelInput | None = None,
125
127
  parallel: bool = True,
126
128
  prompt: str | None = None,
127
129
  ) -> Callable[P, Tool] | Callable[[Callable[P, Tool]], Callable[P, Tool]]:
@@ -135,6 +137,8 @@ def tool(
135
137
  will be used as the name of the tool.
136
138
  viewer (ToolCallViewer | None): Provide a custom view
137
139
  of tool call and context.
140
+ model_input (ToolCallModelInput | None): Provide a custom
141
+ function for playing back tool results as model input.
138
142
  parallel (bool):
139
143
  Does this tool support parallel execution?
140
144
  (defaults to True).
@@ -176,6 +180,9 @@ def tool(
176
180
  TOOL_PROMPT: prompt,
177
181
  TOOL_PARALLEL: parallel,
178
182
  TOOL_VIEWER: viewer,
183
+ TOOL_MODEL_INPUT: (
184
+ model_input or getattr(tool, TOOL_INIT_MODEL_INPUT, None)
185
+ ),
179
186
  },
180
187
  ),
181
188
  *args,
@@ -195,3 +202,7 @@ def tool(
195
202
  TOOL_PROMPT = "prompt"
196
203
  TOOL_PARALLEL = "parallel"
197
204
  TOOL_VIEWER = "viewer"
205
+ TOOL_MODEL_INPUT = "model_input"
206
+
207
+
208
+ TOOL_INIT_MODEL_INPUT = "__TOOL_INIT_MODEL_INPUT__"
@@ -3,6 +3,8 @@ from typing import Any, Callable, Literal
3
3
 
4
4
  from pydantic import BaseModel, Field
5
5
 
6
+ from inspect_ai._util.content import Content
7
+
6
8
 
7
9
  class ToolCallContent(BaseModel):
8
10
  """Content to include in tool call view."""
@@ -71,3 +73,11 @@ class ToolCallError:
71
73
 
72
74
  ToolCallViewer = Callable[[ToolCall], ToolCallView]
73
75
  """Custom view renderer for tool calls."""
76
+
77
+
78
+ ToolCallModelInput = Callable[[int, int, str | list[Content]], str | list[Content]]
79
+ """Determine how tool call results are played back as model input.
80
+
81
+ The first argument is an index into the total number of tool results
82
+ for this tool in the message history, the second is the total number.
83
+ """
@@ -13,8 +13,8 @@ from inspect_ai._util.registry import (
13
13
  set_registry_params,
14
14
  )
15
15
 
16
- from ._tool import TOOL_PARALLEL, TOOL_PROMPT, TOOL_VIEWER, Tool
17
- from ._tool_call import ToolCallViewer
16
+ from ._tool import TOOL_MODEL_INPUT, TOOL_PARALLEL, TOOL_PROMPT, TOOL_VIEWER, Tool
17
+ from ._tool_call import ToolCallModelInput, ToolCallViewer
18
18
  from ._tool_description import (
19
19
  ToolDescription,
20
20
  set_tool_description,
@@ -33,6 +33,7 @@ class ToolDef:
33
33
  parameters: dict[str, str] | ToolParams | None = None,
34
34
  parallel: bool | None = None,
35
35
  viewer: ToolCallViewer | None = None,
36
+ model_input: ToolCallModelInput | None = None,
36
37
  ) -> None:
37
38
  """Tool definition.
38
39
 
@@ -46,6 +47,8 @@ class ToolDef:
46
47
  parallel (bool | None): Does the tool support parallel execution
47
48
  (defaults to True if not specified)
48
49
  viewer (ToolCallViewer | None): Optional tool call viewer implementation.
50
+ model_input (ToolCallModelInput | None): Optional function that determines how
51
+ tool call results are played back as model input.
49
52
 
50
53
  Returns:
51
54
  Tool definition.
@@ -68,6 +71,7 @@ class ToolDef:
68
71
  parameters = parameters if parameters is not None else tdef.parameters
69
72
  self.parallel = parallel if parallel is not None else tdef.parallel
70
73
  self.viewer = viewer or tdef.viewer
74
+ self.model_input = model_input or tdef.model_input
71
75
 
72
76
  # if its not a tool then extract tool_info if all fields have not
73
77
  # been provided explicitly
@@ -97,6 +101,7 @@ class ToolDef:
97
101
  # behavioral attributes
98
102
  self.parallel = parallel is not False
99
103
  self.viewer = viewer
104
+ self.model_input = model_input
100
105
 
101
106
  tool: Callable[..., Any]
102
107
  """Callable to execute tool."""
@@ -116,6 +121,9 @@ class ToolDef:
116
121
  viewer: ToolCallViewer | None
117
122
  """Custom viewer for tool call"""
118
123
 
124
+ model_input: ToolCallModelInput | None
125
+ """Custom model input presenter for tool calls."""
126
+
119
127
  def as_tool(self) -> Tool:
120
128
  """Convert a ToolDef to a Tool."""
121
129
  tool = self.tool
@@ -159,11 +167,12 @@ class ToolDefFields(NamedTuple):
159
167
  parameters: ToolParams
160
168
  parallel: bool
161
169
  viewer: ToolCallViewer | None
170
+ model_input: ToolCallModelInput | None
162
171
 
163
172
 
164
173
  def tool_def_fields(tool: Tool) -> ToolDefFields:
165
174
  # get tool_info
166
- name, prompt, parallel, viewer = tool_registry_info(tool)
175
+ name, prompt, parallel, viewer, model_input = tool_registry_info(tool)
167
176
  tool_info = parse_tool_info(tool)
168
177
 
169
178
  # if there is a description then append any prompt to the
@@ -213,15 +222,17 @@ def tool_def_fields(tool: Tool) -> ToolDefFields:
213
222
  parameters=tool_info.parameters,
214
223
  parallel=parallel,
215
224
  viewer=viewer,
225
+ model_input=model_input,
216
226
  )
217
227
 
218
228
 
219
229
  def tool_registry_info(
220
230
  tool: Tool,
221
- ) -> tuple[str, str | None, bool, ToolCallViewer | None]:
231
+ ) -> tuple[str, str | None, bool, ToolCallViewer | None, ToolCallModelInput | None]:
222
232
  info = registry_info(tool)
223
233
  name = info.name.split("/")[-1]
224
234
  prompt = info.metadata.get(TOOL_PROMPT, None)
225
235
  parallel = info.metadata.get(TOOL_PARALLEL, True)
226
236
  viewer = info.metadata.get(TOOL_VIEWER, None)
227
- return name, prompt, parallel, viewer
237
+ model_input = info.metadata.get(TOOL_MODEL_INPUT, None)
238
+ return name, prompt, parallel, viewer, model_input
@@ -1,4 +1,4 @@
1
- from copy import copy
1
+ from copy import deepcopy
2
2
 
3
3
  from inspect_ai._util.registry import (
4
4
  registry_info,
@@ -6,8 +6,9 @@ from inspect_ai._util.registry import (
6
6
  set_registry_info,
7
7
  set_registry_params,
8
8
  )
9
+ from inspect_ai.tool._tool_call import ToolCallModelInput, ToolCallViewer
9
10
 
10
- from ._tool import Tool
11
+ from ._tool import TOOL_MODEL_INPUT, TOOL_PARALLEL, TOOL_VIEWER, Tool
11
12
  from ._tool_description import ToolDescription, set_tool_description
12
13
  from ._tool_info import parse_tool_info
13
14
 
@@ -17,6 +18,9 @@ def tool_with(
17
18
  name: str | None = None,
18
19
  description: str | None = None,
19
20
  parameters: dict[str, str] | None = None,
21
+ parallel: bool | None = None,
22
+ viewer: ToolCallViewer | None = None,
23
+ model_input: ToolCallModelInput | None = None,
20
24
  ) -> Tool:
21
25
  """Tool with modifications to name and descriptions.
22
26
 
@@ -25,6 +29,11 @@ def tool_with(
25
29
  name (str | None): Tool name (optional).
26
30
  description (str | None): Tool description (optional).
27
31
  parameters (dict[str,str] | None): Parameter descriptions (optional)
32
+ parallel (bool | None): Does the tool support parallel execution
33
+ (defaults to True if not specified)
34
+ viewer (ToolCallViewer | None): Optional tool call viewer implementation.
35
+ model_input (ToolCallModelInput | None): Optional function that determines how
36
+ tool call results are played back as model input.
28
37
 
29
38
  Returns:
30
39
  A copy of the passed tool with the specified descriptive information.
@@ -46,8 +55,16 @@ def tool_with(
46
55
  ]
47
56
 
48
57
  # copy the tool and set the descriptions on the new copy
49
- tool_copy = copy(tool)
50
- set_registry_info(tool_copy, registry_info(tool))
58
+ tool_copy = deepcopy(tool)
59
+ info = registry_info(tool).model_copy()
60
+ if parallel is not None:
61
+ info.metadata[TOOL_PARALLEL] = parallel
62
+ elif viewer is not None:
63
+ info.metadata[TOOL_VIEWER] = viewer
64
+ elif model_input is not None:
65
+ info.metadata[TOOL_MODEL_INPUT] = model_input
66
+
67
+ set_registry_info(tool_copy, info)
51
68
  set_registry_params(tool_copy, registry_params(tool))
52
69
  set_tool_description(
53
70
  tool_copy,
@@ -0,0 +1,5 @@
1
+ from ._computer import computer
2
+
3
+ __all__ = [
4
+ "computer",
5
+ ]
@@ -0,0 +1,3 @@
1
+ from ._computer import computer
2
+
3
+ __all__ = ["computer"]
@@ -0,0 +1,133 @@
1
+ import json
2
+ from textwrap import dedent
3
+ from typing import Literal
4
+
5
+ from pydantic import BaseModel, Field
6
+
7
+ from inspect_ai._util.content import ContentText
8
+ from inspect_ai._util.error import PrerequisiteError
9
+ from inspect_ai.model import ContentImage
10
+ from inspect_ai.tool import ToolError, ToolResult
11
+ from inspect_ai.util._sandbox.context import sandbox_with
12
+ from inspect_ai.util._sandbox.environment import SandboxEnvironment
13
+
14
+ Action = Literal[
15
+ "key",
16
+ "type",
17
+ "mouse_move",
18
+ "left_click",
19
+ "left_click_drag",
20
+ "right_click",
21
+ "middle_click",
22
+ "double_click",
23
+ "screenshot",
24
+ "cursor_position",
25
+ ]
26
+
27
+
28
class ToolExecResult(BaseModel):
    """Result payload parsed from the JSON printed by the computer tool script."""

    # textual output of the action, if any
    output: str | None = None
    # error reported by the tool; a non-empty value is raised as a ToolError
    error: str | None = None
    # base64 image data; wrapped in a `data:image/png;base64,` URL by _send_cmd
    base64_image: str | None = None
32
+
33
+
34
async def _send_cmd(cmdTail: list[str], timeout: int | None = None) -> ToolResult:
    """Run one computer tool action in the sandbox and convert its output.

    Args:
        cmdTail: The action name followed by any action-specific arguments;
            appended after `--action` on the tool's command line.
        timeout: Timeout passed through to the sandbox `exec()` call.

    Returns:
        A `[ContentText, ContentImage]` list when the tool produced both text
        and an image, the text alone, a single-image list, or the string
        "OK" when the tool produced neither.

    Raises:
        RuntimeError: If the command did not execute successfully.
        ToolError: If the tool's JSON result carries an error message.
    """
    # imported at call time -- presumably to avoid a circular import (TODO confirm)
    from inspect_ai.log._samples import sample_active

    # The tool only makes sense while a sample is active. Compare against None
    # so that falsy-but-valid sample ids (e.g. 0) are not rejected.
    sample = sample_active()
    assert sample is not None
    assert sample.sample.id is not None

    cmd = ["python3", "/opt/inspect/tool/computer_tool.py", "--action"] + cmdTail

    raw_exec_result = await (await computer_sandbox()).exec(cmd, timeout=timeout)

    if not raw_exec_result.success:
        raise RuntimeError(
            f"Failure executing command: {cmd} {raw_exec_result.stderr}"
        )

    # the tool script prints a single JSON object on stdout
    result = ToolExecResult(**json.loads(raw_exec_result.stdout))

    if result.error:
        raise ToolError(result.error)

    image = (
        ContentImage(image=f"data:image/png;base64,{result.base64_image}")
        if result.base64_image
        else None
    )
    # treat an empty string the same as no output
    text = result.output or None

    if text is not None and image is not None:
        return [ContentText(text=text), image]

    if text is not None:
        return text

    if image is not None:
        return [image]

    return "OK"
73
+
74
+
75
async def cursor_position(timeout: int | None = None) -> ToolResult:
    """Send the `cursor_position` action to the computer tool."""
    args = ["cursor_position"]
    return await _send_cmd(args, timeout=timeout)
77
+
78
+
79
async def screenshot(timeout: int | None = None) -> ToolResult:
    """Send the `screenshot` action to the computer tool."""
    args = ["screenshot"]
    return await _send_cmd(args, timeout=timeout)
81
+
82
+
83
async def mouse_move(x: int, y: int, timeout: int | None = None) -> ToolResult:
    """Send the `mouse_move` action with (x, y) as its `--coordinate`."""
    args = ["mouse_move", "--coordinate", str(x), str(y)]
    return await _send_cmd(args, timeout=timeout)
87
+
88
+
89
async def left_click(timeout: int | None = None) -> ToolResult:
    """Send the `left_click` action to the computer tool."""
    args = ["left_click"]
    return await _send_cmd(args, timeout=timeout)
91
+
92
+
93
async def left_click_drag(x: int, y: int, timeout: int | None = None) -> ToolResult:
    """Send the `left_click_drag` action with (x, y) as its `--coordinate`."""
    args = ["left_click_drag", "--coordinate", str(x), str(y)]
    return await _send_cmd(args, timeout=timeout)
97
+
98
+
99
async def right_click(timeout: int | None = None) -> ToolResult:
    """Send the `right_click` action to the computer tool."""
    args = ["right_click"]
    return await _send_cmd(args, timeout=timeout)
101
+
102
+
103
async def middle_click(timeout: int | None = None) -> ToolResult:
    """Send the `middle_click` action to the computer tool."""
    args = ["middle_click"]
    return await _send_cmd(args, timeout=timeout)
105
+
106
+
107
async def double_click(timeout: int | None = None) -> ToolResult:
    """Send the `double_click` action to the computer tool."""
    args = ["double_click"]
    return await _send_cmd(args, timeout=timeout)
109
+
110
+
111
async def press_key(key: str, timeout: int | None = None) -> ToolResult:
    """Send the `key` action, passing the key name as `--text`."""
    args = ["key", "--text", key]
    return await _send_cmd(args, timeout=timeout)
113
+
114
+
115
async def type(text: str, timeout: int | None = None) -> ToolResult:
    """Send the `type` action, passing the text to type as `--text`.

    NOTE: this name shadows the builtin `type` within this module.
    """
    args = ["type", "--text", text]
    return await _send_cmd(args, timeout=timeout)
117
+
118
+
119
async def computer_sandbox() -> SandboxEnvironment:
    """Return the sandbox located via the computer tool script path.

    Raises:
        PrerequisiteError: If `sandbox_with()` finds no sandbox for
            `/opt/inspect/tool/computer_tool.py`.
    """
    sandbox_env = await sandbox_with("/opt/inspect/tool/computer_tool.py")
    if not sandbox_env:
        # no sandbox matched: explain how to configure one
        raise PrerequisiteError(
            dedent("""
                The computer tool service was not found in any of the sandboxes for this sample. Please add the computer tool service to your configuration. For example, the following Docker compose file uses the aisiuk/inspect-computer-tool:latest-beta image as its default sandbox:

                services:
                  default:
                    image: "aisiuk/inspect-computer-tool:latest-beta"
                    init: true
                """).strip()
        )
    return sandbox_env
@@ -0,0 +1,155 @@
1
+ from typing import Awaitable, Callable
2
+
3
+ from inspect_ai._util.content import Content, ContentImage, ContentText
4
+ from inspect_ai.tool import Tool, ToolResult, tool
5
+ from inspect_ai.tool._tool import (
6
+ TOOL_INIT_MODEL_INPUT,
7
+ ToolParsingError,
8
+ )
9
+ from inspect_ai.tool._tool_call import ToolCallModelInput
10
+
11
+ from . import _common as common
12
+ from ._common import Action
13
+
14
+ ActionFunction = Callable[[str], ToolResult | Awaitable[ToolResult]]
15
+
16
+
17
@tool
def computer(max_screenshots: int | None = 1, timeout: int | None = 180) -> Tool:
    """Create a tool for driving a desktop GUI with mouse and keyboard.

    Args:
        max_screenshots: When not None, a model-input function built by
            `_computer_model_input()` is attached to the tool so that only
            the most recent screenshots are played back to the model.
        timeout: Timeout passed to each underlying computer tool command.

    Returns:
        The `execute` tool function.
    """

    async def execute(
        action: Action,
        text: str | None = None,
        coordinate: list[int] | None = None,
    ) -> ToolResult:
        """
        Use this tool to interact with a computer.

        Use a mouse and keyboard to interact with a computer's desktop GUI.

        Keep in mind that icons require double clicks to open while other UI affordances like menu items and buttons require a single click.

        Args:
          action (Action): The action to perform.
              - `key`: Press a key or key-combination on the keyboard.
                  - Example: execute(action="key", text="ctrl+s")
                  - Text can be any key name supported by xdotool's `key` such as:
                      "Return", "Escape", "alt+Tab", "BackSpace", "Tab", "alt+Tab", "ctrl+s", "Up", "KP_0" (for the numpad 0 key),
                      "Insert", "Delete", "Home", "End", "Prior", "Next", "Left", "Up", "Right", "Down",
                      "F1", "F2", "F3", "F4", "F5", "F6", "F7", "F8", "F9", "F10", "F11", "F12",
                      "Shift_L", "Shift_R", "Control_L", "Control_R", "Alt_L", "Alt_R", "Scroll_Lock", "Num_Lock", "Caps_Lock", "Pause",
                      "KP_Multiply", "KP_Home", "KP_Up", "KP_Prior", "KP_Subtract", "KP_Left", "KP_Begin", "KP_Right", "KP_Add", "KP_End","KP_Down",
                      "KP_Next", "KP_Insert", "KP_Delete", "KP_Enter", "KP_Divide", "KP_Equal", "KP_Decimal",
              - `type`: Type a string of text on the keyboard. If the text contains spaces, enclose it in quotes.
                  - Example: execute(action="type", text="The crux of the biscuit is the apostrophe!")
              - `cursor_position`: Get the current (x, y) pixel coordinate of the cursor on the screen.
              - `mouse_move`: Move the cursor to a specified (x, y) pixel coordinate on the screen.
                  - Example: execute(action="mouse_move", coordinate=(100, 200))
              - `left_click`: Click the left mouse button.
              - `left_click_drag`: Click and drag the cursor to a specified (x, y) pixel coordinate on the screen.
                  - Example: execute(action="left_click_drag", coordinate=(150, 250))
              - `right_click`: Click the right mouse button.
              - `middle_click`: Click the middle mouse button.
              - `double_click`: Double-click the left mouse button.
              - `screenshot`: Take a screenshot.
          text (str | None): The text to type or the key to press. Required when action is "key" or "type".
          coordinate (tuple[int, int] | None): The (x, y) pixel coordinate on the screen to which to move or drag. Required when action is "mouse_move" or "left_click_drag".

        Returns:
          The output of the command. Many commands will include a screenshot reflecting the result of the command in their output.
        """
        # actions that take a coordinate and no text
        if action in ("mouse_move", "left_click_drag"):
            if coordinate is None:
                raise ToolParsingError(f"coordinate is required for {action}")
            if text is not None:
                raise ToolParsingError(f"text is not accepted for {action}")
            if not isinstance(coordinate, list) or len(coordinate) != 2:
                raise ToolParsingError(f"{coordinate} must be a tuple of length 2")
            if not all(isinstance(i, int) and i >= 0 for i in coordinate):
                raise ToolParsingError(
                    f"{coordinate} must be a tuple of non-negative ints"
                )

            if action == "mouse_move":
                return await common.mouse_move(
                    coordinate[0], coordinate[1], timeout=timeout
                )
            elif action == "left_click_drag":
                return await common.left_click_drag(
                    coordinate[0], coordinate[1], timeout=timeout
                )

        # actions that take text and no coordinate
        if action in ("key", "type"):
            if text is None:
                raise ToolParsingError(f"text is required for {action}")
            if coordinate is not None:
                raise ToolParsingError(f"coordinate is not accepted for {action}")
            if not isinstance(text, str):
                # message passed positionally, like every other
                # ToolParsingError raised in this function
                raise ToolParsingError(f"{text} must be a string")

            if action == "key":
                return await common.press_key(text, timeout=timeout)
            elif action == "type":
                return await common.type(text, timeout=timeout)

        # actions that take neither text nor a coordinate
        if action in (
            "left_click",
            "right_click",
            "double_click",
            "middle_click",
            "screenshot",
            "cursor_position",
        ):
            if text is not None:
                raise ToolParsingError(f"text is not accepted for {action}")
            if coordinate is not None:
                raise ToolParsingError(f"coordinate is not accepted for {action}")

            if action == "screenshot":
                return await common.screenshot(timeout=timeout)
            elif action == "cursor_position":
                return await common.cursor_position(timeout=timeout)
            elif action == "left_click":
                return await common.left_click(timeout=timeout)
            elif action == "right_click":
                return await common.right_click(timeout=timeout)
            elif action == "middle_click":
                return await common.middle_click(timeout=timeout)
            elif action == "double_click":
                return await common.double_click(timeout=timeout)

        # anything not dispatched above is not a recognized action
        raise ToolParsingError(f"Invalid action: {action}")

    # if max_screenshots is specified then poke model input into where @tool can find it
    if max_screenshots is not None:
        setattr(execute, TOOL_INIT_MODEL_INPUT, _computer_model_input(max_screenshots))

    return execute
127
+
128
+
129
+ def _computer_model_input(max_screenshots: int) -> ToolCallModelInput:
130
+ def model_input(
131
+ message_index: int, message_total: int, content: str | list[Content]
132
+ ) -> str | list[Content]:
133
+ # nothing to do for scalars
134
+ if isinstance(content, str):
135
+ return content
136
+
137
+ # if we are inside max_screenshots then return as is
138
+ elif (message_total - message_index) <= max_screenshots:
139
+ return content
140
+
141
+ # otherwise convert images to text placeholdrs
142
+ else:
143
+ input_content: list[Content] = []
144
+ for c in content:
145
+ if isinstance(c, ContentImage):
146
+ input_content.append(
147
+ ContentText(
148
+ text="Screenshot removed to reduce size of input. Please consult the latest screenshots for the most up to date state of the screen."
149
+ )
150
+ )
151
+ else:
152
+ input_content.append(c)
153
+ return input_content
154
+
155
+ return model_input