inspect-ai 0.3.92__py3-none-any.whl → 0.3.93__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117)
  1. inspect_ai/_cli/eval.py +27 -0
  2. inspect_ai/_eval/eval.py +19 -2
  3. inspect_ai/_eval/evalset.py +4 -1
  4. inspect_ai/_eval/run.py +41 -0
  5. inspect_ai/_eval/task/generate.py +38 -44
  6. inspect_ai/_eval/task/log.py +26 -28
  7. inspect_ai/_eval/task/run.py +13 -20
  8. inspect_ai/_util/local_server.py +368 -0
  9. inspect_ai/_util/working.py +10 -4
  10. inspect_ai/_view/www/dist/assets/index.css +159 -146
  11. inspect_ai/_view/www/dist/assets/index.js +1020 -1061
  12. inspect_ai/_view/www/log-schema.json +4 -3
  13. inspect_ai/_view/www/package.json +1 -1
  14. inspect_ai/_view/www/src/@types/log.d.ts +3 -2
  15. inspect_ai/_view/www/src/app/content/MetaDataGrid.tsx +2 -2
  16. inspect_ai/_view/www/src/app/content/MetaDataView.module.css +1 -1
  17. inspect_ai/_view/www/src/app/content/MetadataGrid.module.css +1 -1
  18. inspect_ai/_view/www/src/app/content/RenderedContent.tsx +1 -1
  19. inspect_ai/_view/www/src/app/log-view/LogView.tsx +11 -0
  20. inspect_ai/_view/www/src/app/log-view/tabs/InfoTab.tsx +2 -9
  21. inspect_ai/_view/www/src/app/log-view/tabs/ModelsTab.tsx +51 -0
  22. inspect_ai/_view/www/src/app/log-view/tabs/TaskTab.module.css +6 -0
  23. inspect_ai/_view/www/src/app/log-view/tabs/TaskTab.tsx +143 -0
  24. inspect_ai/_view/www/src/app/plan/ModelCard.tsx +1 -2
  25. inspect_ai/_view/www/src/app/plan/PlanCard.tsx +29 -7
  26. inspect_ai/_view/www/src/app/plan/PlanDetailView.module.css +1 -1
  27. inspect_ai/_view/www/src/app/plan/PlanDetailView.tsx +1 -198
  28. inspect_ai/_view/www/src/app/samples/descriptor/score/NumericScoreDescriptor.tsx +2 -1
  29. inspect_ai/_view/www/src/app/usage/ModelUsagePanel.tsx +3 -2
  30. inspect_ai/_view/www/src/app/usage/TokenTable.module.css +4 -1
  31. inspect_ai/_view/www/src/app/usage/TokenTable.tsx +2 -2
  32. inspect_ai/_view/www/src/app/usage/UsageCard.module.css +8 -3
  33. inspect_ai/_view/www/src/app/usage/UsageCard.tsx +1 -35
  34. inspect_ai/_view/www/src/components/Card.css +0 -1
  35. inspect_ai/_view/www/src/constants.ts +2 -0
  36. inspect_ai/_view/www/src/utils/numeric.ts +17 -0
  37. inspect_ai/agent/_agent.py +3 -3
  38. inspect_ai/agent/_as_solver.py +20 -12
  39. inspect_ai/agent/_as_tool.py +15 -3
  40. inspect_ai/agent/_handoff.py +8 -1
  41. inspect_ai/agent/_run.py +11 -3
  42. inspect_ai/log/__init__.py +4 -0
  43. inspect_ai/log/_file.py +56 -0
  44. inspect_ai/log/_log.py +99 -0
  45. inspect_ai/log/_recorders/__init__.py +2 -0
  46. inspect_ai/log/_recorders/buffer/database.py +12 -11
  47. inspect_ai/log/_recorders/buffer/filestore.py +2 -2
  48. inspect_ai/log/_recorders/buffer/types.py +2 -2
  49. inspect_ai/log/_recorders/eval.py +20 -65
  50. inspect_ai/log/_recorders/file.py +28 -6
  51. inspect_ai/log/_recorders/recorder.py +7 -0
  52. inspect_ai/log/_recorders/types.py +1 -23
  53. inspect_ai/log/_samples.py +0 -8
  54. inspect_ai/log/_transcript.py +7 -1
  55. inspect_ai/log/_util.py +52 -0
  56. inspect_ai/model/__init__.py +5 -1
  57. inspect_ai/model/_call_tools.py +32 -12
  58. inspect_ai/model/_generate_config.py +14 -8
  59. inspect_ai/model/_model.py +21 -48
  60. inspect_ai/model/_model_output.py +25 -0
  61. inspect_ai/model/_openai.py +2 -0
  62. inspect_ai/model/_providers/anthropic.py +13 -23
  63. inspect_ai/model/_providers/openai_o1.py +8 -2
  64. inspect_ai/model/_providers/providers.py +18 -4
  65. inspect_ai/model/_providers/sglang.py +241 -0
  66. inspect_ai/model/_providers/vllm.py +207 -400
  67. inspect_ai/solver/__init__.py +7 -2
  68. inspect_ai/solver/_basic_agent.py +3 -10
  69. inspect_ai/solver/_task_state.py +26 -88
  70. inspect_ai/tool/_json_rpc_helpers.py +45 -17
  71. inspect_ai/tool/_mcp/_mcp.py +2 -0
  72. inspect_ai/tool/_mcp/_sandbox.py +8 -2
  73. inspect_ai/tool/_mcp/server.py +3 -1
  74. inspect_ai/tool/_tool_call.py +4 -1
  75. inspect_ai/tool/_tool_support_helpers.py +51 -12
  76. inspect_ai/tool/_tools/_bash_session.py +190 -68
  77. inspect_ai/tool/_tools/_computer/_computer.py +25 -1
  78. inspect_ai/tool/_tools/_text_editor.py +4 -3
  79. inspect_ai/tool/_tools/_web_browser/_web_browser.py +10 -3
  80. inspect_ai/util/__init__.py +12 -0
  81. inspect_ai/util/_limit.py +393 -0
  82. inspect_ai/util/_limited_conversation.py +57 -0
  83. {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.93.dist-info}/METADATA +1 -1
  84. {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.93.dist-info}/RECORD +89 -108
  85. {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.93.dist-info}/WHEEL +1 -1
  86. inspect_ai/solver/_limit.py +0 -39
  87. inspect_ai/tool/_tools/_computer/_resources/Dockerfile +0 -102
  88. inspect_ai/tool/_tools/_computer/_resources/README.md +0 -30
  89. inspect_ai/tool/_tools/_computer/_resources/entrypoint/entrypoint.sh +0 -18
  90. inspect_ai/tool/_tools/_computer/_resources/entrypoint/novnc_startup.sh +0 -20
  91. inspect_ai/tool/_tools/_computer/_resources/entrypoint/x11vnc_startup.sh +0 -48
  92. inspect_ai/tool/_tools/_computer/_resources/entrypoint/xfce_startup.sh +0 -13
  93. inspect_ai/tool/_tools/_computer/_resources/entrypoint/xvfb_startup.sh +0 -48
  94. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/Code/User/globalStorage/state.vscdb +0 -0
  95. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/Code/User/settings.json +0 -9
  96. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-panel.xml +0 -61
  97. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-screensaver.xml +0 -10
  98. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfwm4.xml +0 -91
  99. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +0 -10
  100. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/Desktop/Terminal.desktop +0 -10
  101. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +0 -10
  102. inspect_ai/tool/_tools/_computer/_resources/tool/.pylintrc +0 -8
  103. inspect_ai/tool/_tools/_computer/_resources/tool/.vscode/settings.json +0 -12
  104. inspect_ai/tool/_tools/_computer/_resources/tool/_args.py +0 -78
  105. inspect_ai/tool/_tools/_computer/_resources/tool/_constants.py +0 -22
  106. inspect_ai/tool/_tools/_computer/_resources/tool/_logger.py +0 -22
  107. inspect_ai/tool/_tools/_computer/_resources/tool/_run.py +0 -42
  108. inspect_ai/tool/_tools/_computer/_resources/tool/_tool_result.py +0 -33
  109. inspect_ai/tool/_tools/_computer/_resources/tool/_x11_client.py +0 -341
  110. inspect_ai/tool/_tools/_computer/_resources/tool/computer_tool.py +0 -141
  111. inspect_ai/tool/_tools/_computer/_resources/tool/pyproject.toml +0 -65
  112. inspect_ai/tool/_tools/_computer/_resources/tool/requirements.txt +0 -0
  113. inspect_ai/tool/_tools/_computer/test_args.py +0 -151
  114. /inspect_ai/{tool/_tools/_computer/_resources/tool/__init__.py → _view/www/src/app/log-view/tabs/ModelsTab.module.css} +0 -0
  115. {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.93.dist-info}/entry_points.txt +0 -0
  116. {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.93.dist-info}/licenses/LICENSE +0 -0
  117. {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.93.dist-info}/top_level.txt +0 -0
@@ -1,20 +1,27 @@
1
- from pydantic import BaseModel, Field, RootModel
1
+ from textwrap import dedent
2
+ from typing import Annotated, Literal
3
+
4
+ from pydantic import BaseModel, Discriminator, Field, RootModel
5
+ from semver import Version
2
6
  from shortuuid import uuid
3
7
 
8
+ from inspect_ai._util.error import PrerequisiteError
4
9
  from inspect_ai.tool import ToolResult
5
- from inspect_ai.tool._tool_support_helpers import (
6
- exec_model_request,
7
- tool_container_sandbox,
8
- )
9
10
  from inspect_ai.util import StoreModel, store_as
11
+ from inspect_ai.util._sandbox.environment import SandboxEnvironment
10
12
 
11
13
  from .._tool import Tool, ToolParsingError, tool
12
- from .._tool_call import ToolCall, ToolCallContent, ToolCallView, ToolCallViewer
13
-
14
+ from .._tool_support_helpers import (
15
+ exec_model_request,
16
+ exec_scalar_request,
17
+ tool_support_sandbox,
18
+ )
14
19
 
15
20
  # These models are cloned from the container code. Until we decide to create
15
20
  # a package that is shared between the inspect and tool-container codebases, we'll
16
21
  # just have to live with the duplication.
23
+
24
+
18
25
  class NewSessionResult(BaseModel):
19
26
  session_name: str
20
27
 
@@ -23,106 +30,221 @@ class BashRestartResult(BaseModel):
23
30
  pass
24
31
 
25
32
 
26
- class BashCommandResult(BaseModel):
27
- status: int
28
- stdout: str
29
- stderr: str
33
+ class BashSessionStore(StoreModel):
34
+ session_id: str = Field(default_factory=str)
35
+ sandbox: SandboxEnvironment | None = Field(default=None)
30
36
 
31
37
 
32
- class BashResult(RootModel[BashRestartResult | BashCommandResult]):
33
- pass
38
+ # Action-specific parameter models
34
39
 
35
40
 
36
- class BashSessionStore(StoreModel):
37
- session_id: str = Field(default_factory=str)
41
+ class TypeParams(BaseModel):
42
+ action: Literal["type"] = "type"
43
+ input: str
38
44
 
39
45
 
40
- # custom viewer for bash
41
- def code_viewer(language: str, code_param: str) -> ToolCallViewer:
42
- def viewer(tool_call: ToolCall) -> ToolCallView:
43
- code = tool_call.arguments.get(code_param, None)
44
- code = (code or tool_call.function).strip()
45
- call = ToolCallContent(
46
- title=language,
47
- format="markdown",
48
- content=f"```{language}\n" + code + "\n```\n",
49
- )
50
- return ToolCallView(call=call)
46
+ class TypeSubmitParams(BaseModel):
47
+ action: Literal["type_submit"] = "type_submit"
48
+ input: str
49
+
50
+
51
+ class RestartParams(BaseModel):
52
+ action: Literal["restart"] = "restart"
53
+
51
54
 
52
- return viewer
55
+ class ReadParams(BaseModel):
56
+ action: Literal["read"] = "read"
53
57
 
54
58
 
55
- @tool(viewer=code_viewer("bash", "command"))
56
- def bash_session(*, timeout: int | None = None, instance: str | None = uuid()) -> Tool:
57
- """Bash shell session command execution tool.
59
+ class InterruptParams(BaseModel):
60
+ action: Literal["interrupt"] = "interrupt"
58
61
 
59
- Execute bash shell commands in a long running session using a sandbox environment (e.g. "docker").
62
+
63
+ class BashSessionParams(
64
+ RootModel[
65
+ TypeParams | TypeSubmitParams | RestartParams | ReadParams | InterruptParams
66
+ ]
67
+ ):
68
+ root: Annotated[
69
+ TypeParams | TypeSubmitParams | RestartParams | ReadParams | InterruptParams,
70
+ Discriminator("action"),
71
+ ]
72
+
73
+
74
+ DEFAULT_WAIT_FOR_OUTPUT = 30
75
+ DEFAULT_IDLE_TIME = 0.5
76
+ # this is how long we're willing to wait for the basic RPC call overhead.
77
+ TRANSPORT_TIMEOUT = 5
78
+
79
+
80
+ @tool()
81
+ def bash_session(
82
+ *,
83
+ timeout: int | None = None, # default is wait_for_output + 5 seconds
84
+ wait_for_output: int | None = None, # default is 30 seconds
85
+ instance: str | None = uuid(),
86
+ ) -> Tool:
87
+ """Interactive bash shell session tool.
88
+
89
+ Interact with a bash shell in a long running session using a sandbox
90
+ environment (e.g. "docker"). This tool allows sending text to the shell,
91
+ which could be a command followed by a newline character or any other input
92
+ text such as the response to a password prompt.
60
93
 
61
94
  By default, a separate bash process is created within the sandbox for each
62
- call to `bash_session()`. You can modify this behavior by passing `instance=None`
63
- (which will result in a single bash process for the entire sample) or use other
64
- `instance` values that implement another scheme).
95
+ call to `bash_session()`. You can modify this behavior by passing
96
+ `instance=None` (which will result in a single bash process for the entire
97
+ sample) or use other `instance` values that implement another scheme).
65
98
 
66
99
  See complete documentation at <https://inspect.aisi.org.uk/tools-standard.html#sec-bash-session>.
67
100
 
68
101
  Args:
69
102
  timeout: Timeout (in seconds) for command.
103
+ wait_for_output: Maximum time (in seconds) to wait for output. If no
104
+ output is received within this period, the function will return an
105
+ empty string. The model may need to make multiple tool calls to obtain
106
+ all output from a given command.
70
107
  instance: Instance id (each unique instance id has its own bash process)
71
108
 
72
109
  Returns:
73
- String with command output (stdout) or command error (stderr).
110
+ String with output from the shell.
74
111
  """
112
+ wait_for_output = wait_for_output or DEFAULT_WAIT_FOR_OUTPUT
113
+ min_timeout = wait_for_output + TRANSPORT_TIMEOUT
114
+ if timeout is None:
115
+ timeout = min_timeout
116
+ elif timeout < min_timeout:
117
+ raise ValueError(
118
+ f"Timeout must be at least {min_timeout} seconds, but got {timeout}."
119
+ )
75
120
 
76
121
  async def execute(
77
- command: str | None = None,
78
- restart: bool | None = None,
122
+ action: Literal["type", "type_submit", "restart", "read", "interrupt"],
123
+ input: str | None = None,
79
124
  ) -> ToolResult:
80
- """
81
- Use this function to execute bash commands.
125
+ r"""
126
+ Interact with a bash shell.
127
+
128
+ Interact with a bash shell by sending it input text and retrieving output
129
+ from it. There is no guarantee that all output will be returned in a
130
+ single call. Call this function multiple times to retrieve additional
131
+ output from the shell.
132
+
133
+ USAGE NOTES:
134
+ - Ensure that the shell is at a command prompt (typically when the
135
+ output ends in "$ " or "# ") before submitting a new command.
136
+ - Control characters must be sent as Unicode escape sequences (e.g., use
137
+ "\u0003" for Ctrl+C/ETX, "\u0004" for Ctrl+D/EOT). The literal string
138
+ "Ctrl+C" will not be interpreted as a control character.
139
+ - Use the "read" action to retrieve output from the shell without
140
+ sending any input. This is useful for long-running commands that
141
+ produce output over time. The "read" action will return any new output
142
+ since the last call.
143
+ - If a long-running command is in progress, additional input to execute
144
+ a new command will not be processed until the previous completes. To
145
+ abort a long-running command, use the "interrupt" action:
146
+ `bash_session(action="interrupt")`
147
+
148
+ Example use case:
149
+ - For a short-running command with a nominal amount of output, a single
150
+ call may suffice.
151
+ ```
152
+ bash_session(action="type_submit", input="echo foo") -> "foo\nuser@host:/# "
153
+ ```
154
+ - For a long-running command with output over time, multiple calls are needed.
155
+ ```
156
+ bash_session(action="type_submit", input="tail -f /tmp/foo.log") -> <some output>
157
+ bash_session(action="read") -> <more output>
158
+ # Send interrupt (Ctrl+C)
159
+ bash_session(action="interrupt") -> "<final output>^Cuser@host:/# "
160
+ ```
161
+ - Interactive command awaiting more input from the user.
162
+ ```
163
+ bash_session(action="type_submit", input="ssh fred@foo.com") -> "foo.com's password: "
164
+ bash_session(action="type_submit", input="secret") -> "fred@foo.com:~$ "
165
+ ```
82
166
 
83
167
  Args:
84
- command: The bash command to run. Required unless the tool is being restarted.
85
- restart: Specifying true will restart this tool. Otherwise, leave this unspecified.
168
+ action: The action to execute:
169
+ - "type": Send input without a return key
170
+ - "type_submit": Send input followed by a return key
171
+ - "read": Read any new output without sending input
172
+ - "interrupt": Send a Ctrl+C (ETX character) to interrupt the current process
173
+ - "restart": Restart the bash session
174
+ input: The input to send to the shell.
175
+ Required for "type". Optional for "type_submit" actions. Must
176
+ not be provided for "restart", "read", or "interrupt" actions.
86
177
 
87
178
  Returns:
88
- The output of the command.
179
+ The accumulated output of the shell.
89
180
  """
90
- if not ((command is None) ^ (restart is None)):
91
- raise ToolParsingError(
92
- "Either 'command' or 'restart' must be specified, but not both."
93
- )
94
- params: dict[str, object] = {"command": command, "restart": restart}
181
+ # Validate parameters based on action
182
+ match action:
183
+ case "type":
184
+ if input is None:
185
+ raise ToolParsingError(
186
+ f"'input' is required for '{action}' action."
187
+ )
188
+ case "restart" | "read" | "interrupt":
189
+ if input is not None:
190
+ raise ToolParsingError(
191
+ f"Do not provide 'input' with '{action}' action."
192
+ )
95
193
 
96
- sandbox = await tool_container_sandbox("bash session")
97
194
  store = store_as(BashSessionStore, instance=instance)
195
+ sandbox = await _get_sandbox(store)
98
196
 
99
197
  if not store.session_id:
100
198
  store.session_id = (
101
199
  await exec_model_request(
102
- sandbox=sandbox,
103
- method="bash_session_new_session",
104
- params={},
105
- result_type=NewSessionResult,
106
- timeout=timeout,
200
+ sandbox,
201
+ "bash_session_new_session",
202
+ {},
203
+ NewSessionResult,
204
+ TRANSPORT_TIMEOUT,
107
205
  )
108
206
  ).session_name
109
207
 
110
- params["session_name"] = store.session_id
208
+ timing: dict[str, object] = {
209
+ "wait_for_output": wait_for_output,
210
+ "idle_timeout": DEFAULT_IDLE_TIME,
211
+ }
212
+ action_specific: dict[str, dict[str, object]] = {
213
+ "type": {"input": input, **timing},
214
+ "type_submit": {"input": f"{input}\n", **timing},
215
+ "interrupt": {"input": "\u0003", **timing},
216
+ "read": timing,
217
+ "restart": {"restart": True},
218
+ }
219
+
220
+ result = await exec_scalar_request(
221
+ sandbox,
222
+ "bash_session",
223
+ {"session_name": store.session_id, **(action_specific[action])},
224
+ str,
225
+ timeout,
226
+ )
111
227
 
112
- result = (
113
- await exec_model_request(
114
- sandbox=sandbox,
115
- method="bash_session",
116
- params=params,
117
- result_type=BashResult,
118
- timeout=timeout,
119
- )
120
- ).root
228
+ # Return the appropriate response
229
+ return (
230
+ "Bash session restarted."
231
+ if isinstance(result, BashRestartResult)
232
+ else result
233
+ )
121
234
 
122
- if isinstance(result, BashRestartResult):
123
- return "Bash session restarted."
235
+ return execute
124
236
 
125
- # return output (including stderr if any)
126
- return f"{result.stderr}\n{result.stdout}" if result.stderr else result.stdout
127
237
 
128
- return execute
238
+ async def _get_sandbox(store: BashSessionStore) -> SandboxEnvironment:
239
+ if not store.sandbox:
240
+ (sandbox, sandbox_version) = await tool_support_sandbox("bash session")
241
+ required_version = Version.parse("1.0.0")
242
+ if sandbox_version < required_version:
243
+ raise PrerequisiteError(
244
+ dedent(f"""
245
+ The 'inspect-tool-support' version in your container is '{sandbox_version}'. The 'bash_session' tool requires version '{required_version}' or newer. Please update your container image to the latest version of 'inspect-tool-support'.
246
+ """).strip()
247
+ )
248
+ store.sandbox = sandbox
249
+
250
+ return store.sandbox
@@ -6,7 +6,31 @@ from inspect_ai.tool._tool import TOOL_INIT_MODEL_INPUT, ToolParsingError
6
6
  from inspect_ai.tool._tool_call import ToolCallModelInput, ToolCallModelInputHints
7
7
 
8
8
  from . import _common as common
9
- from ._resources.tool._constants import Action
9
+
10
+ # this is duplicated from ._resources.tool._constants import Action
11
+ # changes should be synchronized!
12
+
13
+ Action = Literal[
14
+ "key",
15
+ "hold_key",
16
+ "type",
17
+ "cursor_position",
18
+ "mouse_move",
19
+ "left_mouse_down",
20
+ "left_mouse_up",
21
+ "left_click",
22
+ "left_click_drag",
23
+ "right_click",
24
+ "middle_click",
25
+ "back_click",
26
+ "forward_click",
27
+ "double_click",
28
+ "triple_click",
29
+ "scroll",
30
+ "wait",
31
+ "screenshot",
32
+ ]
33
+
10
34
 
11
35
  ActionFunction = Callable[[str], ToolResult | Awaitable[ToolResult]]
12
36
 
@@ -6,7 +6,7 @@ from pydantic import BaseModel, Discriminator, RootModel
6
6
  from inspect_ai.tool import ToolResult
7
7
  from inspect_ai.tool._tool_support_helpers import (
8
8
  exec_scalar_request,
9
- tool_container_sandbox,
9
+ tool_support_sandbox,
10
10
  )
11
11
 
12
12
  from .._tool import Tool, tool
@@ -70,12 +70,13 @@ def text_editor(timeout: int | None = None, user: str | None = None) -> Tool:
70
70
  that a change made to a file by one Subtask will be visible to another Subtask.
71
71
 
72
72
  Args:
73
- timeout: Timeout (in seconds) for command.
73
+ timeout: Timeout (in seconds) for command. Defaults to 180 if not provided.
74
74
  user: User to execute commands as.
75
75
 
76
76
  Returns:
77
77
  String with command output (stdout) or command error (stderr).
78
78
  """
79
+ timeout = timeout or 180
79
80
 
80
81
  async def execute(
81
82
  command: Literal["view", "create", "str_replace", "insert", "undo_edit"],
@@ -101,7 +102,7 @@ def text_editor(timeout: int | None = None, user: str | None = None) -> Tool:
101
102
  Returns:
102
103
  The output of the command.
103
104
  """
104
- sandbox = await tool_container_sandbox("editor")
105
+ (sandbox, _) = await tool_support_sandbox("editor")
105
106
 
106
107
  # Create a dictionary of the parameters
107
108
  params = {
@@ -10,7 +10,7 @@ from inspect_ai.tool._tool_call import ToolCall, ToolCallContent, ToolCallView
10
10
  from inspect_ai.tool._tool_info import parse_tool_info
11
11
  from inspect_ai.tool._tool_support_helpers import (
12
12
  exec_model_request,
13
- tool_container_sandbox,
13
+ tool_support_sandbox,
14
14
  )
15
15
  from inspect_ai.tool._tool_with import tool_with
16
16
  from inspect_ai.util._store_model import StoreModel, store_as
@@ -397,8 +397,10 @@ def web_browser_refresh(instance: str | None = None) -> Tool:
397
397
  async def _web_browser_cmd(
398
398
  tool_name: str, instance: str | None, params: dict[str, object]
399
399
  ) -> ToolResult:
400
+ # TODO: Is it worth it to plumb this down from the @tool?
401
+ timeout = 180
400
402
  try:
401
- sandbox_env = await tool_container_sandbox("web browser")
403
+ (sandbox_env, _) = await tool_support_sandbox("web browser")
402
404
  except PrerequisiteError as e:
403
405
  # The user may have the old, incompatible, sandbox. If so, use that and
404
406
  # execute the old compatible code.
@@ -419,13 +421,18 @@ async def _web_browser_cmd(
419
421
  method="web_new_session",
420
422
  params={"headful": False},
421
423
  result_type=NewSessionResult,
424
+ timeout=timeout,
422
425
  )
423
426
  ).session_name
424
427
 
425
428
  params["session_name"] = store.session_id
426
429
 
427
430
  crawler_result = await exec_model_request(
428
- sandbox=sandbox_env, method=tool_name, params=params, result_type=CrawlerResult
431
+ sandbox=sandbox_env,
432
+ method=tool_name,
433
+ params=params,
434
+ result_type=CrawlerResult,
435
+ timeout=timeout,
429
436
  )
430
437
  if crawler_result.error and crawler_result.error.strip() != "":
431
438
  raise ToolError(crawler_result.error)
@@ -1,5 +1,12 @@
1
1
  from inspect_ai._util.registry import RegistryType, registry_create
2
2
  from inspect_ai._util.trace import trace_action, trace_message
3
+ from inspect_ai.util._limit import (
4
+ Limit,
5
+ LimitExceededError,
6
+ apply_limits,
7
+ message_limit,
8
+ token_limit,
9
+ )
3
10
 
4
11
  from ._concurrency import concurrency
5
12
  from ._console import input_screen
@@ -31,6 +38,7 @@ from ._subtask import Subtask, subtask
31
38
  from ._throttle import throttle
32
39
 
33
40
  __all__ = [
41
+ "apply_limits",
34
42
  "ExecResult",
35
43
  "concurrency",
36
44
  "DisplayType",
@@ -42,9 +50,12 @@ __all__ = [
42
50
  "JSONType",
43
51
  "JSONSchema",
44
52
  "json_schema",
53
+ "Limit",
54
+ "message_limit",
45
55
  "OutputLimitExceededError",
46
56
  "resource",
47
57
  "subprocess",
58
+ "LimitExceededError",
48
59
  "SandboxEnvironment",
49
60
  "SandboxEnvironmentConfigType",
50
61
  "SandboxEnvironmentLimits",
@@ -63,6 +74,7 @@ __all__ = [
63
74
  "Subtask",
64
75
  "subtask",
65
76
  "throttle",
77
+ "token_limit",
66
78
  "trace_action",
67
79
  "trace_message",
68
80
  "RegistryType",