inspect-ai 0.3.82__py3-none-any.whl → 0.3.84__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/__init__.py +2 -1
- inspect_ai/_display/textual/app.py +14 -3
- inspect_ai/_display/textual/display.py +4 -0
- inspect_ai/_display/textual/widgets/samples.py +9 -3
- inspect_ai/_display/textual/widgets/task_detail.py +3 -4
- inspect_ai/_display/textual/widgets/tasks.py +17 -1
- inspect_ai/_display/textual/widgets/vscode.py +48 -0
- inspect_ai/_eval/eval.py +36 -24
- inspect_ai/_eval/evalset.py +17 -18
- inspect_ai/_eval/loader.py +34 -11
- inspect_ai/_eval/run.py +8 -13
- inspect_ai/_eval/score.py +13 -3
- inspect_ai/_eval/task/generate.py +8 -9
- inspect_ai/_eval/task/log.py +2 -0
- inspect_ai/_eval/task/task.py +23 -9
- inspect_ai/_util/file.py +13 -0
- inspect_ai/_util/json.py +2 -1
- inspect_ai/_util/registry.py +1 -0
- inspect_ai/_util/vscode.py +37 -0
- inspect_ai/_view/www/App.css +6 -0
- inspect_ai/_view/www/dist/assets/index.css +304 -128
- inspect_ai/_view/www/dist/assets/index.js +47495 -27519
- inspect_ai/_view/www/log-schema.json +124 -31
- inspect_ai/_view/www/package.json +3 -0
- inspect_ai/_view/www/src/App.tsx +12 -0
- inspect_ai/_view/www/src/appearance/icons.ts +1 -0
- inspect_ai/_view/www/src/components/Card.tsx +6 -4
- inspect_ai/_view/www/src/components/LinkButton.module.css +16 -0
- inspect_ai/_view/www/src/components/LinkButton.tsx +33 -0
- inspect_ai/_view/www/src/components/LiveVirtualList.tsx +1 -1
- inspect_ai/_view/www/src/components/MarkdownDiv.tsx +113 -23
- inspect_ai/_view/www/src/components/Modal.module.css +38 -0
- inspect_ai/_view/www/src/components/Modal.tsx +77 -0
- inspect_ai/_view/www/src/plan/DetailStep.module.css +4 -0
- inspect_ai/_view/www/src/plan/DetailStep.tsx +6 -3
- inspect_ai/_view/www/src/plan/SolverDetailView.module.css +2 -1
- inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +7 -0
- inspect_ai/_view/www/src/samples/SampleDialog.tsx +7 -0
- inspect_ai/_view/www/src/samples/SampleDisplay.tsx +11 -34
- inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +6 -0
- inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +2 -2
- inspect_ai/_view/www/src/samples/SamplesTools.tsx +12 -0
- inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +2 -0
- inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +2 -0
- inspect_ai/_view/www/src/samples/chat/messages.ts +3 -1
- inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +1 -0
- inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +9 -3
- inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.module.css +3 -3
- inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.tsx +1 -1
- inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.module.css +4 -4
- inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +10 -11
- inspect_ai/_view/www/src/samples/list/SampleFooter.module.css +2 -1
- inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +7 -1
- inspect_ai/_view/www/src/samples/list/SampleList.tsx +25 -8
- inspect_ai/_view/www/src/samples/list/SampleRow.tsx +1 -1
- inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +11 -22
- inspect_ai/_view/www/src/samples/scores/SampleScoresGrid.module.css +38 -0
- inspect_ai/_view/www/src/samples/scores/SampleScoresGrid.tsx +118 -0
- inspect_ai/_view/www/src/samples/scores/{SampleScoreView.module.css → SampleScoresView.module.css} +10 -1
- inspect_ai/_view/www/src/samples/scores/SampleScoresView.tsx +78 -0
- inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +3 -3
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +25 -4
- inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +29 -2
- inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +0 -1
- inspect_ai/_view/www/src/state/hooks.ts +5 -3
- inspect_ai/_view/www/src/state/logPolling.ts +5 -1
- inspect_ai/_view/www/src/state/logSlice.ts +10 -0
- inspect_ai/_view/www/src/state/samplePolling.ts +4 -1
- inspect_ai/_view/www/src/state/sampleSlice.ts +13 -0
- inspect_ai/_view/www/src/types/log.d.ts +34 -26
- inspect_ai/_view/www/src/types/markdown-it-katex.d.ts +21 -0
- inspect_ai/_view/www/src/utils/json-worker.ts +79 -12
- inspect_ai/_view/www/src/workspace/WorkSpace.tsx +18 -16
- inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +16 -0
- inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +68 -71
- inspect_ai/_view/www/src/workspace/navbar/ScoreGrid.module.css +35 -0
- inspect_ai/_view/www/src/workspace/navbar/ScoreGrid.tsx +117 -0
- inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +1 -1
- inspect_ai/_view/www/src/workspace/sidebar/Sidebar.module.css +3 -2
- inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +18 -0
- inspect_ai/_view/www/yarn.lock +94 -1
- inspect_ai/agent/__init__.py +36 -0
- inspect_ai/agent/_agent.py +268 -0
- inspect_ai/agent/_as_solver.py +72 -0
- inspect_ai/agent/_as_tool.py +122 -0
- inspect_ai/{solver → agent}/_bridge/bridge.py +23 -37
- inspect_ai/{solver → agent}/_bridge/patch.py +9 -8
- inspect_ai/agent/_filter.py +46 -0
- inspect_ai/agent/_handoff.py +93 -0
- inspect_ai/{solver/_human_agent → agent/_human}/agent.py +11 -12
- inspect_ai/{solver/_human_agent → agent/_human}/commands/__init__.py +2 -3
- inspect_ai/{solver/_human_agent → agent/_human}/commands/clock.py +3 -1
- inspect_ai/{solver/_human_agent → agent/_human}/commands/score.py +5 -5
- inspect_ai/{solver/_human_agent → agent/_human}/install.py +6 -3
- inspect_ai/{solver/_human_agent → agent/_human}/service.py +7 -3
- inspect_ai/{solver/_human_agent → agent/_human}/state.py +5 -5
- inspect_ai/agent/_react.py +241 -0
- inspect_ai/agent/_run.py +36 -0
- inspect_ai/agent/_types.py +81 -0
- inspect_ai/log/_log.py +11 -2
- inspect_ai/log/_transcript.py +13 -9
- inspect_ai/model/__init__.py +7 -1
- inspect_ai/model/_call_tools.py +256 -52
- inspect_ai/model/_chat_message.py +7 -4
- inspect_ai/model/_conversation.py +13 -62
- inspect_ai/model/_display.py +85 -0
- inspect_ai/model/_model.py +113 -14
- inspect_ai/model/_model_output.py +14 -9
- inspect_ai/model/_openai.py +16 -4
- inspect_ai/model/_openai_computer_use.py +162 -0
- inspect_ai/model/_openai_responses.py +319 -165
- inspect_ai/model/_providers/anthropic.py +20 -21
- inspect_ai/model/_providers/azureai.py +24 -13
- inspect_ai/model/_providers/bedrock.py +1 -7
- inspect_ai/model/_providers/cloudflare.py +3 -3
- inspect_ai/model/_providers/goodfire.py +2 -6
- inspect_ai/model/_providers/google.py +11 -10
- inspect_ai/model/_providers/groq.py +6 -3
- inspect_ai/model/_providers/hf.py +7 -3
- inspect_ai/model/_providers/mistral.py +7 -10
- inspect_ai/model/_providers/openai.py +47 -17
- inspect_ai/model/_providers/openai_o1.py +11 -4
- inspect_ai/model/_providers/openai_responses.py +12 -14
- inspect_ai/model/_providers/providers.py +2 -2
- inspect_ai/model/_providers/together.py +12 -2
- inspect_ai/model/_providers/util/chatapi.py +7 -2
- inspect_ai/model/_providers/util/hf_handler.py +4 -2
- inspect_ai/model/_providers/util/llama31.py +4 -2
- inspect_ai/model/_providers/vertex.py +11 -9
- inspect_ai/model/_providers/vllm.py +4 -4
- inspect_ai/scorer/__init__.py +2 -0
- inspect_ai/scorer/_metrics/__init__.py +2 -0
- inspect_ai/scorer/_metrics/grouped.py +84 -0
- inspect_ai/scorer/_score.py +26 -6
- inspect_ai/solver/__init__.py +2 -2
- inspect_ai/solver/_basic_agent.py +22 -9
- inspect_ai/solver/_bridge.py +31 -0
- inspect_ai/solver/_chain.py +20 -12
- inspect_ai/solver/_fork.py +5 -1
- inspect_ai/solver/_human_agent.py +52 -0
- inspect_ai/solver/_prompt.py +3 -1
- inspect_ai/solver/_run.py +59 -0
- inspect_ai/solver/_solver.py +14 -4
- inspect_ai/solver/_task_state.py +5 -3
- inspect_ai/tool/_tool_call.py +15 -8
- inspect_ai/tool/_tool_def.py +17 -12
- inspect_ai/tool/_tool_support_helpers.py +2 -2
- inspect_ai/tool/_tool_with.py +14 -11
- inspect_ai/tool/_tools/_bash_session.py +11 -2
- inspect_ai/tool/_tools/_computer/_common.py +18 -2
- inspect_ai/tool/_tools/_computer/_computer.py +18 -2
- inspect_ai/tool/_tools/_computer/_resources/tool/_constants.py +2 -0
- inspect_ai/tool/_tools/_computer/_resources/tool/_x11_client.py +17 -0
- inspect_ai/tool/_tools/_think.py +1 -1
- inspect_ai/tool/_tools/_web_browser/_web_browser.py +100 -61
- inspect_ai/util/__init__.py +2 -0
- inspect_ai/util/_anyio.py +27 -0
- inspect_ai/util/_sandbox/__init__.py +2 -1
- inspect_ai/util/_sandbox/context.py +32 -7
- inspect_ai/util/_sandbox/docker/cleanup.py +4 -0
- inspect_ai/util/_sandbox/docker/compose.py +2 -2
- inspect_ai/util/_sandbox/docker/docker.py +12 -1
- inspect_ai/util/_store_model.py +30 -7
- inspect_ai/util/_subprocess.py +13 -3
- {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.84.dist-info}/METADATA +1 -1
- {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.84.dist-info}/RECORD +179 -153
- inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +0 -167
- /inspect_ai/{solver → agent}/_bridge/__init__.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/__init__.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/commands/command.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/commands/instructions.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/commands/note.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/commands/status.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/commands/submit.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/panel.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/view.py +0 -0
- {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.84.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.84.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.84.dist-info}/licenses/LICENSE +0 -0
- {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.84.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,7 @@
|
|
1
1
|
import re
|
2
2
|
|
3
3
|
from pydantic import BaseModel, Field
|
4
|
+
from shortuuid import uuid
|
4
5
|
|
5
6
|
from inspect_ai._util.content import ContentText
|
6
7
|
from inspect_ai._util.error import PrerequisiteError
|
@@ -31,22 +32,30 @@ class CrawlerResult(BaseModel):
|
|
31
32
|
error: str | None = None
|
32
33
|
|
33
34
|
|
34
|
-
def web_browser(
|
35
|
+
def web_browser(
|
36
|
+
*, interactive: bool = True, instance: str | None = uuid()
|
37
|
+
) -> list[Tool]:
|
35
38
|
"""Tools used for web browser navigation.
|
36
39
|
|
37
|
-
|
40
|
+
By default, a separate web browser process is created within the sandbox for each
|
41
|
+
call to `web_browser()`. You can modify this behavior by passing `instance=None`
|
42
|
+
(which will result in a single web browser for the entire sample) or use other
|
43
|
+
`instance` values that implement another scheme.
|
44
|
+
|
45
|
+
See complete documentation at <https://inspect.aisi.org.uk/tools-standard.html#sec-web-browser>.
|
38
46
|
|
39
47
|
Args:
|
40
48
|
interactive: Provide interactive tools (enable
|
41
49
|
clicking, typing, and submitting forms). Defaults
|
42
50
|
to True.
|
51
|
+
instance: Instance id (each unique instance id has its own web browser process)
|
43
52
|
|
44
53
|
Returns:
|
45
54
|
List of tools used for web browser navigation.
|
46
55
|
|
47
56
|
"""
|
48
57
|
# start with go tool (excluding interactive docs if necessary)
|
49
|
-
go = web_browser_go()
|
58
|
+
go = web_browser_go(instance)
|
50
59
|
if not interactive:
|
51
60
|
go = go_without_interactive_docs(go)
|
52
61
|
tools = [go]
|
@@ -54,24 +63,27 @@ def web_browser(interactive: bool = True) -> list[Tool]:
|
|
54
63
|
# add interactive tools if requested
|
55
64
|
if interactive:
|
56
65
|
tools = tools + [
|
57
|
-
web_browser_click(),
|
58
|
-
web_browser_type_submit(),
|
59
|
-
web_browser_type(),
|
66
|
+
tool_with_web_at_viewer(web_browser_click(instance), instance),
|
67
|
+
tool_with_web_at_viewer(web_browser_type_submit(instance), instance),
|
68
|
+
tool_with_web_at_viewer(web_browser_type(instance), instance),
|
60
69
|
]
|
61
70
|
|
62
71
|
# add navigational tools
|
63
72
|
return tools + [
|
64
|
-
web_browser_scroll(),
|
65
|
-
web_browser_back(),
|
66
|
-
web_browser_forward(),
|
67
|
-
web_browser_refresh(),
|
73
|
+
web_browser_scroll(instance),
|
74
|
+
web_browser_back(instance),
|
75
|
+
web_browser_forward(instance),
|
76
|
+
web_browser_refresh(instance),
|
68
77
|
]
|
69
78
|
|
70
79
|
|
71
80
|
@tool(parallel=False)
|
72
|
-
def web_browser_go() -> Tool:
|
81
|
+
def web_browser_go(instance: str | None = None) -> Tool:
|
73
82
|
"""Web Browser tool for navigation to a URL.
|
74
83
|
|
84
|
+
Args:
|
85
|
+
instance: Instance id (each unique instance id has its own web browser process)
|
86
|
+
|
75
87
|
Returns:
|
76
88
|
Web browser navigation tool.
|
77
89
|
"""
|
@@ -102,7 +114,7 @@ def web_browser_go() -> Tool:
|
|
102
114
|
Returns:
|
103
115
|
Web accessibility tree of the visible elements of the web page. The element_id of each element is displayed in brackets at the beginning of the line.
|
104
116
|
"""
|
105
|
-
return await _web_browser_cmd("web_go", locals())
|
117
|
+
return await _web_browser_cmd("web_go", instance, locals())
|
106
118
|
|
107
119
|
return execute
|
108
120
|
|
@@ -126,36 +138,44 @@ class WebBrowserStore(StoreModel):
|
|
126
138
|
session_id: str = Field(default_factory=str)
|
127
139
|
|
128
140
|
|
129
|
-
def
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
141
|
+
def tool_with_web_at_viewer(tool: Tool, instance: str | None = None) -> Tool:
|
142
|
+
def web_at_viewer(call: ToolCall) -> ToolCallView:
|
143
|
+
# get the web accessibility tree, if we have it create a view from it
|
144
|
+
web_at = store_as(WebBrowserStore, instance=instance).web_at
|
145
|
+
element_id = call.arguments.get("element_id", 0)
|
146
|
+
if web_at and element_id:
|
147
|
+
lines = web_at.splitlines()
|
148
|
+
pattern = re.compile(rf"^\s+\[{element_id}\] .*$")
|
149
|
+
for i, line in enumerate(lines):
|
150
|
+
if pattern.match(line):
|
151
|
+
snippet = (
|
152
|
+
lines[0:1]
|
153
|
+
+ [" ..."]
|
154
|
+
+ lines[max(i - 2, 1) : i]
|
155
|
+
+ [line.replace(" ", "*", 1)]
|
156
|
+
+ lines[i + 1 : min(i + 3, len(lines))]
|
157
|
+
+ [" ..."]
|
158
|
+
)
|
159
|
+
|
160
|
+
return ToolCallView(
|
161
|
+
context=ToolCallContent(
|
162
|
+
format="text", content="\n".join(snippet)
|
163
|
+
)
|
164
|
+
)
|
165
|
+
|
166
|
+
# no view found
|
167
|
+
return ToolCallView()
|
168
|
+
|
169
|
+
return tool_with(tool, viewer=web_at_viewer)
|
170
|
+
|
171
|
+
|
172
|
+
@tool(parallel=False)
|
173
|
+
def web_browser_click(instance: str | None = None) -> Tool:
|
157
174
|
"""Web Browser tool for clicking an element on a web page.
|
158
175
|
|
176
|
+
Args:
|
177
|
+
instance: Instance id (each unique instance id has its own web browser process)
|
178
|
+
|
159
179
|
Returns:
|
160
180
|
Web browser clicking tool.
|
161
181
|
"""
|
@@ -182,15 +202,18 @@ def web_browser_click() -> Tool:
|
|
182
202
|
Returns:
|
183
203
|
Web accessibility tree of the visible elements of the web page. The element_id of each element is displayed in brackets at the beginning of the line.
|
184
204
|
"""
|
185
|
-
return await _web_browser_cmd("web_click", locals())
|
205
|
+
return await _web_browser_cmd("web_click", instance, locals())
|
186
206
|
|
187
207
|
return execute
|
188
208
|
|
189
209
|
|
190
|
-
@tool(
|
191
|
-
def web_browser_type_submit() -> Tool:
|
210
|
+
@tool(parallel=False)
|
211
|
+
def web_browser_type_submit(instance: str | None = None) -> Tool:
|
192
212
|
"""Web Browser tool for typing and submitting input.
|
193
213
|
|
214
|
+
Args:
|
215
|
+
instance: Instance id (each unique instance id has its own web browser process)
|
216
|
+
|
194
217
|
Returns:
|
195
218
|
Web browser type and submit tool.
|
196
219
|
"""
|
@@ -220,15 +243,18 @@ def web_browser_type_submit() -> Tool:
|
|
220
243
|
Returns:
|
221
244
|
Web accessibility tree of the visible elements of the web page. The element_id of each element is displayed in brackets at the beginning of the line.
|
222
245
|
"""
|
223
|
-
return await _web_browser_cmd("web_type_submit", locals())
|
246
|
+
return await _web_browser_cmd("web_type_submit", instance, locals())
|
224
247
|
|
225
248
|
return execute
|
226
249
|
|
227
250
|
|
228
|
-
@tool(
|
229
|
-
def web_browser_type() -> Tool:
|
251
|
+
@tool(parallel=False)
|
252
|
+
def web_browser_type(instance: str | None = None) -> Tool:
|
230
253
|
"""Web Browser tool for typing into inputs.
|
231
254
|
|
255
|
+
Args:
|
256
|
+
instance: Instance id (each unique instance id has its own web browser process)
|
257
|
+
|
232
258
|
Returns:
|
233
259
|
Web browser typing tool.
|
234
260
|
"""
|
@@ -258,15 +284,18 @@ def web_browser_type() -> Tool:
|
|
258
284
|
Returns:
|
259
285
|
Web accessibility tree of the visible elements of the web page. The element_id of each element is displayed in brackets at the beginning of the line.
|
260
286
|
"""
|
261
|
-
return await _web_browser_cmd("web_type", locals())
|
287
|
+
return await _web_browser_cmd("web_type", instance, locals())
|
262
288
|
|
263
289
|
return execute
|
264
290
|
|
265
291
|
|
266
292
|
@tool(parallel=False)
|
267
|
-
def web_browser_scroll() -> Tool:
|
293
|
+
def web_browser_scroll(instance: str | None = None) -> Tool:
|
268
294
|
"""Web Browser tool for scrolling up or down one page.
|
269
295
|
|
296
|
+
Args:
|
297
|
+
instance: Instance id (each unique instance id has its own web browser process)
|
298
|
+
|
270
299
|
Returns:
|
271
300
|
Web browser scrolling tool.
|
272
301
|
"""
|
@@ -288,15 +317,18 @@ def web_browser_scroll() -> Tool:
|
|
288
317
|
Returns:
|
289
318
|
Web accessibility tree of the visible elements of the web page. The element_id of each element is displayed in brackets at the beginning of the line.
|
290
319
|
"""
|
291
|
-
return await _web_browser_cmd("web_scroll", locals())
|
320
|
+
return await _web_browser_cmd("web_scroll", instance, locals())
|
292
321
|
|
293
322
|
return execute
|
294
323
|
|
295
324
|
|
296
325
|
@tool(parallel=False)
|
297
|
-
def web_browser_back() -> Tool:
|
326
|
+
def web_browser_back(instance: str | None = None) -> Tool:
|
298
327
|
"""Web Browser tool for navigating back in the browser history.
|
299
328
|
|
329
|
+
Args:
|
330
|
+
instance: Instance id (each unique instance id has its own web browser process)
|
331
|
+
|
300
332
|
Returns:
|
301
333
|
Web browser back navigation tool.
|
302
334
|
"""
|
@@ -309,15 +341,18 @@ def web_browser_back() -> Tool:
|
|
309
341
|
Returns:
|
310
342
|
Web accessibility tree of the visible elements of the web page. The element_id of each element is displayed in brackets at the beginning of the line.
|
311
343
|
"""
|
312
|
-
return await _web_browser_cmd("web_back", locals())
|
344
|
+
return await _web_browser_cmd("web_back", instance, locals())
|
313
345
|
|
314
346
|
return execute
|
315
347
|
|
316
348
|
|
317
349
|
@tool(parallel=False)
|
318
|
-
def web_browser_forward() -> Tool:
|
350
|
+
def web_browser_forward(instance: str | None = None) -> Tool:
|
319
351
|
"""Web Browser tool for navigating forward in the browser history.
|
320
352
|
|
353
|
+
Args:
|
354
|
+
instance: Instance id (each unique instance id has its own web browser process)
|
355
|
+
|
321
356
|
Returns:
|
322
357
|
Web browser forward navigation tool.
|
323
358
|
"""
|
@@ -330,15 +365,18 @@ def web_browser_forward() -> Tool:
|
|
330
365
|
Returns:
|
331
366
|
Web accessibility tree of the visible elements of the web page. The element_id of each element is displayed in brackets at the beginning of the line.
|
332
367
|
"""
|
333
|
-
return await _web_browser_cmd("web_forward", locals())
|
368
|
+
return await _web_browser_cmd("web_forward", instance, locals())
|
334
369
|
|
335
370
|
return execute
|
336
371
|
|
337
372
|
|
338
373
|
@tool(parallel=False)
|
339
|
-
def web_browser_refresh() -> Tool:
|
374
|
+
def web_browser_refresh(instance: str | None = None) -> Tool:
|
340
375
|
"""Web Browser tool for refreshing the current page.
|
341
376
|
|
377
|
+
Args:
|
378
|
+
instance: Instance id (each unique instance id has its own web browser process)
|
379
|
+
|
342
380
|
Returns:
|
343
381
|
Web browser page refresh tool.
|
344
382
|
"""
|
@@ -351,12 +389,14 @@ def web_browser_refresh() -> Tool:
|
|
351
389
|
Returns:
|
352
390
|
Web accessibility tree of the visible elements of the web page. The element_id of each element is displayed in brackets at the beginning of the line.
|
353
391
|
"""
|
354
|
-
return await _web_browser_cmd("web_refresh", locals())
|
392
|
+
return await _web_browser_cmd("web_refresh", instance, locals())
|
355
393
|
|
356
394
|
return execute
|
357
395
|
|
358
396
|
|
359
|
-
async def _web_browser_cmd(
|
397
|
+
async def _web_browser_cmd(
|
398
|
+
tool_name: str, instance: str | None, params: dict[str, object]
|
399
|
+
) -> ToolResult:
|
360
400
|
try:
|
361
401
|
sandbox_env = await tool_container_sandbox("web browser")
|
362
402
|
except PrerequisiteError as e:
|
@@ -369,7 +409,8 @@ async def _web_browser_cmd(tool_name: str, params: dict[str, object]) -> ToolRes
|
|
369
409
|
except PrerequisiteError:
|
370
410
|
raise e
|
371
411
|
|
372
|
-
store
|
412
|
+
# bind to store (use instance id if provided)
|
413
|
+
store = store_as(WebBrowserStore, instance=instance)
|
373
414
|
|
374
415
|
if not store.session_id:
|
375
416
|
store.session_id = (
|
@@ -397,10 +438,8 @@ async def _web_browser_cmd(tool_name: str, params: dict[str, object]) -> ToolRes
|
|
397
438
|
line.partition("data:image/png;base64")[0] for line in web_at_lines
|
398
439
|
]
|
399
440
|
|
400
|
-
|
401
|
-
|
402
|
-
)
|
403
|
-
store_as(WebBrowserStore).web_at = web_at
|
441
|
+
store.main_content = main_content or "(no main text summary)"
|
442
|
+
store.web_at = web_at
|
404
443
|
|
405
444
|
web_at = "\n".join(web_at_lines)
|
406
445
|
return (
|
inspect_ai/util/__init__.py
CHANGED
@@ -16,6 +16,7 @@ from ._sandbox import (
|
|
16
16
|
SandboxEnvironmentSpec,
|
17
17
|
SandboxEnvironmentType,
|
18
18
|
sandbox,
|
19
|
+
sandbox_default,
|
19
20
|
sandbox_with,
|
20
21
|
sandboxenv,
|
21
22
|
)
|
@@ -53,6 +54,7 @@ __all__ = [
|
|
53
54
|
"sandboxenv",
|
54
55
|
"sandbox",
|
55
56
|
"sandbox_with",
|
57
|
+
"sandbox_default",
|
56
58
|
"Store",
|
57
59
|
"store",
|
58
60
|
"StoreModel",
|
@@ -0,0 +1,27 @@
|
|
1
|
+
import sys
|
2
|
+
|
3
|
+
if sys.version_info < (3, 11):
|
4
|
+
from exceptiongroup import ExceptionGroup
|
5
|
+
|
6
|
+
|
7
|
+
def inner_exception(exc: Exception) -> Exception:
|
8
|
+
flattended = flatten_exception_group(exc)
|
9
|
+
return flattended[0]
|
10
|
+
|
11
|
+
|
12
|
+
def flatten_exception_group(exc: Exception) -> list[Exception]:
|
13
|
+
"""Recursively flatten an ExceptionGroup to get all contained exceptions."""
|
14
|
+
if (
|
15
|
+
hasattr(exc, "__context__")
|
16
|
+
and exc.__context__ is not None
|
17
|
+
and isinstance(exc.__context__, Exception)
|
18
|
+
):
|
19
|
+
return flatten_exception_group(exc.__context__) + [exc]
|
20
|
+
|
21
|
+
if isinstance(exc, ExceptionGroup):
|
22
|
+
flattened = []
|
23
|
+
for nested_exc in exc.exceptions:
|
24
|
+
flattened.extend(flatten_exception_group(nested_exc))
|
25
|
+
return flattened
|
26
|
+
|
27
|
+
return [exc]
|
@@ -1,6 +1,6 @@
|
|
1
1
|
# note: unused imports are still required to ensure that our built-in sandbox environments are registered
|
2
2
|
|
3
|
-
from .context import sandbox, sandbox_with
|
3
|
+
from .context import sandbox, sandbox_default, sandbox_with
|
4
4
|
from .docker.docker import DockerSandboxEnvironment # noqa: F401
|
5
5
|
from .environment import (
|
6
6
|
SandboxConnection,
|
@@ -26,4 +26,5 @@ __all__ = [
|
|
26
26
|
"sandboxenv",
|
27
27
|
"sandbox",
|
28
28
|
"sandbox_with",
|
29
|
+
"sandbox_default",
|
29
30
|
]
|
@@ -1,6 +1,7 @@
|
|
1
|
+
from contextlib import contextmanager
|
1
2
|
from contextvars import ContextVar
|
2
3
|
from logging import getLogger
|
3
|
-
from typing import Any, NoReturn, cast
|
4
|
+
from typing import Any, Iterator, NoReturn, cast
|
4
5
|
|
5
6
|
from shortuuid import uuid
|
6
7
|
|
@@ -39,7 +40,7 @@ def sandbox(name: str | None = None) -> SandboxEnvironment:
|
|
39
40
|
|
40
41
|
# For None, 'default', or a single environment only take the first environment
|
41
42
|
if name is None or name == "default" or len(environments) == 1:
|
42
|
-
return
|
43
|
+
return default_sandbox_environment(environments)
|
43
44
|
else:
|
44
45
|
environment = environments.get(name, None)
|
45
46
|
if not environment:
|
@@ -146,6 +147,12 @@ async def init_sandbox_environments_sample(
|
|
146
147
|
environments = {k: SandboxEnvironmentProxy(v) for k, v in environments.items()}
|
147
148
|
|
148
149
|
try:
|
150
|
+
# set context
|
151
|
+
sandbox_environments_context_var.set(environments)
|
152
|
+
sandbox_with_environments_context_var.set({})
|
153
|
+
default_name = next(iter(environments.keys()))
|
154
|
+
sandbox_default_context_var.set(default_name)
|
155
|
+
|
149
156
|
# copy files into environments
|
150
157
|
await copy_sandbox_environment_files(files, environments)
|
151
158
|
|
@@ -153,10 +160,6 @@ async def init_sandbox_environments_sample(
|
|
153
160
|
if setup:
|
154
161
|
await setup_sandbox_environment(setup, environments)
|
155
162
|
|
156
|
-
# set context
|
157
|
-
sandbox_environments_context_var.set(environments)
|
158
|
-
sandbox_with_environments_context_var.set({})
|
159
|
-
|
160
163
|
# return environments
|
161
164
|
return environments
|
162
165
|
|
@@ -239,7 +242,13 @@ async def setup_sandbox_environment(
|
|
239
242
|
def default_sandbox_environment(
|
240
243
|
environments: dict[str, SandboxEnvironment],
|
241
244
|
) -> SandboxEnvironment:
|
242
|
-
|
245
|
+
default_name = sandbox_default_context_var.get()
|
246
|
+
if default_name in environments:
|
247
|
+
return environments[default_name]
|
248
|
+
else:
|
249
|
+
raise ValueError(
|
250
|
+
f"Default sandbox environment '{default_name}' not found in environments"
|
251
|
+
)
|
243
252
|
|
244
253
|
|
245
254
|
def validate_sandbox_environments(
|
@@ -253,6 +262,20 @@ def validate_sandbox_environments(
|
|
253
262
|
)
|
254
263
|
|
255
264
|
|
265
|
+
@contextmanager
|
266
|
+
def sandbox_default(name: str) -> Iterator[None]:
|
267
|
+
"""Set the default sandbox environment for the current context.
|
268
|
+
|
269
|
+
Args:
|
270
|
+
name: Sandbox to set as the default.
|
271
|
+
"""
|
272
|
+
token = sandbox_default_context_var.set(name)
|
273
|
+
try:
|
274
|
+
yield
|
275
|
+
finally:
|
276
|
+
sandbox_default_context_var.reset(token)
|
277
|
+
|
278
|
+
|
256
279
|
sandbox_environments_context_var = ContextVar[dict[str, SandboxEnvironment]](
|
257
280
|
"sandbox_environments"
|
258
281
|
)
|
@@ -260,3 +283,5 @@ sandbox_environments_context_var = ContextVar[dict[str, SandboxEnvironment]](
|
|
260
283
|
sandbox_with_environments_context_var = ContextVar[dict[str, SandboxEnvironment]](
|
261
284
|
"sandbox_with_environments"
|
262
285
|
)
|
286
|
+
|
287
|
+
sandbox_default_context_var = ContextVar[str]("sandbox_default")
|
@@ -25,6 +25,10 @@ def project_startup(project: ComposeProject) -> None:
|
|
25
25
|
running_projects().append(project)
|
26
26
|
|
27
27
|
# track auto compose we need to cleanup
|
28
|
+
project_record_auto_compose(project)
|
29
|
+
|
30
|
+
|
31
|
+
def project_record_auto_compose(project: ComposeProject) -> None:
|
28
32
|
if project.config and is_auto_compose_file(project.config):
|
29
33
|
auto_compose_files().add(project.config)
|
30
34
|
|
@@ -331,8 +331,8 @@ async def compose_command(
|
|
331
331
|
retries = 0
|
332
332
|
while True:
|
333
333
|
try:
|
334
|
-
command_timeout = (
|
335
|
-
timeout if retries == 0 else (min(timeout, 60) // retries)
|
334
|
+
command_timeout = max(
|
335
|
+
timeout if retries == 0 else (min(timeout, 60) // retries), 1
|
336
336
|
)
|
337
337
|
return await run_command(command_timeout)
|
338
338
|
except TimeoutError:
|
@@ -30,6 +30,7 @@ from .cleanup import (
|
|
30
30
|
project_cleanup,
|
31
31
|
project_cleanup_shutdown,
|
32
32
|
project_cleanup_startup,
|
33
|
+
project_record_auto_compose,
|
33
34
|
project_startup,
|
34
35
|
)
|
35
36
|
from .compose import (
|
@@ -78,6 +79,9 @@ class DockerSandboxEnvironment(SandboxEnvironment):
|
|
78
79
|
name=task_project_name(task_name), config=config
|
79
80
|
)
|
80
81
|
|
82
|
+
# record auto compose
|
83
|
+
project_record_auto_compose(project)
|
84
|
+
|
81
85
|
# build containers which are out of date
|
82
86
|
await compose_build(project)
|
83
87
|
|
@@ -310,7 +314,14 @@ class DockerSandboxEnvironment(SandboxEnvironment):
|
|
310
314
|
# write the file
|
311
315
|
if isinstance(contents, str):
|
312
316
|
result = await self.exec(
|
313
|
-
[
|
317
|
+
[
|
318
|
+
"sh",
|
319
|
+
"-e",
|
320
|
+
"-c",
|
321
|
+
'tee -- "$1" > /dev/null',
|
322
|
+
"write_file_script",
|
323
|
+
file,
|
324
|
+
],
|
314
325
|
input=contents,
|
315
326
|
timeout=TIMEOUT,
|
316
327
|
)
|
inspect_ai/util/_store_model.py
CHANGED
@@ -15,6 +15,7 @@ class StoreModel(BaseModel):
|
|
15
15
|
"""
|
16
16
|
|
17
17
|
store: Store = Field(exclude=True, default_factory=store)
|
18
|
+
instance: str | None = Field(exclude=True, default=None)
|
18
19
|
|
19
20
|
def model_post_init(self, __context: Any) -> None:
|
20
21
|
for name in self.model_fields.keys():
|
@@ -28,12 +29,18 @@ class StoreModel(BaseModel):
|
|
28
29
|
elif name in self.__dict__.keys():
|
29
30
|
self.store.set(ns_name, self.__dict__[name])
|
30
31
|
|
32
|
+
# validate that we aren't using a nested StoreModel
|
33
|
+
self._validate_value(name, self.__dict__[name])
|
34
|
+
|
31
35
|
def __getattribute__(self, name: str) -> Any:
|
32
36
|
# sidestep dunders and pydantic fields
|
33
37
|
if name.startswith("__") or name.startswith("model_"):
|
34
38
|
return object.__getattribute__(self, name)
|
35
|
-
# handle model_fields (except 'store') by reading the store
|
36
|
-
elif name in object.__getattribute__(self, "model_fields") and name
|
39
|
+
# handle model_fields (except 'store' and 'instance') by reading the store
|
40
|
+
elif name in object.__getattribute__(self, "model_fields") and name not in [
|
41
|
+
"store",
|
42
|
+
"instance",
|
43
|
+
]:
|
37
44
|
store_key = self._ns_name(name)
|
38
45
|
if store_key in self.store:
|
39
46
|
return self.store.get(store_key)
|
@@ -44,6 +51,7 @@ class StoreModel(BaseModel):
|
|
44
51
|
return super().__getattribute__(name)
|
45
52
|
|
46
53
|
def __setattr__(self, name: str, value: Any) -> None:
|
54
|
+
self._validate_value(name, value)
|
47
55
|
if name in self.model_fields:
|
48
56
|
# validate with the new value (can throw ValidationError)
|
49
57
|
temp_data = self.store._data.copy()
|
@@ -86,11 +94,23 @@ class StoreModel(BaseModel):
|
|
86
94
|
# perform validation
|
87
95
|
self.__class__.model_validate(validate)
|
88
96
|
|
97
|
+
def _validate_value(self, name: str, value: Any) -> None:
|
98
|
+
# validate that we aren't using a nested StoreModel
|
99
|
+
if isinstance(value, StoreModel):
|
100
|
+
raise TypeError(
|
101
|
+
f"{name} is a StoreModel and you may not embed a StoreModel "
|
102
|
+
"inside another StoreModel (derive from BaseModel for fields in a StoreModel)."
|
103
|
+
)
|
104
|
+
|
89
105
|
def _ns_name(self, name: str) -> str:
|
90
|
-
|
106
|
+
namespace = f"{self.instance}:" if self.instance is not None else ""
|
107
|
+
return f"{self.__class__.__name__}:{namespace}{name}"
|
91
108
|
|
92
109
|
def _un_ns_name(self, name: str) -> str:
|
93
|
-
|
110
|
+
name = name.replace(f"{self.__class__.__name__}:", "", 1)
|
111
|
+
if self.instance:
|
112
|
+
name = name.replace(f"{self.instance}:", "", 1)
|
113
|
+
return name
|
94
114
|
|
95
115
|
model_config = ConfigDict(arbitrary_types_allowed=True)
|
96
116
|
|
@@ -98,13 +118,16 @@ class StoreModel(BaseModel):
|
|
98
118
|
SMT = TypeVar("SMT", bound=StoreModel)
|
99
119
|
|
100
120
|
|
101
|
-
def store_as(model_cls: Type[SMT]) -> SMT:
|
121
|
+
def store_as(model_cls: Type[SMT], instance: str | None = None) -> SMT:
|
102
122
|
"""Get a Pydantic model interface to the store.
|
103
123
|
|
104
124
|
Args:
|
105
125
|
model_cls: Pydantic model type (must derive from StoreModel)
|
126
|
+
instance: Optional instance name for store (enables multiple instances
|
127
|
+
of a given StoreModel type within a single sample)
|
128
|
+
|
106
129
|
|
107
130
|
Returns:
|
108
|
-
StoreModel:
|
131
|
+
StoreModel: model_cls bound to current Store.
|
109
132
|
"""
|
110
|
-
return model_cls(store=store())
|
133
|
+
return model_cls(store=store(), instance=instance)
|
inspect_ai/util/_subprocess.py
CHANGED
@@ -117,14 +117,15 @@ async def subprocess(
|
|
117
117
|
async def run_command() -> AsyncGenerator[
|
118
118
|
Union[Process, ExecResult[str], ExecResult[bytes]], None
|
119
119
|
]:
|
120
|
-
|
120
|
+
process = await open_process(
|
121
121
|
args,
|
122
122
|
stdin=PIPE if input else DEVNULL,
|
123
123
|
stdout=PIPE if capture_output else None,
|
124
124
|
stderr=PIPE if capture_output else None,
|
125
125
|
cwd=cwd,
|
126
126
|
env={**os.environ, **env},
|
127
|
-
)
|
127
|
+
)
|
128
|
+
try:
|
128
129
|
# yield the process so the caller has a handle to it
|
129
130
|
yield process
|
130
131
|
|
@@ -173,6 +174,15 @@ async def subprocess(
|
|
173
174
|
stdout=stdout if capture_output else bytes(),
|
174
175
|
stderr=stderr if capture_output else bytes(),
|
175
176
|
)
|
177
|
+
finally:
|
178
|
+
try:
|
179
|
+
await process.aclose()
|
180
|
+
except ProcessLookupError:
|
181
|
+
# the anyio ansycio backend calls process.kill() from within
|
182
|
+
# its aclose() method without an enclosing exception handler
|
183
|
+
# (which in turn can throw ProcessLookupError if the process
|
184
|
+
# is already gone)
|
185
|
+
pass
|
176
186
|
|
177
187
|
# wrapper for run command that implements timeout
|
178
188
|
async def run_command_timeout() -> Union[ExecResult[str], ExecResult[bytes]]:
|
@@ -181,7 +191,7 @@ async def subprocess(
|
|
181
191
|
proc = cast(Process, await anext(rc))
|
182
192
|
|
183
193
|
# await result wrapped in timeout handler if requested
|
184
|
-
if timeout:
|
194
|
+
if timeout is not None:
|
185
195
|
try:
|
186
196
|
with anyio.fail_after(timeout):
|
187
197
|
result = await anext(rc)
|