inspect-ai 0.3.82__py3-none-any.whl → 0.3.84__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (180)
  1. inspect_ai/__init__.py +2 -1
  2. inspect_ai/_display/textual/app.py +14 -3
  3. inspect_ai/_display/textual/display.py +4 -0
  4. inspect_ai/_display/textual/widgets/samples.py +9 -3
  5. inspect_ai/_display/textual/widgets/task_detail.py +3 -4
  6. inspect_ai/_display/textual/widgets/tasks.py +17 -1
  7. inspect_ai/_display/textual/widgets/vscode.py +48 -0
  8. inspect_ai/_eval/eval.py +36 -24
  9. inspect_ai/_eval/evalset.py +17 -18
  10. inspect_ai/_eval/loader.py +34 -11
  11. inspect_ai/_eval/run.py +8 -13
  12. inspect_ai/_eval/score.py +13 -3
  13. inspect_ai/_eval/task/generate.py +8 -9
  14. inspect_ai/_eval/task/log.py +2 -0
  15. inspect_ai/_eval/task/task.py +23 -9
  16. inspect_ai/_util/file.py +13 -0
  17. inspect_ai/_util/json.py +2 -1
  18. inspect_ai/_util/registry.py +1 -0
  19. inspect_ai/_util/vscode.py +37 -0
  20. inspect_ai/_view/www/App.css +6 -0
  21. inspect_ai/_view/www/dist/assets/index.css +304 -128
  22. inspect_ai/_view/www/dist/assets/index.js +47495 -27519
  23. inspect_ai/_view/www/log-schema.json +124 -31
  24. inspect_ai/_view/www/package.json +3 -0
  25. inspect_ai/_view/www/src/App.tsx +12 -0
  26. inspect_ai/_view/www/src/appearance/icons.ts +1 -0
  27. inspect_ai/_view/www/src/components/Card.tsx +6 -4
  28. inspect_ai/_view/www/src/components/LinkButton.module.css +16 -0
  29. inspect_ai/_view/www/src/components/LinkButton.tsx +33 -0
  30. inspect_ai/_view/www/src/components/LiveVirtualList.tsx +1 -1
  31. inspect_ai/_view/www/src/components/MarkdownDiv.tsx +113 -23
  32. inspect_ai/_view/www/src/components/Modal.module.css +38 -0
  33. inspect_ai/_view/www/src/components/Modal.tsx +77 -0
  34. inspect_ai/_view/www/src/plan/DetailStep.module.css +4 -0
  35. inspect_ai/_view/www/src/plan/DetailStep.tsx +6 -3
  36. inspect_ai/_view/www/src/plan/SolverDetailView.module.css +2 -1
  37. inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +7 -0
  38. inspect_ai/_view/www/src/samples/SampleDialog.tsx +7 -0
  39. inspect_ai/_view/www/src/samples/SampleDisplay.tsx +11 -34
  40. inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +6 -0
  41. inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +2 -2
  42. inspect_ai/_view/www/src/samples/SamplesTools.tsx +12 -0
  43. inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +2 -0
  44. inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +2 -0
  45. inspect_ai/_view/www/src/samples/chat/messages.ts +3 -1
  46. inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +1 -0
  47. inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +9 -3
  48. inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.module.css +3 -3
  49. inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.tsx +1 -1
  50. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.module.css +4 -4
  51. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +10 -11
  52. inspect_ai/_view/www/src/samples/list/SampleFooter.module.css +2 -1
  53. inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +7 -1
  54. inspect_ai/_view/www/src/samples/list/SampleList.tsx +25 -8
  55. inspect_ai/_view/www/src/samples/list/SampleRow.tsx +1 -1
  56. inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +11 -22
  57. inspect_ai/_view/www/src/samples/scores/SampleScoresGrid.module.css +38 -0
  58. inspect_ai/_view/www/src/samples/scores/SampleScoresGrid.tsx +118 -0
  59. inspect_ai/_view/www/src/samples/scores/{SampleScoreView.module.css → SampleScoresView.module.css} +10 -1
  60. inspect_ai/_view/www/src/samples/scores/SampleScoresView.tsx +78 -0
  61. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +3 -3
  62. inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +25 -4
  63. inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +29 -2
  64. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +0 -1
  65. inspect_ai/_view/www/src/state/hooks.ts +5 -3
  66. inspect_ai/_view/www/src/state/logPolling.ts +5 -1
  67. inspect_ai/_view/www/src/state/logSlice.ts +10 -0
  68. inspect_ai/_view/www/src/state/samplePolling.ts +4 -1
  69. inspect_ai/_view/www/src/state/sampleSlice.ts +13 -0
  70. inspect_ai/_view/www/src/types/log.d.ts +34 -26
  71. inspect_ai/_view/www/src/types/markdown-it-katex.d.ts +21 -0
  72. inspect_ai/_view/www/src/utils/json-worker.ts +79 -12
  73. inspect_ai/_view/www/src/workspace/WorkSpace.tsx +18 -16
  74. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +16 -0
  75. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +68 -71
  76. inspect_ai/_view/www/src/workspace/navbar/ScoreGrid.module.css +35 -0
  77. inspect_ai/_view/www/src/workspace/navbar/ScoreGrid.tsx +117 -0
  78. inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +1 -1
  79. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.module.css +3 -2
  80. inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +18 -0
  81. inspect_ai/_view/www/yarn.lock +94 -1
  82. inspect_ai/agent/__init__.py +36 -0
  83. inspect_ai/agent/_agent.py +268 -0
  84. inspect_ai/agent/_as_solver.py +72 -0
  85. inspect_ai/agent/_as_tool.py +122 -0
  86. inspect_ai/{solver → agent}/_bridge/bridge.py +23 -37
  87. inspect_ai/{solver → agent}/_bridge/patch.py +9 -8
  88. inspect_ai/agent/_filter.py +46 -0
  89. inspect_ai/agent/_handoff.py +93 -0
  90. inspect_ai/{solver/_human_agent → agent/_human}/agent.py +11 -12
  91. inspect_ai/{solver/_human_agent → agent/_human}/commands/__init__.py +2 -3
  92. inspect_ai/{solver/_human_agent → agent/_human}/commands/clock.py +3 -1
  93. inspect_ai/{solver/_human_agent → agent/_human}/commands/score.py +5 -5
  94. inspect_ai/{solver/_human_agent → agent/_human}/install.py +6 -3
  95. inspect_ai/{solver/_human_agent → agent/_human}/service.py +7 -3
  96. inspect_ai/{solver/_human_agent → agent/_human}/state.py +5 -5
  97. inspect_ai/agent/_react.py +241 -0
  98. inspect_ai/agent/_run.py +36 -0
  99. inspect_ai/agent/_types.py +81 -0
  100. inspect_ai/log/_log.py +11 -2
  101. inspect_ai/log/_transcript.py +13 -9
  102. inspect_ai/model/__init__.py +7 -1
  103. inspect_ai/model/_call_tools.py +256 -52
  104. inspect_ai/model/_chat_message.py +7 -4
  105. inspect_ai/model/_conversation.py +13 -62
  106. inspect_ai/model/_display.py +85 -0
  107. inspect_ai/model/_model.py +113 -14
  108. inspect_ai/model/_model_output.py +14 -9
  109. inspect_ai/model/_openai.py +16 -4
  110. inspect_ai/model/_openai_computer_use.py +162 -0
  111. inspect_ai/model/_openai_responses.py +319 -165
  112. inspect_ai/model/_providers/anthropic.py +20 -21
  113. inspect_ai/model/_providers/azureai.py +24 -13
  114. inspect_ai/model/_providers/bedrock.py +1 -7
  115. inspect_ai/model/_providers/cloudflare.py +3 -3
  116. inspect_ai/model/_providers/goodfire.py +2 -6
  117. inspect_ai/model/_providers/google.py +11 -10
  118. inspect_ai/model/_providers/groq.py +6 -3
  119. inspect_ai/model/_providers/hf.py +7 -3
  120. inspect_ai/model/_providers/mistral.py +7 -10
  121. inspect_ai/model/_providers/openai.py +47 -17
  122. inspect_ai/model/_providers/openai_o1.py +11 -4
  123. inspect_ai/model/_providers/openai_responses.py +12 -14
  124. inspect_ai/model/_providers/providers.py +2 -2
  125. inspect_ai/model/_providers/together.py +12 -2
  126. inspect_ai/model/_providers/util/chatapi.py +7 -2
  127. inspect_ai/model/_providers/util/hf_handler.py +4 -2
  128. inspect_ai/model/_providers/util/llama31.py +4 -2
  129. inspect_ai/model/_providers/vertex.py +11 -9
  130. inspect_ai/model/_providers/vllm.py +4 -4
  131. inspect_ai/scorer/__init__.py +2 -0
  132. inspect_ai/scorer/_metrics/__init__.py +2 -0
  133. inspect_ai/scorer/_metrics/grouped.py +84 -0
  134. inspect_ai/scorer/_score.py +26 -6
  135. inspect_ai/solver/__init__.py +2 -2
  136. inspect_ai/solver/_basic_agent.py +22 -9
  137. inspect_ai/solver/_bridge.py +31 -0
  138. inspect_ai/solver/_chain.py +20 -12
  139. inspect_ai/solver/_fork.py +5 -1
  140. inspect_ai/solver/_human_agent.py +52 -0
  141. inspect_ai/solver/_prompt.py +3 -1
  142. inspect_ai/solver/_run.py +59 -0
  143. inspect_ai/solver/_solver.py +14 -4
  144. inspect_ai/solver/_task_state.py +5 -3
  145. inspect_ai/tool/_tool_call.py +15 -8
  146. inspect_ai/tool/_tool_def.py +17 -12
  147. inspect_ai/tool/_tool_support_helpers.py +2 -2
  148. inspect_ai/tool/_tool_with.py +14 -11
  149. inspect_ai/tool/_tools/_bash_session.py +11 -2
  150. inspect_ai/tool/_tools/_computer/_common.py +18 -2
  151. inspect_ai/tool/_tools/_computer/_computer.py +18 -2
  152. inspect_ai/tool/_tools/_computer/_resources/tool/_constants.py +2 -0
  153. inspect_ai/tool/_tools/_computer/_resources/tool/_x11_client.py +17 -0
  154. inspect_ai/tool/_tools/_think.py +1 -1
  155. inspect_ai/tool/_tools/_web_browser/_web_browser.py +100 -61
  156. inspect_ai/util/__init__.py +2 -0
  157. inspect_ai/util/_anyio.py +27 -0
  158. inspect_ai/util/_sandbox/__init__.py +2 -1
  159. inspect_ai/util/_sandbox/context.py +32 -7
  160. inspect_ai/util/_sandbox/docker/cleanup.py +4 -0
  161. inspect_ai/util/_sandbox/docker/compose.py +2 -2
  162. inspect_ai/util/_sandbox/docker/docker.py +12 -1
  163. inspect_ai/util/_store_model.py +30 -7
  164. inspect_ai/util/_subprocess.py +13 -3
  165. {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.84.dist-info}/METADATA +1 -1
  166. {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.84.dist-info}/RECORD +179 -153
  167. inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +0 -167
  168. /inspect_ai/{solver → agent}/_bridge/__init__.py +0 -0
  169. /inspect_ai/{solver/_human_agent → agent/_human}/__init__.py +0 -0
  170. /inspect_ai/{solver/_human_agent → agent/_human}/commands/command.py +0 -0
  171. /inspect_ai/{solver/_human_agent → agent/_human}/commands/instructions.py +0 -0
  172. /inspect_ai/{solver/_human_agent → agent/_human}/commands/note.py +0 -0
  173. /inspect_ai/{solver/_human_agent → agent/_human}/commands/status.py +0 -0
  174. /inspect_ai/{solver/_human_agent → agent/_human}/commands/submit.py +0 -0
  175. /inspect_ai/{solver/_human_agent → agent/_human}/panel.py +0 -0
  176. /inspect_ai/{solver/_human_agent → agent/_human}/view.py +0 -0
  177. {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.84.dist-info}/WHEEL +0 -0
  178. {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.84.dist-info}/entry_points.txt +0 -0
  179. {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.84.dist-info}/licenses/LICENSE +0 -0
  180. {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.84.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,7 @@
1
1
  import re
2
2
 
3
3
  from pydantic import BaseModel, Field
4
+ from shortuuid import uuid
4
5
 
5
6
  from inspect_ai._util.content import ContentText
6
7
  from inspect_ai._util.error import PrerequisiteError
@@ -31,22 +32,30 @@ class CrawlerResult(BaseModel):
31
32
  error: str | None = None
32
33
 
33
34
 
34
- def web_browser(interactive: bool = True) -> list[Tool]:
35
+ def web_browser(
36
+ *, interactive: bool = True, instance: str | None = uuid()
37
+ ) -> list[Tool]:
35
38
  """Tools used for web browser navigation.
36
39
 
37
- See documentation at <https://inspect.aisi.org.uk/tools-standard.html#sec-web-browser>.
40
+ By default, a separate web browser process is created within the sandbox for each
41
+ call to `web_browser()`. You can modify this behavior by passing `instance=None`
42
+ (which will result in a single web browser for the entire sample) or use other
43
+ `instance` values that implement another scheme.
44
+
45
+ See complete documentation at <https://inspect.aisi.org.uk/tools-standard.html#sec-web-browser>.
38
46
 
39
47
  Args:
40
48
  interactive: Provide interactive tools (enable
41
49
  clicking, typing, and submitting forms). Defaults
42
50
  to True.
51
+ instance: Instance id (each unique instance id has its own web browser process)
43
52
 
44
53
  Returns:
45
54
  List of tools used for web browser navigation.
46
55
 
47
56
  """
48
57
  # start with go tool (excluding interactive docs if necessary)
49
- go = web_browser_go()
58
+ go = web_browser_go(instance)
50
59
  if not interactive:
51
60
  go = go_without_interactive_docs(go)
52
61
  tools = [go]
@@ -54,24 +63,27 @@ def web_browser(interactive: bool = True) -> list[Tool]:
54
63
  # add interactive tools if requested
55
64
  if interactive:
56
65
  tools = tools + [
57
- web_browser_click(),
58
- web_browser_type_submit(),
59
- web_browser_type(),
66
+ tool_with_web_at_viewer(web_browser_click(instance), instance),
67
+ tool_with_web_at_viewer(web_browser_type_submit(instance), instance),
68
+ tool_with_web_at_viewer(web_browser_type(instance), instance),
60
69
  ]
61
70
 
62
71
  # add navigational tools
63
72
  return tools + [
64
- web_browser_scroll(),
65
- web_browser_back(),
66
- web_browser_forward(),
67
- web_browser_refresh(),
73
+ web_browser_scroll(instance),
74
+ web_browser_back(instance),
75
+ web_browser_forward(instance),
76
+ web_browser_refresh(instance),
68
77
  ]
69
78
 
70
79
 
71
80
  @tool(parallel=False)
72
- def web_browser_go() -> Tool:
81
+ def web_browser_go(instance: str | None = None) -> Tool:
73
82
  """Web Browser tool for navigation to a URL.
74
83
 
84
+ Args:
85
+ instance: Instance id (each unique instance id has its own web browser process)
86
+
75
87
  Returns:
76
88
  Web browser navigation tool.
77
89
  """
@@ -102,7 +114,7 @@ def web_browser_go() -> Tool:
102
114
  Returns:
103
115
  Web accessibility tree of the visible elements of the web page. The element_id of each element is displayed in brackets at the beginning of the line.
104
116
  """
105
- return await _web_browser_cmd("web_go", locals())
117
+ return await _web_browser_cmd("web_go", instance, locals())
106
118
 
107
119
  return execute
108
120
 
@@ -126,36 +138,44 @@ class WebBrowserStore(StoreModel):
126
138
  session_id: str = Field(default_factory=str)
127
139
 
128
140
 
129
- def web_at_viewer(call: ToolCall) -> ToolCallView:
130
- # get the web accessibility tree, if we have it create a view from it
131
- web_at = store_as(WebBrowserStore).web_at
132
- element_id = call.arguments.get("element_id", 0)
133
- if web_at and element_id:
134
- lines = web_at.splitlines()
135
- pattern = re.compile(rf"^\s+\[{element_id}\] .*$")
136
- for i, line in enumerate(lines):
137
- if pattern.match(line):
138
- snippet = (
139
- lines[0:1]
140
- + [" ..."]
141
- + lines[max(i - 2, 1) : i]
142
- + [line.replace(" ", "*", 1)]
143
- + lines[i + 1 : min(i + 3, len(lines))]
144
- + [" ..."]
145
- )
146
-
147
- return ToolCallView(
148
- context=ToolCallContent(format="text", content="\n".join(snippet))
149
- )
150
-
151
- # no view found
152
- return ToolCallView()
153
-
154
-
155
- @tool(viewer=web_at_viewer, parallel=False)
156
- def web_browser_click() -> Tool:
141
+ def tool_with_web_at_viewer(tool: Tool, instance: str | None = None) -> Tool:
142
+ def web_at_viewer(call: ToolCall) -> ToolCallView:
143
+ # get the web accessibility tree, if we have it create a view from it
144
+ web_at = store_as(WebBrowserStore, instance=instance).web_at
145
+ element_id = call.arguments.get("element_id", 0)
146
+ if web_at and element_id:
147
+ lines = web_at.splitlines()
148
+ pattern = re.compile(rf"^\s+\[{element_id}\] .*$")
149
+ for i, line in enumerate(lines):
150
+ if pattern.match(line):
151
+ snippet = (
152
+ lines[0:1]
153
+ + [" ..."]
154
+ + lines[max(i - 2, 1) : i]
155
+ + [line.replace(" ", "*", 1)]
156
+ + lines[i + 1 : min(i + 3, len(lines))]
157
+ + [" ..."]
158
+ )
159
+
160
+ return ToolCallView(
161
+ context=ToolCallContent(
162
+ format="text", content="\n".join(snippet)
163
+ )
164
+ )
165
+
166
+ # no view found
167
+ return ToolCallView()
168
+
169
+ return tool_with(tool, viewer=web_at_viewer)
170
+
171
+
172
+ @tool(parallel=False)
173
+ def web_browser_click(instance: str | None = None) -> Tool:
157
174
  """Web Browser tool for clicking an element on a web page.
158
175
 
176
+ Args:
177
+ instance: Instance id (each unique instance id has its own web browser process)
178
+
159
179
  Returns:
160
180
  Web browser clicking tool.
161
181
  """
@@ -182,15 +202,18 @@ def web_browser_click() -> Tool:
182
202
  Returns:
183
203
  Web accessibility tree of the visible elements of the web page. The element_id of each element is displayed in brackets at the beginning of the line.
184
204
  """
185
- return await _web_browser_cmd("web_click", locals())
205
+ return await _web_browser_cmd("web_click", instance, locals())
186
206
 
187
207
  return execute
188
208
 
189
209
 
190
- @tool(viewer=web_at_viewer, parallel=False)
191
- def web_browser_type_submit() -> Tool:
210
+ @tool(parallel=False)
211
+ def web_browser_type_submit(instance: str | None = None) -> Tool:
192
212
  """Web Browser tool for typing and submitting input.
193
213
 
214
+ Args:
215
+ instance: Instance id (each unique instance id has its own web browser process)
216
+
194
217
  Returns:
195
218
  Web browser type and submit tool.
196
219
  """
@@ -220,15 +243,18 @@ def web_browser_type_submit() -> Tool:
220
243
  Returns:
221
244
  Web accessibility tree of the visible elements of the web page. The element_id of each element is displayed in brackets at the beginning of the line.
222
245
  """
223
- return await _web_browser_cmd("web_type_submit", locals())
246
+ return await _web_browser_cmd("web_type_submit", instance, locals())
224
247
 
225
248
  return execute
226
249
 
227
250
 
228
- @tool(viewer=web_at_viewer, parallel=False)
229
- def web_browser_type() -> Tool:
251
+ @tool(parallel=False)
252
+ def web_browser_type(instance: str | None = None) -> Tool:
230
253
  """Web Browser tool for typing into inputs.
231
254
 
255
+ Args:
256
+ instance: Instance id (each unique instance id has its own web browser process)
257
+
232
258
  Returns:
233
259
  Web browser typing tool.
234
260
  """
@@ -258,15 +284,18 @@ def web_browser_type() -> Tool:
258
284
  Returns:
259
285
  Web accessibility tree of the visible elements of the web page. The element_id of each element is displayed in brackets at the beginning of the line.
260
286
  """
261
- return await _web_browser_cmd("web_type", locals())
287
+ return await _web_browser_cmd("web_type", instance, locals())
262
288
 
263
289
  return execute
264
290
 
265
291
 
266
292
  @tool(parallel=False)
267
- def web_browser_scroll() -> Tool:
293
+ def web_browser_scroll(instance: str | None = None) -> Tool:
268
294
  """Web Browser tool for scrolling up or down one page.
269
295
 
296
+ Args:
297
+ instance: Instance id (each unique instance id has its own web browser process)
298
+
270
299
  Returns:
271
300
  Web browser scrolling tool.
272
301
  """
@@ -288,15 +317,18 @@ def web_browser_scroll() -> Tool:
288
317
  Returns:
289
318
  Web accessibility tree of the visible elements of the web page. The element_id of each element is displayed in brackets at the beginning of the line.
290
319
  """
291
- return await _web_browser_cmd("web_scroll", locals())
320
+ return await _web_browser_cmd("web_scroll", instance, locals())
292
321
 
293
322
  return execute
294
323
 
295
324
 
296
325
  @tool(parallel=False)
297
- def web_browser_back() -> Tool:
326
+ def web_browser_back(instance: str | None = None) -> Tool:
298
327
  """Web Browser tool for navigating back in the browser history.
299
328
 
329
+ Args:
330
+ instance: Instance id (each unique instance id has its own web browser process)
331
+
300
332
  Returns:
301
333
  Web browser back navigation tool.
302
334
  """
@@ -309,15 +341,18 @@ def web_browser_back() -> Tool:
309
341
  Returns:
310
342
  Web accessibility tree of the visible elements of the web page. The element_id of each element is displayed in brackets at the beginning of the line.
311
343
  """
312
- return await _web_browser_cmd("web_back", locals())
344
+ return await _web_browser_cmd("web_back", instance, locals())
313
345
 
314
346
  return execute
315
347
 
316
348
 
317
349
  @tool(parallel=False)
318
- def web_browser_forward() -> Tool:
350
+ def web_browser_forward(instance: str | None = None) -> Tool:
319
351
  """Web Browser tool for navigating forward in the browser history.
320
352
 
353
+ Args:
354
+ instance: Instance id (each unique instance id has its own web browser process)
355
+
321
356
  Returns:
322
357
  Web browser forward navigation tool.
323
358
  """
@@ -330,15 +365,18 @@ def web_browser_forward() -> Tool:
330
365
  Returns:
331
366
  Web accessibility tree of the visible elements of the web page. The element_id of each element is displayed in brackets at the beginning of the line.
332
367
  """
333
- return await _web_browser_cmd("web_forward", locals())
368
+ return await _web_browser_cmd("web_forward", instance, locals())
334
369
 
335
370
  return execute
336
371
 
337
372
 
338
373
  @tool(parallel=False)
339
- def web_browser_refresh() -> Tool:
374
+ def web_browser_refresh(instance: str | None = None) -> Tool:
340
375
  """Web Browser tool for refreshing the current page.
341
376
 
377
+ Args:
378
+ instance: Instance id (each unique instance id has its own web browser process)
379
+
342
380
  Returns:
343
381
  Web browser page refresh tool.
344
382
  """
@@ -351,12 +389,14 @@ def web_browser_refresh() -> Tool:
351
389
  Returns:
352
390
  Web accessibility tree of the visible elements of the web page. The element_id of each element is displayed in brackets at the beginning of the line.
353
391
  """
354
- return await _web_browser_cmd("web_refresh", locals())
392
+ return await _web_browser_cmd("web_refresh", instance, locals())
355
393
 
356
394
  return execute
357
395
 
358
396
 
359
- async def _web_browser_cmd(tool_name: str, params: dict[str, object]) -> ToolResult:
397
+ async def _web_browser_cmd(
398
+ tool_name: str, instance: str | None, params: dict[str, object]
399
+ ) -> ToolResult:
360
400
  try:
361
401
  sandbox_env = await tool_container_sandbox("web browser")
362
402
  except PrerequisiteError as e:
@@ -369,7 +409,8 @@ async def _web_browser_cmd(tool_name: str, params: dict[str, object]) -> ToolRes
369
409
  except PrerequisiteError:
370
410
  raise e
371
411
 
372
- store = store_as(WebBrowserStore)
412
+ # bind to store (use instance id if provided)
413
+ store = store_as(WebBrowserStore, instance=instance)
373
414
 
374
415
  if not store.session_id:
375
416
  store.session_id = (
@@ -397,10 +438,8 @@ async def _web_browser_cmd(tool_name: str, params: dict[str, object]) -> ToolRes
397
438
  line.partition("data:image/png;base64")[0] for line in web_at_lines
398
439
  ]
399
440
 
400
- store_as(WebBrowserStore).main_content = (
401
- main_content or "(no main text summary)"
402
- )
403
- store_as(WebBrowserStore).web_at = web_at
441
+ store.main_content = main_content or "(no main text summary)"
442
+ store.web_at = web_at
404
443
 
405
444
  web_at = "\n".join(web_at_lines)
406
445
  return (
@@ -16,6 +16,7 @@ from ._sandbox import (
16
16
  SandboxEnvironmentSpec,
17
17
  SandboxEnvironmentType,
18
18
  sandbox,
19
+ sandbox_default,
19
20
  sandbox_with,
20
21
  sandboxenv,
21
22
  )
@@ -53,6 +54,7 @@ __all__ = [
53
54
  "sandboxenv",
54
55
  "sandbox",
55
56
  "sandbox_with",
57
+ "sandbox_default",
56
58
  "Store",
57
59
  "store",
58
60
  "StoreModel",
@@ -0,0 +1,27 @@
1
+ import sys
2
+
3
+ if sys.version_info < (3, 11):
4
+ from exceptiongroup import ExceptionGroup
5
+
6
+
7
+ def inner_exception(exc: Exception) -> Exception:
8
+ flattended = flatten_exception_group(exc)
9
+ return flattended[0]
10
+
11
+
12
+ def flatten_exception_group(exc: Exception) -> list[Exception]:
13
+ """Recursively flatten an ExceptionGroup to get all contained exceptions."""
14
+ if (
15
+ hasattr(exc, "__context__")
16
+ and exc.__context__ is not None
17
+ and isinstance(exc.__context__, Exception)
18
+ ):
19
+ return flatten_exception_group(exc.__context__) + [exc]
20
+
21
+ if isinstance(exc, ExceptionGroup):
22
+ flattened = []
23
+ for nested_exc in exc.exceptions:
24
+ flattened.extend(flatten_exception_group(nested_exc))
25
+ return flattened
26
+
27
+ return [exc]
@@ -1,6 +1,6 @@
1
1
  # note: unused imports are still required to ensure that our built-in sandbox environments are registered
2
2
 
3
- from .context import sandbox, sandbox_with
3
+ from .context import sandbox, sandbox_default, sandbox_with
4
4
  from .docker.docker import DockerSandboxEnvironment # noqa: F401
5
5
  from .environment import (
6
6
  SandboxConnection,
@@ -26,4 +26,5 @@ __all__ = [
26
26
  "sandboxenv",
27
27
  "sandbox",
28
28
  "sandbox_with",
29
+ "sandbox_default",
29
30
  ]
@@ -1,6 +1,7 @@
1
+ from contextlib import contextmanager
1
2
  from contextvars import ContextVar
2
3
  from logging import getLogger
3
- from typing import Any, NoReturn, cast
4
+ from typing import Any, Iterator, NoReturn, cast
4
5
 
5
6
  from shortuuid import uuid
6
7
 
@@ -39,7 +40,7 @@ def sandbox(name: str | None = None) -> SandboxEnvironment:
39
40
 
40
41
  # For None, 'default', or a single environment only take the first environment
41
42
  if name is None or name == "default" or len(environments) == 1:
42
- return list(environments.values())[0]
43
+ return default_sandbox_environment(environments)
43
44
  else:
44
45
  environment = environments.get(name, None)
45
46
  if not environment:
@@ -146,6 +147,12 @@ async def init_sandbox_environments_sample(
146
147
  environments = {k: SandboxEnvironmentProxy(v) for k, v in environments.items()}
147
148
 
148
149
  try:
150
+ # set context
151
+ sandbox_environments_context_var.set(environments)
152
+ sandbox_with_environments_context_var.set({})
153
+ default_name = next(iter(environments.keys()))
154
+ sandbox_default_context_var.set(default_name)
155
+
149
156
  # copy files into environments
150
157
  await copy_sandbox_environment_files(files, environments)
151
158
 
@@ -153,10 +160,6 @@ async def init_sandbox_environments_sample(
153
160
  if setup:
154
161
  await setup_sandbox_environment(setup, environments)
155
162
 
156
- # set context
157
- sandbox_environments_context_var.set(environments)
158
- sandbox_with_environments_context_var.set({})
159
-
160
163
  # return environments
161
164
  return environments
162
165
 
@@ -239,7 +242,13 @@ async def setup_sandbox_environment(
239
242
  def default_sandbox_environment(
240
243
  environments: dict[str, SandboxEnvironment],
241
244
  ) -> SandboxEnvironment:
242
- return list(environments.values())[0]
245
+ default_name = sandbox_default_context_var.get()
246
+ if default_name in environments:
247
+ return environments[default_name]
248
+ else:
249
+ raise ValueError(
250
+ f"Default sandbox environment '{default_name}' not found in environments"
251
+ )
243
252
 
244
253
 
245
254
  def validate_sandbox_environments(
@@ -253,6 +262,20 @@ def validate_sandbox_environments(
253
262
  )
254
263
 
255
264
 
265
+ @contextmanager
266
+ def sandbox_default(name: str) -> Iterator[None]:
267
+ """Set the default sandbox environment for the current context.
268
+
269
+ Args:
270
+ name: Sandbox to set as the default.
271
+ """
272
+ token = sandbox_default_context_var.set(name)
273
+ try:
274
+ yield
275
+ finally:
276
+ sandbox_default_context_var.reset(token)
277
+
278
+
256
279
  sandbox_environments_context_var = ContextVar[dict[str, SandboxEnvironment]](
257
280
  "sandbox_environments"
258
281
  )
@@ -260,3 +283,5 @@ sandbox_environments_context_var = ContextVar[dict[str, SandboxEnvironment]](
260
283
  sandbox_with_environments_context_var = ContextVar[dict[str, SandboxEnvironment]](
261
284
  "sandbox_with_environments"
262
285
  )
286
+
287
+ sandbox_default_context_var = ContextVar[str]("sandbox_default")
@@ -25,6 +25,10 @@ def project_startup(project: ComposeProject) -> None:
25
25
  running_projects().append(project)
26
26
 
27
27
  # track auto compose we need to cleanup
28
+ project_record_auto_compose(project)
29
+
30
+
31
+ def project_record_auto_compose(project: ComposeProject) -> None:
28
32
  if project.config and is_auto_compose_file(project.config):
29
33
  auto_compose_files().add(project.config)
30
34
 
@@ -331,8 +331,8 @@ async def compose_command(
331
331
  retries = 0
332
332
  while True:
333
333
  try:
334
- command_timeout = (
335
- timeout if retries == 0 else (min(timeout, 60) // retries)
334
+ command_timeout = max(
335
+ timeout if retries == 0 else (min(timeout, 60) // retries), 1
336
336
  )
337
337
  return await run_command(command_timeout)
338
338
  except TimeoutError:
@@ -30,6 +30,7 @@ from .cleanup import (
30
30
  project_cleanup,
31
31
  project_cleanup_shutdown,
32
32
  project_cleanup_startup,
33
+ project_record_auto_compose,
33
34
  project_startup,
34
35
  )
35
36
  from .compose import (
@@ -78,6 +79,9 @@ class DockerSandboxEnvironment(SandboxEnvironment):
78
79
  name=task_project_name(task_name), config=config
79
80
  )
80
81
 
82
+ # record auto compose
83
+ project_record_auto_compose(project)
84
+
81
85
  # build containers which are out of date
82
86
  await compose_build(project)
83
87
 
@@ -310,7 +314,14 @@ class DockerSandboxEnvironment(SandboxEnvironment):
310
314
  # write the file
311
315
  if isinstance(contents, str):
312
316
  result = await self.exec(
313
- ["sh", "-e", "-c", 'tee -- "$1"', "write_file_script", file],
317
+ [
318
+ "sh",
319
+ "-e",
320
+ "-c",
321
+ 'tee -- "$1" > /dev/null',
322
+ "write_file_script",
323
+ file,
324
+ ],
314
325
  input=contents,
315
326
  timeout=TIMEOUT,
316
327
  )
@@ -15,6 +15,7 @@ class StoreModel(BaseModel):
15
15
  """
16
16
 
17
17
  store: Store = Field(exclude=True, default_factory=store)
18
+ instance: str | None = Field(exclude=True, default=None)
18
19
 
19
20
  def model_post_init(self, __context: Any) -> None:
20
21
  for name in self.model_fields.keys():
@@ -28,12 +29,18 @@ class StoreModel(BaseModel):
28
29
  elif name in self.__dict__.keys():
29
30
  self.store.set(ns_name, self.__dict__[name])
30
31
 
32
+ # validate that we aren't using a nested StoreModel
33
+ self._validate_value(name, self.__dict__[name])
34
+
31
35
  def __getattribute__(self, name: str) -> Any:
32
36
  # sidestep dunders and pydantic fields
33
37
  if name.startswith("__") or name.startswith("model_"):
34
38
  return object.__getattribute__(self, name)
35
- # handle model_fields (except 'store') by reading the store
36
- elif name in object.__getattribute__(self, "model_fields") and name != "store":
39
+ # handle model_fields (except 'store' and 'instance') by reading the store
40
+ elif name in object.__getattribute__(self, "model_fields") and name not in [
41
+ "store",
42
+ "instance",
43
+ ]:
37
44
  store_key = self._ns_name(name)
38
45
  if store_key in self.store:
39
46
  return self.store.get(store_key)
@@ -44,6 +51,7 @@ class StoreModel(BaseModel):
44
51
  return super().__getattribute__(name)
45
52
 
46
53
  def __setattr__(self, name: str, value: Any) -> None:
54
+ self._validate_value(name, value)
47
55
  if name in self.model_fields:
48
56
  # validate with the new value (can throw ValidationError)
49
57
  temp_data = self.store._data.copy()
@@ -86,11 +94,23 @@ class StoreModel(BaseModel):
86
94
  # perform validation
87
95
  self.__class__.model_validate(validate)
88
96
 
97
+ def _validate_value(self, name: str, value: Any) -> None:
98
+ # validate that we aren't using a nested StoreModel
99
+ if isinstance(value, StoreModel):
100
+ raise TypeError(
101
+ f"{name} is a StoreModel and you may not embed a StoreModel "
102
+ "inside another StoreModel (derive from BaseModel for fields in a StoreModel)."
103
+ )
104
+
89
105
  def _ns_name(self, name: str) -> str:
90
- return f"{self.__class__.__name__}:{name}"
106
+ namespace = f"{self.instance}:" if self.instance is not None else ""
107
+ return f"{self.__class__.__name__}:{namespace}{name}"
91
108
 
92
109
  def _un_ns_name(self, name: str) -> str:
93
- return name.replace(f"{self.__class__.__name__}:", "", 1)
110
+ name = name.replace(f"{self.__class__.__name__}:", "", 1)
111
+ if self.instance:
112
+ name = name.replace(f"{self.instance}:", "", 1)
113
+ return name
94
114
 
95
115
  model_config = ConfigDict(arbitrary_types_allowed=True)
96
116
 
@@ -98,13 +118,16 @@ class StoreModel(BaseModel):
98
118
  SMT = TypeVar("SMT", bound=StoreModel)
99
119
 
100
120
 
101
- def store_as(model_cls: Type[SMT]) -> SMT:
121
+ def store_as(model_cls: Type[SMT], instance: str | None = None) -> SMT:
102
122
  """Get a Pydantic model interface to the store.
103
123
 
104
124
  Args:
105
125
  model_cls: Pydantic model type (must derive from StoreModel)
126
+ instance: Optional instance name for store (enables multiple instances
127
+ of a given StoreModel type within a single sample)
128
+
106
129
 
107
130
  Returns:
108
- StoreModel: Instance of model_cls bound to current Store.
131
+ StoreModel: model_cls bound to current Store.
109
132
  """
110
- return model_cls(store=store())
133
+ return model_cls(store=store(), instance=instance)
@@ -117,14 +117,15 @@ async def subprocess(
117
117
  async def run_command() -> AsyncGenerator[
118
118
  Union[Process, ExecResult[str], ExecResult[bytes]], None
119
119
  ]:
120
- async with await open_process(
120
+ process = await open_process(
121
121
  args,
122
122
  stdin=PIPE if input else DEVNULL,
123
123
  stdout=PIPE if capture_output else None,
124
124
  stderr=PIPE if capture_output else None,
125
125
  cwd=cwd,
126
126
  env={**os.environ, **env},
127
- ) as process:
127
+ )
128
+ try:
128
129
  # yield the process so the caller has a handle to it
129
130
  yield process
130
131
 
@@ -173,6 +174,15 @@ async def subprocess(
173
174
  stdout=stdout if capture_output else bytes(),
174
175
  stderr=stderr if capture_output else bytes(),
175
176
  )
177
+ finally:
178
+ try:
179
+ await process.aclose()
180
+ except ProcessLookupError:
181
+ # the anyio asyncio backend calls process.kill() from within
182
+ # its aclose() method without an enclosing exception handler
183
+ # (which in turn can throw ProcessLookupError if the process
184
+ # is already gone)
185
+ pass
176
186
 
177
187
  # wrapper for run command that implements timeout
178
188
  async def run_command_timeout() -> Union[ExecResult[str], ExecResult[bytes]]:
@@ -181,7 +191,7 @@ async def subprocess(
181
191
  proc = cast(Process, await anext(rc))
182
192
 
183
193
  # await result wrapped in timeout handler if requested
184
- if timeout:
194
+ if timeout is not None:
185
195
  try:
186
196
  with anyio.fail_after(timeout):
187
197
  result = await anext(rc)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: inspect_ai
3
- Version: 0.3.82
3
+ Version: 0.3.84
4
4
  Summary: Framework for large language model evaluations
5
5
  Author: UK AI Security Institute
6
6
  License: MIT License