inspect-ai 0.3.74__py3-none-any.whl → 0.3.76__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115) hide show
  1. inspect_ai/__init__.py +3 -2
  2. inspect_ai/_cli/cache.py +1 -1
  3. inspect_ai/_cli/common.py +15 -0
  4. inspect_ai/_cli/eval.py +4 -5
  5. inspect_ai/_cli/log.py +1 -1
  6. inspect_ai/_cli/sandbox.py +1 -1
  7. inspect_ai/_cli/trace.py +1 -1
  8. inspect_ai/_cli/view.py +1 -1
  9. inspect_ai/_display/core/config.py +3 -1
  10. inspect_ai/_eval/eval.py +55 -61
  11. inspect_ai/_eval/evalset.py +64 -154
  12. inspect_ai/_eval/loader.py +27 -54
  13. inspect_ai/_eval/registry.py +4 -15
  14. inspect_ai/_eval/run.py +7 -4
  15. inspect_ai/_eval/task/__init__.py +8 -2
  16. inspect_ai/_eval/task/log.py +9 -1
  17. inspect_ai/_eval/task/resolved.py +35 -0
  18. inspect_ai/_eval/task/run.py +4 -0
  19. inspect_ai/_eval/task/task.py +50 -69
  20. inspect_ai/_eval/task/tasks.py +30 -0
  21. inspect_ai/_util/constants.py +3 -0
  22. inspect_ai/_util/dotenv.py +17 -0
  23. inspect_ai/_util/logger.py +3 -0
  24. inspect_ai/_util/registry.py +43 -2
  25. inspect_ai/_view/server.py +28 -10
  26. inspect_ai/_view/www/dist/assets/index.css +32 -19
  27. inspect_ai/_view/www/dist/assets/index.js +17682 -29989
  28. inspect_ai/_view/www/log-schema.json +79 -9
  29. inspect_ai/_view/www/package.json +2 -2
  30. inspect_ai/_view/www/src/appearance/styles.ts +6 -5
  31. inspect_ai/_view/www/src/components/AnsiDisplay.tsx +2 -2
  32. inspect_ai/_view/www/src/constants.ts +3 -0
  33. inspect_ai/_view/www/src/logfile/remoteZipFile.ts +141 -20
  34. inspect_ai/_view/www/src/plan/PlanDetailView.tsx +2 -1
  35. inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +1 -1
  36. inspect_ai/_view/www/src/samples/chat/tools/tool.ts +7 -5
  37. inspect_ai/_view/www/src/samples/descriptor/score/CategoricalScoreDescriptor.tsx +1 -1
  38. inspect_ai/_view/www/src/samples/descriptor/score/NumericScoreDescriptor.tsx +2 -2
  39. inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.module.css +1 -0
  40. inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.tsx +3 -1
  41. inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +1 -1
  42. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +5 -2
  43. inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +2 -2
  44. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +5 -1
  45. inspect_ai/_view/www/src/types/log.d.ts +11 -5
  46. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +17 -12
  47. inspect_ai/_view/www/src/workspace/sidebar/SidebarLogEntry.tsx +2 -1
  48. inspect_ai/_view/www/yarn.lock +12 -5
  49. inspect_ai/log/_log.py +10 -1
  50. inspect_ai/log/_recorders/eval.py +27 -8
  51. inspect_ai/log/_recorders/json.py +10 -2
  52. inspect_ai/log/_transcript.py +13 -4
  53. inspect_ai/model/_call_tools.py +13 -4
  54. inspect_ai/model/_chat_message.py +15 -1
  55. inspect_ai/model/_model.py +30 -12
  56. inspect_ai/model/_model_output.py +6 -1
  57. inspect_ai/model/_openai.py +11 -6
  58. inspect_ai/model/_providers/anthropic.py +167 -77
  59. inspect_ai/model/_providers/google.py +6 -2
  60. inspect_ai/model/_providers/none.py +31 -0
  61. inspect_ai/model/_providers/openai.py +11 -8
  62. inspect_ai/model/_providers/providers.py +7 -0
  63. inspect_ai/model/_providers/vertex.py +5 -2
  64. inspect_ai/solver/_bridge/bridge.py +1 -1
  65. inspect_ai/solver/_chain.py +7 -6
  66. inspect_ai/tool/__init__.py +4 -0
  67. inspect_ai/tool/_tool_call.py +5 -2
  68. inspect_ai/tool/_tool_support_helpers.py +200 -0
  69. inspect_ai/tool/_tools/_bash_session.py +119 -0
  70. inspect_ai/tool/_tools/_computer/_computer.py +1 -1
  71. inspect_ai/tool/_tools/_text_editor.py +121 -0
  72. inspect_ai/tool/_tools/_web_browser/_back_compat.py +150 -0
  73. inspect_ai/tool/_tools/_web_browser/_web_browser.py +75 -130
  74. inspect_ai/tool/_tools/_web_search.py +2 -2
  75. inspect_ai/util/_json.py +28 -0
  76. inspect_ai/util/_sandbox/context.py +18 -8
  77. inspect_ai/util/_sandbox/docker/config.py +1 -1
  78. inspect_ai/util/_sandbox/docker/internal.py +3 -3
  79. inspect_ai/util/_sandbox/environment.py +17 -2
  80. {inspect_ai-0.3.74.dist-info → inspect_ai-0.3.76.dist-info}/METADATA +8 -5
  81. {inspect_ai-0.3.74.dist-info → inspect_ai-0.3.76.dist-info}/RECORD +85 -108
  82. {inspect_ai-0.3.74.dist-info → inspect_ai-0.3.76.dist-info}/WHEEL +1 -1
  83. inspect_ai/tool/_tools/_web_browser/_resources/.pylintrc +0 -8
  84. inspect_ai/tool/_tools/_web_browser/_resources/.vscode/launch.json +0 -24
  85. inspect_ai/tool/_tools/_web_browser/_resources/.vscode/settings.json +0 -25
  86. inspect_ai/tool/_tools/_web_browser/_resources/Dockerfile +0 -22
  87. inspect_ai/tool/_tools/_web_browser/_resources/README.md +0 -63
  88. inspect_ai/tool/_tools/_web_browser/_resources/accessibility_tree.py +0 -71
  89. inspect_ai/tool/_tools/_web_browser/_resources/accessibility_tree_node.py +0 -323
  90. inspect_ai/tool/_tools/_web_browser/_resources/cdp/__init__.py +0 -5
  91. inspect_ai/tool/_tools/_web_browser/_resources/cdp/a11y.py +0 -279
  92. inspect_ai/tool/_tools/_web_browser/_resources/cdp/dom.py +0 -9
  93. inspect_ai/tool/_tools/_web_browser/_resources/cdp/dom_snapshot.py +0 -293
  94. inspect_ai/tool/_tools/_web_browser/_resources/cdp/page.py +0 -94
  95. inspect_ai/tool/_tools/_web_browser/_resources/constants.py +0 -2
  96. inspect_ai/tool/_tools/_web_browser/_resources/images/usage_diagram.svg +0 -2
  97. inspect_ai/tool/_tools/_web_browser/_resources/mock_environment.py +0 -45
  98. inspect_ai/tool/_tools/_web_browser/_resources/playwright_browser.py +0 -50
  99. inspect_ai/tool/_tools/_web_browser/_resources/playwright_crawler.py +0 -48
  100. inspect_ai/tool/_tools/_web_browser/_resources/playwright_page_crawler.py +0 -280
  101. inspect_ai/tool/_tools/_web_browser/_resources/pyproject.toml +0 -65
  102. inspect_ai/tool/_tools/_web_browser/_resources/rectangle.py +0 -64
  103. inspect_ai/tool/_tools/_web_browser/_resources/rpc_client_helpers.py +0 -146
  104. inspect_ai/tool/_tools/_web_browser/_resources/scale_factor.py +0 -64
  105. inspect_ai/tool/_tools/_web_browser/_resources/test_accessibility_tree_node.py +0 -180
  106. inspect_ai/tool/_tools/_web_browser/_resources/test_playwright_crawler.py +0 -99
  107. inspect_ai/tool/_tools/_web_browser/_resources/test_rectangle.py +0 -15
  108. inspect_ai/tool/_tools/_web_browser/_resources/test_web_client.py +0 -44
  109. inspect_ai/tool/_tools/_web_browser/_resources/web_browser_rpc_types.py +0 -39
  110. inspect_ai/tool/_tools/_web_browser/_resources/web_client.py +0 -214
  111. inspect_ai/tool/_tools/_web_browser/_resources/web_client_new_session.py +0 -35
  112. inspect_ai/tool/_tools/_web_browser/_resources/web_server.py +0 -192
  113. {inspect_ai-0.3.74.dist-info → inspect_ai-0.3.76.dist-info}/entry_points.txt +0 -0
  114. {inspect_ai-0.3.74.dist-info → inspect_ai-0.3.76.dist-info/licenses}/LICENSE +0 -0
  115. {inspect_ai-0.3.74.dist-info → inspect_ai-0.3.76.dist-info}/top_level.txt +0 -0
@@ -1,23 +1,40 @@
1
1
  import re
2
- from textwrap import dedent
3
2
 
4
- from pydantic import Field
3
+ from pydantic import BaseModel, Field
5
4
 
6
5
  from inspect_ai._util.content import ContentText
7
6
  from inspect_ai._util.error import PrerequisiteError
8
7
  from inspect_ai.tool._tool import Tool, ToolError, ToolResult, tool
9
8
  from inspect_ai.tool._tool_call import ToolCall, ToolCallContent, ToolCallView
10
9
  from inspect_ai.tool._tool_info import parse_tool_info
10
+ from inspect_ai.tool._tool_support_helpers import (
11
+ exec_sandbox_rpc,
12
+ tool_container_sandbox,
13
+ )
11
14
  from inspect_ai.tool._tool_with import tool_with
12
- from inspect_ai.util._sandbox import SandboxEnvironment, sandbox_with
13
- from inspect_ai.util._sandbox.docker.internal import INSPECT_WEB_BROWSER_IMAGE_DOCKERHUB
14
15
  from inspect_ai.util._store_model import StoreModel, store_as
15
16
 
17
+ from ._back_compat import old_web_browser_cmd
18
+
19
+
20
+ # These two models are cloned from the container code. Until we create
21
+ # a package that is shared between the inspect and tool-container codebases, we'll
22
+ # just have to live with the duplication.
23
+ class NewSessionResult(BaseModel):
24
+ session_name: str
25
+
26
+
27
+ class CrawlerResult(BaseModel):
28
+ web_url: str
29
+ main_content: str | None = None
30
+ web_at: str
31
+ error: str | None = None
32
+
16
33
 
17
34
  def web_browser(interactive: bool = True) -> list[Tool]:
18
35
  """Tools used for web browser navigation.
19
36
 
20
- See documentation at <https://inspect.ai-safety-institute.org.uk/tools.html#sec-web-browser>.
37
+ See documentation at <https://inspect.aisi.org.uk/tools-standard.html#sec-web-browser>.
21
38
 
22
39
  Args:
23
40
  interactive: Provide interactive tools (enable
@@ -85,7 +102,7 @@ def web_browser_go() -> Tool:
85
102
  Returns:
86
103
  Web accessibility tree of the visible elements of the web page. The element_id of each element is displayed in brackets at the beginning of the line.
87
104
  """
88
- return await web_browser_cmd("web_go", url)
105
+ return await _web_browser_cmd("web_go", locals())
89
106
 
90
107
  return execute
91
108
 
@@ -165,7 +182,7 @@ def web_browser_click() -> Tool:
165
182
  Returns:
166
183
  Web accessibility tree of the visible elements of the web page. The element_id of each element is displayed in brackets at the beginning of the line.
167
184
  """
168
- return await web_browser_cmd("web_click", str(element_id))
185
+ return await _web_browser_cmd("web_click", locals())
169
186
 
170
187
  return execute
171
188
 
@@ -203,7 +220,7 @@ def web_browser_type_submit() -> Tool:
203
220
  Returns:
204
221
  Web accessibility tree of the visible elements of the web page. The element_id of each element is displayed in brackets at the beginning of the line.
205
222
  """
206
- return await web_browser_cmd("web_type_submit", str(element_id), text)
223
+ return await _web_browser_cmd("web_type_submit", locals())
207
224
 
208
225
  return execute
209
226
 
@@ -241,7 +258,7 @@ def web_browser_type() -> Tool:
241
258
  Returns:
242
259
  Web accessibility tree of the visible elements of the web page. The element_id of each element is displayed in brackets at the beginning of the line.
243
260
  """
244
- return await web_browser_cmd("web_type", str(element_id), text)
261
+ return await _web_browser_cmd("web_type", locals())
245
262
 
246
263
  return execute
247
264
 
@@ -271,7 +288,7 @@ def web_browser_scroll() -> Tool:
271
288
  Returns:
272
289
  Web accessibility tree of the visible elements of the web page. The element_id of each element is displayed in brackets at the beginning of the line.
273
290
  """
274
- return await web_browser_cmd("web_scroll", direction)
291
+ return await _web_browser_cmd("web_scroll", locals())
275
292
 
276
293
  return execute
277
294
 
@@ -292,7 +309,7 @@ def web_browser_back() -> Tool:
292
309
  Returns:
293
310
  Web accessibility tree of the visible elements of the web page. The element_id of each element is displayed in brackets at the beginning of the line.
294
311
  """
295
- return await web_browser_cmd("web_back")
312
+ return await _web_browser_cmd("web_back", locals())
296
313
 
297
314
  return execute
298
315
 
@@ -313,7 +330,7 @@ def web_browser_forward() -> Tool:
313
330
  Returns:
314
331
  Web accessibility tree of the visible elements of the web page. The element_id of each element is displayed in brackets at the beginning of the line.
315
332
  """
316
- return await web_browser_cmd("web_forward")
333
+ return await _web_browser_cmd("web_forward", locals())
317
334
 
318
335
  return execute
319
336
 
@@ -334,133 +351,61 @@ def web_browser_refresh() -> Tool:
334
351
  Returns:
335
352
  Web accessibility tree of the visible elements of the web page. The element_id of each element is displayed in brackets at the beginning of the line.
336
353
  """
337
- return await web_browser_cmd("web_refresh")
354
+ return await _web_browser_cmd("web_refresh", locals())
338
355
 
339
356
  return execute
340
357
 
341
358
 
342
- WEB_CLIENT_REQUEST = "/app/web_browser/web_client.py"
343
- WEB_CLIENT_NEW_SESSION = "/app/web_browser/web_client_new_session.py"
344
-
345
-
346
- async def web_browser_cmd(cmd: str, *args: str) -> ToolResult:
347
- sandbox_env = await sandbox_with(WEB_CLIENT_NEW_SESSION)
348
- session_flag = ""
349
- if sandbox_env:
350
- store = store_as(WebBrowserStore)
351
- if not store.session_id:
352
- result = await sandbox_env.exec(
353
- ["python3", WEB_CLIENT_NEW_SESSION], timeout=180
359
+ async def _web_browser_cmd(tool_name: str, params: dict[str, object]) -> ToolResult:
360
+ try:
361
+ sandbox_env = await tool_container_sandbox("web browser")
362
+ except PrerequisiteError as e:
363
+ # The user may have the old, incompatible, sandbox. If so, use that and
364
+ # execute the old compatible code.
365
+ try:
366
+ return await old_web_browser_cmd(tool_name, *params)
367
+ except PrerequisiteError:
368
+ raise e
369
+
370
+ store = store_as(WebBrowserStore)
371
+
372
+ if not store.session_id:
373
+ store.session_id = (
374
+ await exec_sandbox_rpc(
375
+ sandbox_env,
376
+ "web_new_session",
377
+ {"headful": False},
378
+ NewSessionResult,
354
379
  )
380
+ ).session_name
355
381
 
356
- if not result.success:
357
- raise RuntimeError(
358
- f"Error creating new web browser session: {result.stderr}"
359
- )
360
-
361
- store.session_id = result.stdout.strip("\n")
382
+ params["session_name"] = store.session_id
362
383
 
363
- session_flag = f"--session_name={store.session_id}"
364
-
365
- else:
366
- sandbox_env = await web_browser_sandbox()
367
-
368
- arg_list = None
369
- if session_flag:
370
- arg_list = ["python3", WEB_CLIENT_REQUEST, session_flag, cmd] + list(args)
384
+ crawler_result = await exec_sandbox_rpc(
385
+ sandbox_env, tool_name, params, CrawlerResult
386
+ )
387
+ if crawler_result.error and crawler_result.error.strip() != "":
388
+ raise ToolError(crawler_result.error)
371
389
  else:
372
- arg_list = ["python3", WEB_CLIENT_REQUEST, cmd] + list(args)
390
+ main_content = crawler_result.main_content
391
+ web_at = crawler_result.web_at or "(no web accessibility tree available)"
392
+ # Remove base64 data from images.
393
+ web_at_lines = web_at.split("\n")
394
+ web_at_lines = [
395
+ line.partition("data:image/png;base64")[0] for line in web_at_lines
396
+ ]
373
397
 
374
- result = await sandbox_env.exec(arg_list, timeout=180)
375
- if not result.success:
376
- raise RuntimeError(
377
- f"Error executing web browser command {cmd}({', '.join(args)}): {result.stderr}"
398
+ store_as(WebBrowserStore).main_content = (
399
+ main_content or "(no main text summary)"
378
400
  )
379
- else:
380
- response = parse_web_browser_output(result.stdout)
381
- if "error" in response and response.get("error", "").strip() != "":
382
- raise ToolError(str(response.get("error")) or "(unknown error)")
383
- elif "web_at" in response:
384
- main_content = str(response.get("main_content")) or None
385
- web_at = (
386
- str(response.get("web_at")) or "(no web accessibility tree available)"
387
- )
388
- # Remove base64 data from images.
389
- web_at_lines = web_at.split("\n")
390
- web_at_lines = [
391
- line.partition("data:image/png;base64")[0] for line in web_at_lines
392
- ]
401
+ store_as(WebBrowserStore).web_at = web_at
393
402
 
394
- store_as(WebBrowserStore).main_content = (
395
- main_content or "(no main text summary)"
396
- )
397
- store_as(WebBrowserStore).web_at = web_at
398
-
399
- web_at = "\n".join(web_at_lines)
400
- return (
401
- [
402
- ContentText(text=f"main content:\n{main_content}\n\n"),
403
- ContentText(text=f"accessibility tree:\n{web_at}"),
404
- ]
405
- if main_content
406
- else web_at
407
- )
408
- else:
409
- raise RuntimeError(
410
- f"web_browser output must contain either 'error' or 'web_at' field: {result.stdout}"
411
- )
412
-
413
-
414
- async def web_browser_sandbox() -> SandboxEnvironment:
415
- sb = await sandbox_with(WEB_CLIENT_REQUEST)
416
- if sb:
417
- return sb
418
- else:
419
- msg = dedent(f"""
420
- The web browser service was not found in any of the sandboxes for this sample. Please add the web browser service to your configuration. For example, the following Docker compose file uses the {INSPECT_WEB_BROWSER_IMAGE_DOCKERHUB} image as its default sandbox:
421
-
422
- services:
423
- default:
424
- image: "{INSPECT_WEB_BROWSER_IMAGE_DOCKERHUB}"
425
- init: true
426
-
427
- Alternatively, this Docker compose file creates a dedicated image for the web browser service:
428
-
429
- services:
430
- default:
431
- image: "python:3.12-bookworm"
432
- init: true
433
- command: "tail -f /dev/null"
434
-
435
- web_browser:
436
- image: "{INSPECT_WEB_BROWSER_IMAGE_DOCKERHUB}"
437
- init: true
438
- """).strip()
439
- raise PrerequisiteError(msg)
440
-
441
-
442
- def parse_web_browser_output(output: str) -> dict[str, str]:
443
- response: dict[str, str] = dict(
444
- web_url="", main_content="", web_at="", info="", error=""
445
- )
446
- active_field: str | None = None
447
- active_field_lines: list[str] = []
448
-
449
- def collect_active_field() -> None:
450
- if active_field is not None:
451
- response[active_field] = "\n".join(active_field_lines)
452
- active_field_lines.clear()
453
-
454
- for line in output.splitlines():
455
- field_match = re.match(
456
- r"^(error|main_content|web_at|web_url|info)\s*:\s*(.+)$", line
403
+ web_at = "\n".join(web_at_lines)
404
+ return (
405
+ [
406
+ ContentText(text=f"main content:\n{main_content}\n\n"),
407
+ ContentText(text=f"accessibility tree:\n{web_at}"),
408
+ ]
409
+ if main_content
410
+ else web_at
457
411
  )
458
- if field_match:
459
- collect_active_field()
460
- active_field = field_match.group(1)
461
- active_field_lines.append(field_match.group(2))
462
- else:
463
- active_field_lines.append(line)
464
- collect_active_field()
465
-
466
- return response
@@ -52,7 +52,7 @@ def web_search(
52
52
  A web search is conducted using the specified provider, the results are parsed for relevance
53
53
  using the specified model, and the top 'num_results' relevant pages are returned.
54
54
 
55
- See further documentation at <https://inspect.ai-safety-institute.org.uk/tools.html#sec-web-search>.
55
+ See further documentation at <https://inspect.aisi.org.uk/tools-standard.html#sec-web-search>.
56
56
 
57
57
  Args:
58
58
  provider: Search provider (defaults to "google", currently
@@ -190,7 +190,7 @@ def google_search_provider(client: httpx.AsyncClient) -> SearchProvider:
190
190
  google_cse_id = os.environ.get("GOOGLE_CSE_ID", None)
191
191
  if not google_api_key or not google_cse_id:
192
192
  raise PrerequisiteError(
193
- "GOOGLE_CSE_ID and/or GOOGLE_CSE_API_KEY not set in the environment. Please ensure these variables are defined to use Google Custom Search with the web_search tool.\n\nLearn more about the Google web search provider at https://inspect.ai-safety-institute.org.uk/tools.html#google-provider"
193
+ "GOOGLE_CSE_ID and/or GOOGLE_CSE_API_KEY not set in the environment. Please ensure these variables are defined to use Google Custom Search with the web_search tool.\n\nLearn more about the Google web search provider at https://inspect.aisi.org.uk/tools.html#google-provider"
194
194
  )
195
195
 
196
196
  async def search(query: str, start_idx: int) -> list[SearchLink]:
inspect_ai/util/_json.py CHANGED
@@ -1,5 +1,6 @@
1
1
  import types
2
2
  import typing
3
+ from copy import deepcopy
3
4
  from dataclasses import is_dataclass
4
5
  from typing import (
5
6
  Any,
@@ -10,6 +11,7 @@ from typing import (
10
11
  Tuple,
11
12
  Type,
12
13
  Union,
14
+ cast,
13
15
  get_args,
14
16
  get_origin,
15
17
  get_type_hints,
@@ -127,6 +129,7 @@ def cls_json_schema(cls: Type[Any]) -> JSONSchema:
127
129
  required.append(name)
128
130
  elif isinstance(cls, type) and issubclass(cls, BaseModel):
129
131
  schema = cls.model_json_schema()
132
+ schema = resolve_schema_references(schema)
130
133
  for name, prop in schema.get("properties", {}).items():
131
134
  properties[name] = JSONSchema(**prop)
132
135
  required = schema.get("required", [])
@@ -168,3 +171,28 @@ def python_type_to_json_type(python_type: str | None) -> JSONType:
168
171
  raise ValueError(
169
172
  f"Unsupported type: {python_type} for Python to JSON conversion."
170
173
  )
174
+
175
+
176
def resolve_schema_references(schema: dict[str, Any]) -> dict[str, Any]:
    """Resolve local ``$ref`` references in a JSON schema by inlining definitions.

    Every ``{"$ref": "#/$defs/<name>"}`` node whose ``<name>`` is present in the
    schema's top-level ``$defs`` is replaced by a copy of that definition.
    Keywords that sit next to ``$ref`` (for example ``description`` or
    ``default``) are kept and take precedence over the same keyword in the
    inlined definition.

    Recursive definitions cannot be fully inlined. When a definition refers
    back to itself (directly or through other definitions), the inner ``$ref``
    is left in place and the original ``$defs`` are re-attached to the result
    so that the remaining references stay resolvable.

    Args:
        schema: JSON schema, typically the output of ``model_json_schema()``.
            The input is not modified.

    Returns:
        A new schema with references inlined. ``$defs`` is removed unless a
        recursive definition was encountered.
    """
    prefix = "#/$defs/"
    schema = deepcopy(schema)
    definitions = schema.pop("$defs", {})
    # Set when a recursive definition forces us to leave a `$ref` in place.
    has_cycle = False

    def _resolve_refs(obj: Any, expanding: tuple[str, ...]) -> Any:
        nonlocal has_cycle

        if isinstance(obj, list):
            return [_resolve_refs(item, expanding) for item in obj]
        if not isinstance(obj, dict):
            return obj

        ref = obj.get("$ref")
        # `$ref` may also be an ordinary key (e.g. a property literally named
        # "$ref"), in which case its value is not a string reference.
        if isinstance(ref, str) and ref.startswith(prefix):
            ref_key = ref[len(prefix) :]
            if ref_key in expanding:
                # Already expanding this definition further up the chain:
                # inlining again would never terminate.
                has_cycle = True
            elif ref_key in definitions:
                # Inline a copy of the definition, resolving nested references
                resolved = _resolve_refs(
                    deepcopy(definitions[ref_key]), expanding + (ref_key,)
                )
                if not isinstance(resolved, dict):
                    # Boolean schemas (true/false) have nothing to merge into
                    return resolved
                siblings = {
                    key: _resolve_refs(value, expanding)
                    for key, value in obj.items()
                    if key != "$ref"
                }
                return {**resolved, **siblings}

        # Plain dictionary, unknown reference, or cyclic reference
        return {key: _resolve_refs(value, expanding) for key, value in obj.items()}

    resolved_schema = cast(dict[str, Any], _resolve_refs(schema, ()))
    if has_cycle:
        # Keep the definitions so the references we left behind still resolve
        resolved_schema["$defs"] = definitions
    return resolved_schema
@@ -49,11 +49,14 @@ def sandbox(name: str | None = None) -> SandboxEnvironment:
49
49
  return environment
50
50
 
51
51
 
52
- async def sandbox_with(file: str) -> SandboxEnvironment | None:
52
+ async def sandbox_with(file: str, on_path: bool = False) -> SandboxEnvironment | None:
53
53
  """Get the SandboxEnvironment for the current sample that has the specified file.
54
54
 
55
55
  Args:
56
- file (str): Path to file to check for.
56
+ file (str): Path to file to check for if on_path is False. If on_path is
57
+ True, file should be a filename that exists on the system path.
58
+ on_path (bool): If True, file is a filename to be verified using "which".
59
+ If False, file is a path to be checked within the sandbox environments.
57
60
 
58
61
  Return:
59
62
  SandboxEnvironment instance or None if no sandboxes had the file.
@@ -66,19 +69,25 @@ async def sandbox_with(file: str) -> SandboxEnvironment | None:
66
69
  if environments_with is None:
67
70
  raise_no_sandbox()
68
71
 
69
- # if we've already disovered the sandbox for this file then return it
70
- environment = environments_with.get(file, None)
72
+ # if we've already discovered the sandbox for this file then return it
73
+ environment_with_key = f"{file}:{on_path}"
74
+ environment = environments_with.get(environment_with_key, None)
71
75
  if environment is not None:
72
76
  return environment
73
77
 
74
78
  # look in each sandbox
75
79
  for _, environment in environments.items():
76
80
  try:
77
- # can we read the file?
78
- await environment.read_file(file)
81
+ if on_path:
82
+ # can we find the file on the path?
83
+ if not (await environment.exec(["which", file])).success:
84
+ continue
85
+ else:
86
+ # can we read the file?
87
+ await environment.read_file(file)
79
88
 
80
89
  # if so this is our environment, cache and return it
81
- environments_with[file] = environment
90
+ environments_with[environment_with_key] = environment
82
91
  return environment
83
92
 
84
93
  # allow exception types known to be raised from read_file
@@ -192,7 +201,8 @@ async def copy_sandbox_environment_files(
192
201
  target_env = environments.get(envname, None)
193
202
  if not target_env:
194
203
  raise RuntimeError(
195
- f"Environment referenced in sample file not found: '{envname}:{file}'"
204
+ f"Environment referenced in sample file not found: '{envname}:{file}'. "
205
+ + "Note that ':' can be optionally used to specify an explicit environment name for sample files (e.g. 'envname:file') so cannot be used as a character within filenames."
196
206
  )
197
207
  else:
198
208
  target_env = default_environment
@@ -82,7 +82,7 @@ COMPOSE_COMMENT = """# inspect auto-generated docker compose file
82
82
  COMPOSE_GENERIC_YAML = f"""{COMPOSE_COMMENT}
83
83
  services:
84
84
  default:
85
- image: "python:3.12-bookworm"
85
+ image: "aisiuk/inspect-tool-support"
86
86
  command: "tail -f /dev/null"
87
87
  init: true
88
88
  network_mode: none
@@ -3,13 +3,13 @@ from inspect_ai._util.error import PrerequisiteError
3
3
  from inspect_ai.util._display import display_type
4
4
  from inspect_ai.util._subprocess import subprocess
5
5
 
6
- INSPECT_WEB_BROWSER_IMAGE_DOCKERHUB = "aisiuk/inspect-web-browser-tool"
6
+ INSPECT_WEB_BROWSER_IMAGE_DOCKERHUB_DEPRECATED = "aisiuk/inspect-web-browser-tool"
7
7
 
8
- INSPECT_WEB_BROWSER_IMAGE = "inspect_web_browser"
8
+ INSPECT_WEB_BROWSER_IMAGE_DEPRECATED = "inspect_web_browser"
9
9
  INSPECT_COMPUTER_IMAGE = "inspect-computer-tool"
10
10
 
11
11
  INTERNAL_IMAGES = {
12
- INSPECT_WEB_BROWSER_IMAGE: PKG_PATH
12
+ INSPECT_WEB_BROWSER_IMAGE_DEPRECATED: PKG_PATH
13
13
  / "tool"
14
14
  / "_tools"
15
15
  / "_web_browser"
@@ -1,6 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import abc
4
+ import logging
4
5
  from dataclasses import dataclass, field
5
6
  from typing import (
6
7
  Annotated,
@@ -17,8 +18,12 @@ from typing import (
17
18
 
18
19
  from pydantic import BaseModel, Field, model_validator
19
20
 
21
+ from inspect_ai._util.logger import warn_once
22
+
20
23
  from .._subprocess import ExecResult
21
24
 
25
+ logger = logging.getLogger(__name__)
26
+
22
27
  ST = TypeVar("ST", bound="SandboxEnvironment")
23
28
 
24
29
  TaskInit = Callable[[str, Union["SandboxEnvironmentConfigType", None]], Awaitable[None]]
@@ -381,11 +386,21 @@ def resolve_sandbox_environment(
381
386
  return None
382
387
 
383
388
 
384
- def deserialize_sandbox_specific_config(type: str, config: dict[str, Any]) -> BaseModel:
389
+ def deserialize_sandbox_specific_config(
390
+ type: str, config: dict[str, Any]
391
+ ) -> BaseModel | dict[str, Any]:
385
392
  # Avoid circular import
386
393
  from inspect_ai.util._sandbox.registry import registry_find_sandboxenv
387
394
 
388
- sandboxenv_type = registry_find_sandboxenv(type)
395
+ try:
396
+ sandboxenv_type = registry_find_sandboxenv(type)
397
+ except ValueError:
398
+ warn_once(
399
+ logger,
400
+ f"Could not find sandbox environment plugin for type '{type}'. "
401
+ "Ensure the plugin is installed in your environment.",
402
+ )
403
+ return config
389
404
  config_deserialize = cast(
390
405
  ConfigDeserialize, getattr(sandboxenv_type, "config_deserialize")
391
406
  )
@@ -1,10 +1,10 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: inspect_ai
3
- Version: 0.3.74
3
+ Version: 0.3.76
4
4
  Summary: Framework for large language model evaluations
5
5
  Author: UK AI Security Institute
6
6
  License: MIT License
7
- Project-URL: Documentation, https://inspect.ai-safety-institute.org.uk/
7
+ Project-URL: Documentation, https://inspect.aisi.org.uk/
8
8
  Project-URL: Source Code, https://github.com/UKGovernmentBEIS/inspect_ai
9
9
  Project-URL: Issue Tracker, https://github.com/UKGovernmentBEIS/inspect_ai/issues
10
10
  Classifier: Development Status :: 4 - Beta
@@ -60,11 +60,13 @@ Requires-Dist: goodfire; extra == "dev"
60
60
  Requires-Dist: griffe; extra == "dev"
61
61
  Requires-Dist: groq; extra == "dev"
62
62
  Requires-Dist: ipython; extra == "dev"
63
+ Requires-Dist: markdown; extra == "dev"
63
64
  Requires-Dist: mistralai; extra == "dev"
64
65
  Requires-Dist: moto[server]; extra == "dev"
65
66
  Requires-Dist: mypy; extra == "dev"
66
67
  Requires-Dist: nbformat; extra == "dev"
67
68
  Requires-Dist: openai; extra == "dev"
69
+ Requires-Dist: panflute; extra == "dev"
68
70
  Requires-Dist: pip; extra == "dev"
69
71
  Requires-Dist: pre-commit; extra == "dev"
70
72
  Requires-Dist: pylint; extra == "dev"
@@ -96,14 +98,15 @@ Requires-Dist: griffe; extra == "doc"
96
98
  Provides-Extra: dist
97
99
  Requires-Dist: twine; extra == "dist"
98
100
  Requires-Dist: build; extra == "dist"
101
+ Dynamic: license-file
99
102
 
100
- [<img width="295" src="https://inspect.ai-safety-institute.org.uk/images/aisi-logo.svg" />](https://aisi.gov.uk/)
103
+ [<img width="295" src="https://inspect.aisi.org.uk/images/aisi-logo.svg" />](https://aisi.gov.uk/)
101
104
 
102
105
  Welcome to Inspect, a framework for large language model evaluations created by the [UK AI Security Institute](https://aisi.gov.uk/).
103
106
 
104
107
  Inspect provides many built-in components, including facilities for prompt engineering, tool usage, multi-turn dialog, and model graded evaluations. Extensions to Inspect (e.g. to support new elicitation and scoring techniques) can be provided by other Python packages.
105
108
 
106
- To get started with Inspect, please see the documentation at <https://inspect.ai-safety-institute.org.uk/>.
109
+ To get started with Inspect, please see the documentation at <https://inspect.aisi.org.uk/>.
107
110
 
108
111
  ***
109
112