inspect-ai 0.3.55__py3-none-any.whl → 0.3.57__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131)
  1. inspect_ai/__init__.py +1 -0
  2. inspect_ai/_cli/common.py +1 -1
  3. inspect_ai/_cli/trace.py +33 -20
  4. inspect_ai/_display/core/active.py +1 -1
  5. inspect_ai/_display/core/display.py +1 -1
  6. inspect_ai/_display/core/footer.py +1 -1
  7. inspect_ai/_display/core/panel.py +1 -1
  8. inspect_ai/_display/core/progress.py +0 -6
  9. inspect_ai/_display/core/rich.py +1 -1
  10. inspect_ai/_display/rich/display.py +2 -2
  11. inspect_ai/_display/textual/app.py +15 -17
  12. inspect_ai/_display/textual/widgets/clock.py +3 -3
  13. inspect_ai/_display/textual/widgets/samples.py +6 -13
  14. inspect_ai/_eval/context.py +9 -1
  15. inspect_ai/_eval/run.py +16 -11
  16. inspect_ai/_eval/score.py +4 -10
  17. inspect_ai/_eval/task/results.py +5 -4
  18. inspect_ai/_eval/task/run.py +6 -12
  19. inspect_ai/_eval/task/task.py +10 -0
  20. inspect_ai/_util/ansi.py +31 -0
  21. inspect_ai/_util/datetime.py +1 -1
  22. inspect_ai/_util/deprecation.py +1 -1
  23. inspect_ai/_util/format.py +7 -0
  24. inspect_ai/_util/json.py +11 -1
  25. inspect_ai/_util/logger.py +14 -13
  26. inspect_ai/_util/throttle.py +10 -1
  27. inspect_ai/_util/trace.py +79 -47
  28. inspect_ai/_util/transcript.py +37 -4
  29. inspect_ai/_util/vscode.py +51 -0
  30. inspect_ai/_view/notify.py +2 -1
  31. inspect_ai/_view/www/.prettierrc.js +12 -0
  32. inspect_ai/_view/www/App.css +22 -1
  33. inspect_ai/_view/www/dist/assets/index.css +2374 -2
  34. inspect_ai/_view/www/dist/assets/index.js +29752 -24492
  35. inspect_ai/_view/www/log-schema.json +262 -215
  36. inspect_ai/_view/www/package.json +1 -0
  37. inspect_ai/_view/www/src/App.mjs +19 -9
  38. inspect_ai/_view/www/src/Types.mjs +0 -1
  39. inspect_ai/_view/www/src/api/Types.mjs +15 -4
  40. inspect_ai/_view/www/src/api/api-http.mjs +2 -0
  41. inspect_ai/_view/www/src/appearance/Icons.mjs +2 -0
  42. inspect_ai/_view/www/src/components/AsciiCinemaPlayer.mjs +74 -0
  43. inspect_ai/_view/www/src/components/CopyButton.mjs +0 -1
  44. inspect_ai/_view/www/src/components/ExpandablePanel.mjs +2 -2
  45. inspect_ai/_view/www/src/components/FindBand.mjs +5 -4
  46. inspect_ai/_view/www/src/components/HumanBaselineView.mjs +168 -0
  47. inspect_ai/_view/www/src/components/LargeModal.mjs +1 -1
  48. inspect_ai/_view/www/src/components/LightboxCarousel.mjs +217 -0
  49. inspect_ai/_view/www/src/components/MessageContent.mjs +1 -1
  50. inspect_ai/_view/www/src/components/TabSet.mjs +1 -1
  51. inspect_ai/_view/www/src/components/Tools.mjs +28 -5
  52. inspect_ai/_view/www/src/components/VirtualList.mjs +15 -17
  53. inspect_ai/_view/www/src/log/remoteLogFile.mjs +2 -1
  54. inspect_ai/_view/www/src/navbar/Navbar.mjs +44 -32
  55. inspect_ai/_view/www/src/samples/SampleDisplay.mjs +1 -2
  56. inspect_ai/_view/www/src/samples/SampleList.mjs +35 -4
  57. inspect_ai/_view/www/src/samples/SampleScoreView.mjs +13 -2
  58. inspect_ai/_view/www/src/samples/SampleScores.mjs +11 -2
  59. inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +238 -178
  60. inspect_ai/_view/www/src/samples/SamplesTab.mjs +4 -2
  61. inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +5 -5
  62. inspect_ai/_view/www/src/samples/tools/SelectScorer.mjs +7 -0
  63. inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +3 -3
  64. inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +3 -2
  65. inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +1 -1
  66. inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +1 -0
  67. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.mjs +56 -0
  68. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +17 -5
  69. inspect_ai/_view/www/src/types/asciicinema-player.d.ts +26 -0
  70. inspect_ai/_view/www/src/types/log.d.ts +28 -20
  71. inspect_ai/_view/www/src/workspace/WorkSpace.mjs +1 -1
  72. inspect_ai/_view/www/yarn.lock +44 -0
  73. inspect_ai/approval/_apply.py +4 -0
  74. inspect_ai/approval/_human/panel.py +5 -8
  75. inspect_ai/dataset/_dataset.py +51 -10
  76. inspect_ai/dataset/_util.py +31 -3
  77. inspect_ai/log/__init__.py +2 -0
  78. inspect_ai/log/_log.py +30 -2
  79. inspect_ai/log/_recorders/eval.py +2 -0
  80. inspect_ai/model/_call_tools.py +31 -7
  81. inspect_ai/model/_chat_message.py +3 -0
  82. inspect_ai/model/_model.py +42 -1
  83. inspect_ai/model/_providers/anthropic.py +4 -0
  84. inspect_ai/model/_providers/google.py +24 -6
  85. inspect_ai/model/_providers/openai.py +17 -3
  86. inspect_ai/model/_providers/openai_o1.py +10 -12
  87. inspect_ai/model/_render.py +9 -2
  88. inspect_ai/scorer/_metric.py +12 -1
  89. inspect_ai/solver/__init__.py +2 -0
  90. inspect_ai/solver/_human_agent/agent.py +83 -0
  91. inspect_ai/solver/_human_agent/commands/__init__.py +36 -0
  92. inspect_ai/solver/_human_agent/commands/clock.py +70 -0
  93. inspect_ai/solver/_human_agent/commands/command.py +59 -0
  94. inspect_ai/solver/_human_agent/commands/instructions.py +74 -0
  95. inspect_ai/solver/_human_agent/commands/note.py +42 -0
  96. inspect_ai/solver/_human_agent/commands/score.py +80 -0
  97. inspect_ai/solver/_human_agent/commands/status.py +62 -0
  98. inspect_ai/solver/_human_agent/commands/submit.py +151 -0
  99. inspect_ai/solver/_human_agent/install.py +222 -0
  100. inspect_ai/solver/_human_agent/panel.py +252 -0
  101. inspect_ai/solver/_human_agent/service.py +45 -0
  102. inspect_ai/solver/_human_agent/state.py +55 -0
  103. inspect_ai/solver/_human_agent/view.py +24 -0
  104. inspect_ai/solver/_task_state.py +28 -2
  105. inspect_ai/tool/_tool.py +10 -2
  106. inspect_ai/tool/_tool_info.py +2 -1
  107. inspect_ai/tool/_tools/_web_browser/_resources/dm_env_servicer.py +9 -9
  108. inspect_ai/tool/_tools/_web_browser/_web_browser.py +16 -13
  109. inspect_ai/util/__init__.py +12 -4
  110. inspect_ai/{_util/display.py → util/_display.py} +6 -0
  111. inspect_ai/util/_panel.py +31 -9
  112. inspect_ai/util/_sandbox/__init__.py +0 -3
  113. inspect_ai/util/_sandbox/context.py +5 -1
  114. inspect_ai/util/_sandbox/docker/compose.py +17 -13
  115. inspect_ai/util/_sandbox/docker/docker.py +9 -6
  116. inspect_ai/util/_sandbox/docker/internal.py +1 -1
  117. inspect_ai/util/_sandbox/docker/util.py +3 -2
  118. inspect_ai/util/_sandbox/environment.py +6 -5
  119. inspect_ai/util/_sandbox/local.py +1 -1
  120. inspect_ai/util/_sandbox/self_check.py +18 -18
  121. inspect_ai/util/_sandbox/service.py +22 -7
  122. inspect_ai/util/_store.py +7 -8
  123. inspect_ai/util/_store_model.py +110 -0
  124. inspect_ai/util/_subprocess.py +3 -3
  125. inspect_ai/util/_throttle.py +32 -0
  126. {inspect_ai-0.3.55.dist-info → inspect_ai-0.3.57.dist-info}/METADATA +3 -3
  127. {inspect_ai-0.3.55.dist-info → inspect_ai-0.3.57.dist-info}/RECORD +131 -108
  128. {inspect_ai-0.3.55.dist-info → inspect_ai-0.3.57.dist-info}/WHEEL +1 -1
  129. {inspect_ai-0.3.55.dist-info → inspect_ai-0.3.57.dist-info}/LICENSE +0 -0
  130. {inspect_ai-0.3.55.dist-info → inspect_ai-0.3.57.dist-info}/entry_points.txt +0 -0
  131. {inspect_ai-0.3.55.dist-info → inspect_ai-0.3.57.dist-info}/top_level.txt +0 -0
@@ -38,9 +38,9 @@ class EnvironmentSpec:
38
38
  for i, obs_spec in enumerate(env_obs_spec.values()):
39
39
  self.observation_spec[i + 1] = convert(obs_spec)
40
40
 
41
- assert isinstance(
42
- env.action_spec(), specs.Array
43
- ), "Only a single action type is supported."
41
+ assert isinstance(env.action_spec(), specs.Array), (
42
+ "Only a single action type is supported."
43
+ )
44
44
  self.action_spec = {1: convert(env.action_spec())}
45
45
 
46
46
  self.observation_manager = spec_manager.SpecManager(self.observation_spec)
@@ -234,12 +234,12 @@ class EnvironmentService(dm_env_rpc_pb2_grpc.EnvironmentServicer):
234
234
  observations.
235
235
  """
236
236
  with self._lock:
237
- assert (
238
- cur_world in self._envs
239
- ), "Current world does not have an assosiated environment"
240
- assert (
241
- cur_world in self._joined_worlds
242
- ), "Please join world before calling step."
237
+ assert cur_world in self._envs, (
238
+ "Current world does not have an assosiated environment"
239
+ )
240
+ assert cur_world in self._joined_worlds, (
241
+ "Please join world before calling step."
242
+ )
243
243
  env = self._envs[cur_world]
244
244
  spec = self._specs[cur_world]
245
245
 
@@ -1,6 +1,8 @@
1
1
  import re
2
2
  from textwrap import dedent
3
3
 
4
+ from pydantic import Field
5
+
4
6
  from inspect_ai._util.error import PrerequisiteError
5
7
  from inspect_ai.tool._tool import Tool, ToolError, tool
6
8
  from inspect_ai.tool._tool_call import ToolCall, ToolCallContent, ToolCallView
@@ -8,7 +10,7 @@ from inspect_ai.tool._tool_info import parse_tool_info
8
10
  from inspect_ai.tool._tool_with import tool_with
9
11
  from inspect_ai.util._sandbox import SandboxEnvironment, sandbox_with
10
12
  from inspect_ai.util._sandbox.docker.internal import INSPECT_WEB_BROWSER_IMAGE_DOCKERHUB
11
- from inspect_ai.util._store import store
13
+ from inspect_ai.util._store_model import StoreModel, store_as
12
14
 
13
15
 
14
16
  def web_browser(interactive: bool = True) -> list[Tool]:
@@ -97,12 +99,15 @@ def go_without_interactive_docs(tool: Tool) -> Tool:
97
99
  # custom viewer for interactive tool calls that shows a truncated
98
100
  # version of current the web accessiblity tree if available
99
101
 
100
- WEB_BROWSER_AT = "web_browser:at"
102
+
103
+ class WebBrowserStore(StoreModel):
104
+ web_at: str = Field(default_factory=str)
105
+ session_id: str = Field(default_factory=str)
101
106
 
102
107
 
103
108
  def web_at_viewer(call: ToolCall) -> ToolCallView:
104
109
  # get the web accessiblity tree, if we have it create a view from it
105
- web_at = store().get(WEB_BROWSER_AT, "")
110
+ web_at = store_as(WebBrowserStore).web_at
106
111
  element_id = call.arguments.get("element_id", 0)
107
112
  if web_at and element_id:
108
113
  lines = web_at.splitlines()
@@ -332,15 +337,14 @@ def web_browser_refresh() -> Tool:
332
337
 
333
338
  WEB_CLIENT_REQUEST = "/app/web_browser/web_client.py"
334
339
  WEB_CLIENT_NEW_SESSION = "/app/web_browser/web_client_new_session.py"
335
- BROWSER_SESSION_ID = "BROWSER_SESSION_ID"
336
340
 
337
341
 
338
342
  async def web_browser_cmd(cmd: str, *args: str) -> str:
339
343
  sandbox_env = await sandbox_with(WEB_CLIENT_NEW_SESSION)
340
344
  session_flag = ""
341
345
  if sandbox_env:
342
- browser_session = store().get(BROWSER_SESSION_ID, "")
343
- if not browser_session:
346
+ store = store_as(WebBrowserStore)
347
+ if not store.session_id:
344
348
  result = await sandbox_env.exec(["python3", WEB_CLIENT_NEW_SESSION])
345
349
 
346
350
  if not result.success:
@@ -348,10 +352,9 @@ async def web_browser_cmd(cmd: str, *args: str) -> str:
348
352
  f"Error creating new web browser session: {result.stderr}"
349
353
  )
350
354
 
351
- browser_session = result.stdout.strip("\n")
352
- store().set(BROWSER_SESSION_ID, browser_session)
355
+ store.session_id = result.stdout.strip("\n")
353
356
 
354
- session_flag = f"--session_name={browser_session}"
357
+ session_flag = f"--session_name={store.session_id}"
355
358
 
356
359
  else:
357
360
  sandbox_env = await web_browser_sandbox()
@@ -369,7 +372,9 @@ async def web_browser_cmd(cmd: str, *args: str) -> str:
369
372
  )
370
373
  else:
371
374
  response = parse_web_browser_output(result.stdout)
372
- if "web_at" in response:
375
+ if "error" in response and response.get("error", "").strip() != "":
376
+ raise ToolError(str(response.get("error")) or "(unknown error)")
377
+ elif "web_at" in response:
373
378
  web_at = (
374
379
  str(response.get("web_at")) or "(no web accessiblity tree available)"
375
380
  )
@@ -379,10 +384,8 @@ async def web_browser_cmd(cmd: str, *args: str) -> str:
379
384
  line.partition("data:image/png;base64")[0] for line in web_at_lines
380
385
  ]
381
386
  web_at = "\n".join(web_at_lines)
382
- store().set(WEB_BROWSER_AT, web_at)
387
+ store_as(WebBrowserStore).web_at = web_at
383
388
  return web_at
384
- elif "error" in response:
385
- raise ToolError(str(response.get("error")) or "(unknown error)")
386
389
  else:
387
390
  raise RuntimeError(
388
391
  f"web_browser output must contain either 'error' or 'web_at' field: {result.stdout}"
@@ -1,5 +1,8 @@
1
+ from inspect_ai._util.trace import trace_action, trace_message
2
+
1
3
  from ._concurrency import concurrency
2
4
  from ._console import input_screen
5
+ from ._display import DisplayType, display_type
3
6
  from ._panel import InputPanel, input_panel
4
7
  from ._resource import resource
5
8
  from ._sandbox import (
@@ -11,23 +14,25 @@ from ._sandbox import (
11
14
  SandboxEnvironments,
12
15
  SandboxEnvironmentSpec,
13
16
  SandboxEnvironmentType,
14
- SandboxService,
15
17
  sandbox,
16
- sandbox_service,
17
18
  sandbox_with,
18
19
  sandboxenv,
19
20
  )
20
21
  from ._store import Store, store
22
+ from ._store_model import StoreModel, store_as
21
23
  from ._subprocess import (
22
24
  ExecResult,
23
25
  subprocess,
24
26
  )
25
27
  from ._subtask import Subtask, subtask
28
+ from ._throttle import throttle
26
29
  from ._trace import trace_enabled, trace_panel
27
30
 
28
31
  __all__ = [
29
32
  "ExecResult",
30
33
  "concurrency",
34
+ "DisplayType",
35
+ "display_type",
31
36
  "InputPanel",
32
37
  "input_panel",
33
38
  "input_screen",
@@ -44,12 +49,15 @@ __all__ = [
44
49
  "sandboxenv",
45
50
  "sandbox",
46
51
  "sandbox_with",
47
- "SandboxService",
48
- "sandbox_service",
49
52
  "Store",
50
53
  "store",
54
+ "StoreModel",
55
+ "store_as",
51
56
  "Subtask",
52
57
  "subtask",
58
+ "throttle",
53
59
  "trace_enabled",
54
60
  "trace_panel",
61
+ "trace_action",
62
+ "trace_message",
55
63
  ]
@@ -7,6 +7,7 @@ from inspect_ai._util.constants import DEFAULT_DISPLAY
7
7
  logger = getLogger(__name__)
8
8
 
9
9
  DisplayType = Literal["full", "rich", "plain", "none"]
10
+ """Console display type."""
10
11
 
11
12
 
12
13
  _display_type: DisplayType | None = None
@@ -28,6 +29,11 @@ def init_display_type(display: str | None = None) -> DisplayType:
28
29
 
29
30
 
30
31
  def display_type() -> DisplayType:
32
+ """Get the current console display type.
33
+
34
+ Returns:
35
+ DisplayType: Display type.
36
+ """
31
37
  global _display_type
32
38
  if _display_type:
33
39
  return _display_type
inspect_ai/util/_panel.py CHANGED
@@ -1,23 +1,46 @@
1
1
  from typing import Any, Protocol, TypeVar
2
2
 
3
3
  from textual.containers import Container
4
+ from typing_extensions import Self
4
5
 
5
6
 
6
7
  class InputPanel(Container):
8
+ DEFAULT_TITLE = "Panel"
9
+
7
10
  DEFAULT_CLASSES = "task-input-panel"
8
11
 
12
+ DEFAULT_CSS = """
13
+ InputPanel {
14
+ padding: 0 1 1 1;
15
+ }
16
+ """
17
+
9
18
  class Host(Protocol):
10
19
  def set_title(self, title: str) -> None: ...
11
20
  def activate(self) -> None: ...
12
21
  def deactivate(self) -> None: ...
13
22
  def close(self) -> None: ...
14
23
 
15
- def __init__(self, title: str, host: Host) -> None:
24
+ def __init__(self, host: Host) -> None:
25
+ """Initialise the panel.
26
+
27
+ Panels are created as required by the input_panel() function so
28
+ you should NOT override __init__ with your own initisation (rather,
29
+ you should define reactive props and/or methods that perform
30
+ initialisation).
31
+
32
+ You should also override the `DEFAULT_TITLE` variable for your panel to
33
+ provide a default tab title (you can change the table dynamically as
34
+ required using the `title` property).
35
+
36
+ Args:
37
+ host (InputPanel.Host): Interface to UI host of input panel.
38
+ """
16
39
  super().__init__()
17
- self._title = title
40
+ self._title = self.DEFAULT_TITLE
18
41
  self._host = host
19
42
 
20
- async def __aenter__(self) -> "InputPanel":
43
+ async def __aenter__(self) -> Self:
21
44
  self.activate()
22
45
  return self
23
46
 
@@ -50,10 +73,10 @@ class InputPanel(Container):
50
73
  pass
51
74
 
52
75
 
53
- TP = TypeVar("TP", bound=InputPanel)
76
+ TP = TypeVar("TP", bound=InputPanel, covariant=True)
54
77
 
55
78
 
56
- async def input_panel(title: str, panel: type[TP]) -> TP:
79
+ async def input_panel(panel: type[TP]) -> TP:
57
80
  """Create an input panel in the task display.
58
81
 
59
82
  There can only be a single instance of an InputPanel with a given
@@ -65,19 +88,18 @@ async def input_panel(title: str, panel: type[TP]) -> TP:
65
88
  the scope exits -- see below for open/close semantics)
66
89
 
67
90
  ```python
68
- panel = await input_panel("Custom", CustomPanel)
91
+ panel = await input_panel(CustomPanel)
69
92
  panel.activate()
70
93
  ```
71
94
 
72
95
  Activate and close an input panel using a context manager:
73
96
 
74
97
  ```python
75
- async with await input_panel("Custom", CustomPanel) as panel:
98
+ async with await input_panel(CustomPanel) as panel:
76
99
  ...
77
100
  ```
78
101
 
79
102
  Args:
80
- title (str): Input panel title.
81
103
  panel (type[TP]): Type of panel widget (must derive from `InputPanel`)
82
104
 
83
105
  Returns:
@@ -88,4 +110,4 @@ async def input_panel(title: str, panel: type[TP]) -> TP:
88
110
  """
89
111
  from inspect_ai._display.core.active import task_screen
90
112
 
91
- return await task_screen().input_panel(title, panel)
113
+ return await task_screen().input_panel(panel)
@@ -13,7 +13,6 @@ from .environment import (
13
13
  from .limits import OutputLimitExceededError, SandboxEnvironmentLimits
14
14
  from .local import LocalSandboxEnvironment # noqa: F401
15
15
  from .registry import sandboxenv
16
- from .service import SandboxService, sandbox_service
17
16
 
18
17
  __all__ = [
19
18
  "OutputLimitExceededError",
@@ -27,6 +26,4 @@ __all__ = [
27
26
  "sandboxenv",
28
27
  "sandbox",
29
28
  "sandbox_with",
30
- "SandboxService",
31
- "sandbox_service",
32
29
  ]
@@ -24,6 +24,10 @@ def sandbox(name: str | None = None) -> SandboxEnvironment:
24
24
 
25
25
  Return:
26
26
  SandboxEnvironment instance.
27
+
28
+ Raises:
29
+ ProcessLookupError: If there are no sandboxes available.
30
+ ValueError: If an invalid sandbox name is specified.
27
31
  """
28
32
  # verify we have a context
29
33
  environments = sandbox_environments_context_var.get(None)
@@ -102,7 +106,7 @@ async def sandbox_connections() -> dict[str, SandboxConnection]:
102
106
 
103
107
 
104
108
  def raise_no_sandbox() -> NoReturn:
105
- raise RuntimeError(
109
+ raise ProcessLookupError(
106
110
  "No sandbox environment has been provided for the current sample or task. "
107
111
  + "Please specify a sandbox for the sample or a global default sandbox for the task"
108
112
  )
@@ -8,8 +8,8 @@ from typing import Any, Literal, TypedDict, cast
8
8
  import yaml
9
9
  from pydantic import BaseModel
10
10
 
11
- from inspect_ai._util.display import display_type
12
11
  from inspect_ai._util.error import PrerequisiteError
12
+ from inspect_ai.util._display import display_type
13
13
  from inspect_ai.util._subprocess import ExecResult, subprocess
14
14
 
15
15
  from .prereqs import (
@@ -29,11 +29,11 @@ async def compose_up(project: ComposeProject) -> None:
29
29
  result = await compose_command(
30
30
  ["up", "--detach", "--wait", "--wait-timeout", COMPOSE_WAIT],
31
31
  project=project,
32
+ # wait up to 5 minutes for container to go up (compose wait + 3 minutes)
33
+ timeout=300,
32
34
  )
33
35
  if not result.success:
34
- msg = (
35
- f"Failed to start docker services for {project.config}: " f"{result.stderr}"
36
- )
36
+ msg = f"Failed to start docker services for {project.config}: {result.stderr}"
37
37
  raise RuntimeError(msg)
38
38
 
39
39
 
@@ -80,7 +80,11 @@ async def compose_cp(
80
80
  output_limit: int | None = None,
81
81
  ) -> None:
82
82
  result = await compose_command(
83
- ["cp", "--", src, dest], project=project, cwd=cwd, output_limit=output_limit
83
+ ["cp", "--", src, dest],
84
+ project=project,
85
+ timeout=120, # 2-minute timeout for file copies
86
+ cwd=cwd,
87
+ output_limit=output_limit,
84
88
  )
85
89
  if not result.success:
86
90
  msg = f"Failed to copy file from '{src}' to '{dest}': {result.stderr}"
@@ -118,7 +122,7 @@ async def compose_ps(
118
122
  command.append("--all")
119
123
  if status:
120
124
  command = command + ["--status", status]
121
- result = await compose_command(command, project=project)
125
+ result = await compose_command(command, project=project, timeout=60)
122
126
  if not result.success:
123
127
  msg = f"Error querying for running services: {result.stderr}"
124
128
  raise RuntimeError(msg)
@@ -136,6 +140,7 @@ async def compose_build(project: ComposeProject, capture_output: bool = False) -
136
140
  result = await compose_command(
137
141
  ["build"],
138
142
  project=project,
143
+ timeout=None, # no timeout for build
139
144
  capture_output=capture_output,
140
145
  )
141
146
  if not result.success:
@@ -151,6 +156,7 @@ async def compose_pull(
151
156
  return await compose_command(
152
157
  ["pull", "--ignore-buildable", "--policy", "missing", service],
153
158
  project=project,
159
+ timeout=None, # no timeout for pull
154
160
  capture_output=capture_output,
155
161
  )
156
162
 
@@ -185,7 +191,7 @@ ComposeService = TypedDict(
185
191
 
186
192
 
187
193
  async def compose_services(project: ComposeProject) -> dict[str, ComposeService]:
188
- result = await compose_command(["config"], project=project)
194
+ result = await compose_command(["config"], project=project, timeout=60)
189
195
  if not result.success:
190
196
  raise RuntimeError(f"Error reading docker config: {result.stderr}")
191
197
  return cast(dict[str, ComposeService], yaml.safe_load(result.stdout)["services"])
@@ -209,12 +215,13 @@ async def compose_ls() -> list[Project]:
209
215
 
210
216
  async def compose_cleanup_images(
211
217
  project: ComposeProject,
218
+ *,
212
219
  cwd: str | None = None,
213
- timeout: int | None = None,
220
+ timeout: int | None,
214
221
  ) -> None:
215
222
  # List the images that would be created for this compose
216
223
  images_result = await compose_command(
217
- ["config", "--images"], project=project, cwd=cwd
224
+ ["config", "--images"], project=project, timeout=timeout, cwd=cwd
218
225
  )
219
226
 
220
227
  # Remove those images explicitly
@@ -246,14 +253,11 @@ async def compose_cleanup_images(
246
253
  logger.warning(msg)
247
254
 
248
255
 
249
- DEFAULT_COMPOSE_TIMEOUT = 60
250
-
251
-
252
256
  async def compose_command(
253
257
  command: list[str],
254
258
  *,
255
259
  project: ComposeProject,
256
- timeout: int | None = DEFAULT_COMPOSE_TIMEOUT,
260
+ timeout: int | None,
257
261
  input: str | bytes | None = None,
258
262
  cwd: str | Path | None = None,
259
263
  forward_env: bool = True,
@@ -78,7 +78,7 @@ class DockerSandboxEnvironment(SandboxEnvironment):
78
78
  await compose_build(project)
79
79
 
80
80
  # cleanup images created during build
81
- await compose_cleanup_images(project)
81
+ await compose_cleanup_images(project, timeout=60)
82
82
 
83
83
  services = await compose_services(project)
84
84
  for name, service in services.items():
@@ -326,6 +326,7 @@ class DockerSandboxEnvironment(SandboxEnvironment):
326
326
  container_tmpfile,
327
327
  ],
328
328
  project=self._project,
329
+ timeout=60,
329
330
  )
330
331
 
331
332
  parent = PurePosixPath(file).parent
@@ -405,7 +406,7 @@ class DockerSandboxEnvironment(SandboxEnvironment):
405
406
 
406
407
  # read and return w/ appropriate encoding
407
408
  if text:
408
- with open(dest_file, "r", encoding="utf-8") as f:
409
+ with open(dest_file, "r", newline="", encoding="utf-8") as f:
409
410
  return f.read()
410
411
  else:
411
412
  with open(dest_file, "rb") as f:
@@ -424,13 +425,15 @@ class DockerSandboxEnvironment(SandboxEnvironment):
424
425
  None,
425
426
  )
426
427
 
427
- # return container login
428
+ # return container connection
428
429
  if container:
429
430
  return SandboxConnection(
430
- command=f"docker exec -it {container} /bin/bash --login",
431
- container=container,
431
+ command=f"docker exec -it {container} bash -l",
432
+ vscode_command=[
433
+ "remote-containers.attachToRunningContainer",
434
+ container,
435
+ ],
432
436
  )
433
-
434
437
  # error (not currently running)
435
438
  else:
436
439
  raise ConnectionError(
@@ -1,6 +1,6 @@
1
1
  from inspect_ai._util.constants import PKG_PATH
2
- from inspect_ai._util.display import display_type
3
2
  from inspect_ai._util.error import PrerequisiteError
3
+ from inspect_ai.util._display import display_type
4
4
  from inspect_ai.util._subprocess import subprocess
5
5
 
6
6
  INSPECT_WEB_BROWSER_IMAGE_DOCKERHUB = "aisiuk/inspect-web-browser-tool"
@@ -84,10 +84,11 @@ def task_project_name(task: str) -> str:
84
84
  if len(task) == 0:
85
85
  task = "task"
86
86
 
87
- return f"inspect-{task}-i{uuid().lower()}"
87
+ # _- breaks docker project name constraints so we strip trailing underscores.
88
+ return f"inspect-{task[:12].rstrip('_')}-i{uuid().lower()[:6]}"
88
89
 
89
90
 
90
- inspect_project_pattern = r"^inspect-[a-z\d\-_]*-i[a-z\d]{22}$"
91
+ inspect_project_pattern = r"^inspect-[a-z\d\-_]*-i[a-z\d]{6,}$"
91
92
 
92
93
 
93
94
  def is_inspect_project(name: str) -> bool:
@@ -2,7 +2,7 @@ from __future__ import annotations
2
2
 
3
3
  import abc
4
4
  from dataclasses import dataclass, field
5
- from typing import Awaitable, Callable, Literal, NamedTuple, Union, overload
5
+ from typing import Any, Awaitable, Callable, Literal, NamedTuple, Union, overload
6
6
 
7
7
  from pydantic import BaseModel, Field
8
8
 
@@ -34,12 +34,9 @@ class SandboxConnection(BaseModel):
34
34
  command: str
35
35
  """Shell command to connect to sandbox."""
36
36
 
37
- vscode_command: list[str] | None = Field(default=None)
37
+ vscode_command: list[Any] | None = Field(default=None)
38
38
  """Optional vscode command (+args) to connect to sandbox."""
39
39
 
40
- container: str | None = Field(default=None)
41
- """Optional container name (will not apply to all sandboxes)."""
42
-
43
40
 
44
41
  class SandboxEnvironment(abc.ABC):
45
42
  """Environment for executing arbitrary code from tools.
@@ -205,6 +202,10 @@ class SandboxEnvironment(abc.ABC):
205
202
 
206
203
  File size is limited to 100 MiB.
207
204
 
205
+ When reading text files, implementations should preserve newline constructs
206
+ (e.g. crlf should be preserved not converted to lf). This is equivalent
207
+ to specifying `newline=""` in a call to the Python `open()` function.
208
+
208
209
  Args:
209
210
  file (str): Path to file (relative file paths will resolve to the
210
211
  per-sample working directory).
@@ -101,7 +101,7 @@ class LocalSandboxEnvironment(SandboxEnvironment):
101
101
  file = self._resolve_file(file)
102
102
  verify_read_file_size(file)
103
103
  if text:
104
- with open(file, "r", encoding="utf-8") as f:
104
+ with open(file, "r", newline="", encoding="utf-8") as f:
105
105
  return f.read()
106
106
  else:
107
107
  with open(file, "rb") as f:
@@ -75,9 +75,9 @@ async def test_read_and_write_file_text(sandbox_env: SandboxEnvironment) -> None
75
75
  written_file_string = await sandbox_env.read_file(
76
76
  "test_read_and_write_file_text.file", text=True
77
77
  )
78
- assert (
79
- "great #content\nincluding newlines" == written_file_string
80
- ), f"unexpected content: [{written_file_string}]"
78
+ assert "great #content\nincluding newlines" == written_file_string, (
79
+ f"unexpected content: [{written_file_string}]"
80
+ )
81
81
  await _cleanup_file(sandbox_env, "test_read_and_write_file_text.file")
82
82
 
83
83
 
@@ -219,9 +219,9 @@ async def test_exec_output(sandbox_env: SandboxEnvironment) -> None:
219
219
  exec_result = await sandbox_env.exec(["sh", "-c", "echo foo; echo bar"])
220
220
  expected = "foo\nbar\n"
221
221
  # in the assertion message, we show the actual bytes to help debug newline issues
222
- assert (
223
- exec_result.stdout == expected
224
- ), f"Unexpected output:expected {expected.encode('UTF-8')!r}; got {exec_result.stdout.encode('UTF-8')!r}"
222
+ assert exec_result.stdout == expected, (
223
+ f"Unexpected output:expected {expected.encode('UTF-8')!r}; got {exec_result.stdout.encode('UTF-8')!r}"
224
+ )
225
225
 
226
226
 
227
227
  async def test_exec_timeout(sandbox_env: SandboxEnvironment) -> None:
@@ -248,13 +248,13 @@ async def test_exec_as_user(sandbox_env: SandboxEnvironment) -> None:
248
248
 
249
249
  # Test exec as different users
250
250
  root_result = await sandbox_env.exec(["whoami"], user="root")
251
- assert (
252
- root_result.stdout.strip() == "root"
253
- ), f"Expected 'root', got '{root_result.stdout.strip()}'"
251
+ assert root_result.stdout.strip() == "root", (
252
+ f"Expected 'root', got '{root_result.stdout.strip()}'"
253
+ )
254
254
  myuser_result = await sandbox_env.exec(["whoami"], user=username)
255
- assert (
256
- myuser_result.stdout.strip() == username
257
- ), f"Expected '{username}', got '{myuser_result.stdout.strip()}'"
255
+ assert myuser_result.stdout.strip() == username, (
256
+ f"Expected '{username}', got '{myuser_result.stdout.strip()}'"
257
+ )
258
258
  finally:
259
259
  # Clean up
260
260
  await sandbox_env.exec(["userdel", "-r", username], user="root")
@@ -266,9 +266,9 @@ async def test_exec_as_nonexistent_user(sandbox_env: SandboxEnvironment) -> None
266
266
  expected_error = (
267
267
  "unable to find user nonexistent: no matching entries in passwd file"
268
268
  )
269
- assert (
270
- expected_error in result.stdout
271
- ), f"Error string '{expected_error}' not found in error output: '{result.stdout}'"
269
+ assert expected_error in result.stdout, (
270
+ f"Error string '{expected_error}' not found in error output: '{result.stdout}'"
271
+ )
272
272
 
273
273
 
274
274
  async def test_cwd_unspecified(sandbox_env: SandboxEnvironment) -> None:
@@ -291,9 +291,9 @@ async def test_cwd_relative(sandbox_env: SandboxEnvironment) -> None:
291
291
  file_path = cwd_subdirectory + "/" + file_name
292
292
  await sandbox_env.write_file(file_path, "ls me plz")
293
293
  current_dir_contents = (await sandbox_env.exec(["ls"], cwd=cwd_subdirectory)).stdout
294
- assert (
295
- file_name in current_dir_contents
296
- ), f"{file_name} not found in {current_dir_contents}"
294
+ assert file_name in current_dir_contents, (
295
+ f"{file_name} not found in {current_dir_contents}"
296
+ )
297
297
  await _cleanup_file(sandbox_env, file_path)
298
298
 
299
299