inspect-ai 0.3.49__py3-none-any.whl → 0.3.51__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. inspect_ai/_cli/info.py +2 -2
  2. inspect_ai/_cli/log.py +2 -2
  3. inspect_ai/_cli/score.py +2 -2
  4. inspect_ai/_display/core/display.py +19 -0
  5. inspect_ai/_display/core/panel.py +37 -7
  6. inspect_ai/_display/core/progress.py +29 -2
  7. inspect_ai/_display/core/results.py +79 -40
  8. inspect_ai/_display/core/textual.py +21 -0
  9. inspect_ai/_display/rich/display.py +28 -8
  10. inspect_ai/_display/textual/app.py +107 -1
  11. inspect_ai/_display/textual/display.py +1 -1
  12. inspect_ai/_display/textual/widgets/samples.py +132 -91
  13. inspect_ai/_display/textual/widgets/task_detail.py +236 -0
  14. inspect_ai/_display/textual/widgets/tasks.py +74 -6
  15. inspect_ai/_display/textual/widgets/toggle.py +32 -0
  16. inspect_ai/_eval/context.py +2 -0
  17. inspect_ai/_eval/eval.py +4 -3
  18. inspect_ai/_eval/loader.py +1 -1
  19. inspect_ai/_eval/run.py +35 -2
  20. inspect_ai/_eval/task/log.py +13 -11
  21. inspect_ai/_eval/task/results.py +12 -3
  22. inspect_ai/_eval/task/run.py +139 -36
  23. inspect_ai/_eval/task/sandbox.py +2 -1
  24. inspect_ai/_util/_async.py +30 -1
  25. inspect_ai/_util/file.py +31 -4
  26. inspect_ai/_util/html.py +3 -0
  27. inspect_ai/_util/logger.py +6 -5
  28. inspect_ai/_util/platform.py +5 -6
  29. inspect_ai/_util/registry.py +1 -1
  30. inspect_ai/_view/server.py +9 -9
  31. inspect_ai/_view/www/App.css +2 -2
  32. inspect_ai/_view/www/dist/assets/index.css +2 -2
  33. inspect_ai/_view/www/dist/assets/index.js +352 -294
  34. inspect_ai/_view/www/log-schema.json +13 -0
  35. inspect_ai/_view/www/package.json +1 -0
  36. inspect_ai/_view/www/src/components/MessageBand.mjs +1 -1
  37. inspect_ai/_view/www/src/components/Tools.mjs +16 -13
  38. inspect_ai/_view/www/src/samples/SampleDisplay.mjs +1 -3
  39. inspect_ai/_view/www/src/samples/SampleScoreView.mjs +52 -77
  40. inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +38 -13
  41. inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +15 -2
  42. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.mjs +4 -2
  43. inspect_ai/_view/www/src/types/log.d.ts +2 -0
  44. inspect_ai/_view/www/src/workspace/WorkSpace.mjs +2 -0
  45. inspect_ai/_view/www/yarn.lock +9 -4
  46. inspect_ai/approval/__init__.py +1 -1
  47. inspect_ai/approval/_human/approver.py +35 -0
  48. inspect_ai/approval/_human/console.py +62 -0
  49. inspect_ai/approval/_human/manager.py +108 -0
  50. inspect_ai/approval/_human/panel.py +233 -0
  51. inspect_ai/approval/_human/util.py +51 -0
  52. inspect_ai/dataset/_sources/hf.py +2 -2
  53. inspect_ai/dataset/_sources/util.py +1 -1
  54. inspect_ai/log/_file.py +106 -36
  55. inspect_ai/log/_recorders/eval.py +226 -158
  56. inspect_ai/log/_recorders/file.py +9 -6
  57. inspect_ai/log/_recorders/json.py +35 -12
  58. inspect_ai/log/_recorders/recorder.py +15 -15
  59. inspect_ai/log/_samples.py +52 -0
  60. inspect_ai/model/_model.py +14 -0
  61. inspect_ai/model/_model_output.py +4 -0
  62. inspect_ai/model/_providers/azureai.py +1 -1
  63. inspect_ai/model/_providers/hf.py +106 -4
  64. inspect_ai/model/_providers/util/__init__.py +2 -0
  65. inspect_ai/model/_providers/util/hf_handler.py +200 -0
  66. inspect_ai/scorer/_common.py +1 -1
  67. inspect_ai/solver/_plan.py +0 -8
  68. inspect_ai/solver/_task_state.py +18 -1
  69. inspect_ai/solver/_use_tools.py +9 -1
  70. inspect_ai/tool/_tool_def.py +2 -2
  71. inspect_ai/tool/_tool_info.py +14 -2
  72. inspect_ai/tool/_tool_params.py +2 -1
  73. inspect_ai/tool/_tools/_execute.py +1 -1
  74. inspect_ai/tool/_tools/_web_browser/_web_browser.py +6 -0
  75. inspect_ai/util/__init__.py +5 -6
  76. inspect_ai/util/_panel.py +91 -0
  77. inspect_ai/util/_sandbox/__init__.py +2 -6
  78. inspect_ai/util/_sandbox/context.py +4 -3
  79. inspect_ai/util/_sandbox/docker/compose.py +12 -2
  80. inspect_ai/util/_sandbox/docker/docker.py +19 -9
  81. inspect_ai/util/_sandbox/docker/util.py +10 -2
  82. inspect_ai/util/_sandbox/environment.py +47 -41
  83. inspect_ai/util/_sandbox/local.py +15 -10
  84. inspect_ai/util/_subprocess.py +43 -3
  85. {inspect_ai-0.3.49.dist-info → inspect_ai-0.3.51.dist-info}/METADATA +2 -2
  86. {inspect_ai-0.3.49.dist-info → inspect_ai-0.3.51.dist-info}/RECORD +90 -82
  87. inspect_ai/_view/www/node_modules/flatted/python/flatted.py +0 -149
  88. inspect_ai/_view/www/node_modules/flatted/python/test.py +0 -63
  89. inspect_ai/approval/_human.py +0 -123
  90. {inspect_ai-0.3.49.dist-info → inspect_ai-0.3.51.dist-info}/LICENSE +0 -0
  91. {inspect_ai-0.3.49.dist-info → inspect_ai-0.3.51.dist-info}/WHEEL +0 -0
  92. {inspect_ai-0.3.49.dist-info → inspect_ai-0.3.51.dist-info}/entry_points.txt +0 -0
  93. {inspect_ai-0.3.49.dist-info → inspect_ai-0.3.51.dist-info}/top_level.txt +0 -0
@@ -273,6 +273,10 @@ class TaskState:
  """Set limit on total messages allowed per conversation."""
  self._message_limit = messages

+ from inspect_ai.log._samples import set_active_sample_message_limit
+
+ set_active_sample_message_limit(messages)
+
  @property
  def token_limit(self) -> int | None:
  """Limit on total tokens allowed per conversation."""
@@ -283,11 +287,24 @@ class TaskState:
  """Set limit on total tokens allowed per conversation."""
  self._token_limit = tokens

+ from inspect_ai.log._samples import set_active_sample_token_limit
+
+ set_active_sample_token_limit(tokens)
+
+ @property
+ def token_usage(self) -> int:
+ """Total tokens used for the current sample."""
+ return sample_total_tokens()
+
  @property
  def completed(self) -> bool:
  """Is the task completed."""
+ # update messages
+ from inspect_ai.log._samples import set_active_sample_total_messages
  from inspect_ai.log._transcript import SampleLimitEvent, transcript

+ set_active_sample_total_messages(len(self.messages))
+
  if self._completed:
  return True
  elif self.message_limit and len(self.messages) >= self.message_limit:
@@ -302,7 +319,7 @@ class TaskState:
  )
  )
  return True
- elif self.token_limit and sample_total_tokens() >= self.token_limit:
+ elif self.token_limit and self.token_usage >= self.token_limit:
  # log if this is the first time we hit this
  if not self._token_limit_exceeded:
  self._token_limit_exceeded = True
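The hunks above add a read-only `TaskState.token_usage` property alongside the existing `token_limit`, and `completed` now compares against it. A minimal sketch of a custom solver reading the new property (the `token_report` solver is illustrative, not part of the package):

```python
from inspect_ai.solver import Generate, Solver, TaskState, solver


@solver
def token_report() -> Solver:
    """Illustrative solver: report token usage after one generation step."""

    async def solve(state: TaskState, generate: Generate) -> TaskState:
        state = await generate(state)
        # token_usage reflects all tokens consumed by the current sample so far;
        # completed becomes True once it reaches state.token_limit (if set)
        print(f"tokens used: {state.token_usage} / limit: {state.token_limit}")
        return state

    return solve
```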
@@ -9,6 +9,7 @@ from ._task_state import TaskState
  def use_tools(
  *tools: Tool | list[Tool],
  tool_choice: ToolChoice | None = "auto",
+ append: bool = False,
  ) -> Solver:
  """
  Inject tools into the task state to be used in generate().
@@ -20,6 +21,9 @@ def use_tools(
  tool_choice (ToolChoice | None): Directive indicating which
  tools the model should use. If `None` is passed, then no
  change to `tool_choice` is made.
+ append (bool): If `True`, then the passed-in tools are appended
+ to the existing tools; otherwise any existing tools are
+ replaced (the default)

  Returns:
  A solver that injects the tools and tool_choice into the task state.
@@ -42,7 +46,11 @@ def use_tools(
  else:
  add_tool(tool)
  if len(tools_update) > 0:
- state.tools = tools_update
+ if append:
+ existing_tools = state.tools
+ state.tools = existing_tools + tools_update
+ else:
+ state.tools = tools_update

  # set tool choice if specified
  if tool_choice is not None:
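With the new `append` flag, a later `use_tools()` call can layer tools on top of whatever an earlier solver registered instead of replacing them. A hedged sketch of a task using it (the task and sample are illustrative; `bash` and `python` are the built-in inspect_ai tools):

```python
from inspect_ai import Task, task
from inspect_ai.dataset import Sample
from inspect_ai.solver import generate, use_tools
from inspect_ai.tool import bash, python


@task
def layered_tools() -> Task:
    return Task(
        dataset=[Sample(input="List the files in the working directory.")],
        solver=[
            use_tools(bash()),                 # start with bash only
            use_tools(python(), append=True),  # keep bash, add python
            generate(),
        ],
        sandbox="docker",
    )
```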
@@ -190,8 +190,8 @@ def tool_def_fields(tool: Tool) -> ToolDefFields:
  f"{context} not provided for parameter '{param_name}' of tool function '{name}'."
  )

- if param.type == "null":
- raise_not_provided_error("Type annotation")
+ if param.type is None and not param.anyOf and not param.enum:
+ raise_not_provided_error("Unsupported type or type annotation")
  elif not param.description:
  raise_not_provided_error("Description")

@@ -1,4 +1,6 @@
  import inspect
+ import types
+ import typing
  from dataclasses import is_dataclass
  from typing import (
  Any,
@@ -139,12 +141,18 @@ def parse_type(type_hint: Type[Any]) -> ToolParam:
  return ToolParam(type="string")
  elif type_hint is bool:
  return ToolParam(type="boolean")
+ elif type_hint is list:
+ return ToolParam(type="array", items=ToolParam())
+ elif type_hint is dict:
+ return ToolParam(type="object", additionalProperties=ToolParam())
  elif (
  is_dataclass(type_hint)
  or is_typeddict(type_hint)
  or (isinstance(type_hint, type) and issubclass(type_hint, BaseModel))
  ):
  return parse_object(type_hint)
+ elif type_hint is type(None):
+ return ToolParam(type="null")
  else:
  return ToolParam()
  elif origin is list or origin is List:
@@ -156,10 +164,14 @@ def parse_type(type_hint: Type[Any]) -> ToolParam:
  type="object",
  additionalProperties=parse_type(args[1]) if len(args) > 1 else ToolParam(),
  )
- elif origin is Union:
+ elif origin is Union or origin is types.UnionType:
  return ToolParam(anyOf=[parse_type(arg) for arg in args])
  elif origin is Optional:
- return ToolParam(anyOf=[parse_type(args[0]), ToolParam()])
+ return ToolParam(
+ anyOf=[parse_type(arg) for arg in args] + [ToolParam(type="null")]
+ )
+ elif origin is typing.Literal:
+ return ToolParam(enum=list(args))

  return ToolParam() # Default case if we can't determine the type

@@ -13,9 +13,10 @@ JSONType = Literal["string", "integer", "number", "boolean", "array", "object",
  class ToolParam(BaseModel):
  """Description of tool parameter in JSON Schema format."""

- type: JSONType = Field(default="null")
+ type: JSONType | None = Field(default=None)
  description: str | None = Field(default=None)
  default: Any = Field(default=None)
+ enum: list[Any] | None = Field(default=None)
  items: Optional["ToolParam"] = Field(default=None)
  properties: dict[str, "ToolParam"] | None = Field(default=None)
  additionalProperties: Optional["ToolParam"] | bool | None = Field(default=None)
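Together, the `parse_type` and `ToolParam` changes mean tool signatures using `Literal`, PEP 604 unions (`int | None`), and bare `list`/`dict` annotations now produce usable JSON Schema (an `enum`, an `anyOf` including null, and untyped array/object parameters respectively) instead of tripping the "not provided" error in `tool_def_fields`. A hedged sketch of a tool that exercises the new cases (the `search` tool is made up for illustration):

```python
from typing import Literal

from inspect_ai.tool import Tool, tool


@tool
def search() -> Tool:
    async def execute(
        query: str,
        mode: Literal["web", "news", "images"] = "web",
        max_results: int | None = None,
    ) -> str:
        """Run an illustrative search.

        Args:
            query (str): Search query.
            mode (Literal): Which index to search.
            max_results (int | None): Optional cap on the number of results.
        """
        # a real tool would call a search backend here
        return f"searched {query!r} in {mode} (max={max_results})"

    return execute
```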
@@ -75,7 +75,7 @@ def python(timeout: int | None = None, user: str | None = None) -> Tool:
  Use the python function to execute Python code.

  The python function will only return you the stdout of the script,
- oo make sure to use print to see the output.
+ so make sure to use print to see the output.

  Args:
  code (str): The python code to execute.
@@ -373,6 +373,12 @@ async def web_browser_cmd(cmd: str, *args: str) -> str:
  web_at = (
  str(response.get("web_at")) or "(no web accessiblity tree available)"
  )
+ # Remove base64 data from images.
+ web_at_lines = web_at.split("\n")
+ web_at_lines = [
+ line.partition("data:image/png;base64")[0] for line in web_at_lines
+ ]
+ web_at = "\n".join(web_at_lines)
  store().set(WEB_BROWSER_AT, web_at)
  return web_at
  elif "error" in response:
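The stripping relies on `str.partition`, which keeps only the text before the first `data:image/png;base64` marker on each line of the accessibility tree. A quick standalone illustration (the sample line is invented):

```python
line = '42 img "logo" data:image/png;base64,iVBORw0KGgoAAAANSUhEUg=='
# everything from the base64 marker onward is dropped
print(line.partition("data:image/png;base64")[0])
# prints: 42 img "logo"
```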
@@ -1,13 +1,12 @@
  from ._concurrency import concurrency
  from ._console import input_screen
+ from ._panel import InputPanel, input_panel
  from ._resource import resource
  from ._sandbox import (
  OutputLimitExceededError,
  SandboxConnection,
- SandboxConnectionContainer,
- SandboxConnectionLocal,
- SandboxConnectionSSH,
  SandboxEnvironment,
+ SandboxEnvironmentConfigType,
  SandboxEnvironmentLimits,
  SandboxEnvironments,
  SandboxEnvironmentSpec,
@@ -29,19 +28,19 @@ from ._trace import trace_enabled, trace_panel
  __all__ = [
  "ExecResult",
  "concurrency",
+ "InputPanel",
+ "input_panel",
  "input_screen",
  "OutputLimitExceededError",
  "resource",
  "subprocess",
  "SandboxEnvironment",
+ "SandboxEnvironmentConfigType",
  "SandboxEnvironmentLimits",
  "SandboxEnvironments",
  "SandboxEnvironmentSpec",
  "SandboxEnvironmentType",
  "SandboxConnection",
- "SandboxConnectionContainer",
- "SandboxConnectionLocal",
- "SandboxConnectionSSH",
  "sandboxenv",
  "sandbox",
  "sandbox_with",
@@ -0,0 +1,91 @@
+ from typing import Any, Protocol, TypeVar
+
+ from textual.containers import Container
+
+
+ class InputPanel(Container):
+ DEFAULT_CLASSES = "task-input-panel"
+
+ class Host(Protocol):
+ def set_title(self, title: str) -> None: ...
+ def activate(self) -> None: ...
+ def deactivate(self) -> None: ...
+ def close(self) -> None: ...
+
+ def __init__(self, title: str, host: Host) -> None:
+ super().__init__()
+ self._title = title
+ self._host = host
+
+ async def __aenter__(self) -> "InputPanel":
+ self.activate()
+ return self
+
+ async def __aexit__(
+ self,
+ *execinfo: Any,
+ ) -> None:
+ self.close()
+
+ @property
+ def title(self) -> str:
+ return self._title
+
+ @title.setter
+ def title(self, title: str) -> None:
+ self._title = title
+ self._host.set_title(title)
+
+ def activate(self) -> None:
+ self._host.activate()
+
+ def deactivate(self) -> None:
+ self._host.deactivate()
+
+ def close(self) -> None:
+ self._host.close()
+
+ def update(self) -> None:
+ """Update method (called periodically e.g. once every second)"""
+ pass
+
+
+ TP = TypeVar("TP", bound=InputPanel)
+
+
+ async def input_panel(title: str, panel: type[TP]) -> TP:
+ """Create an input panel in the task display.
+
+ There can only be a single instance of an InputPanel with a given
+ 'title' running at once. Therefore, if the panel doesn't exist it
+ is created, otherwise a reference to the existing panel is returned.
+
+ Examples:
+ Create/activate an input panel (the panel will remain after
+ the scope exits -- see below for open/close semantics)
+
+ ```python
+ panel = await input_panel("Custom", CustomPanel)
+ panel.activate()
+ ```
+
+ Activate and close an input panel using a context manager:
+
+ ```python
+ async with await input_panel("Custom", CustomPanel) as panel:
+ ...
+ ```
+
+ Args:
+ title (str): Input panel title.
+ panel (type[TP]): Type of panel widget (must derive from `InputPanel`)
+
+ Returns:
+ InputPanel: Instance of widget running in the task display.
+
+ Raises:
+ NotImplementedError: If Inspect is not running in display='full' model.
+ """
+ from inspect_ai._display.core.active import task_screen
+
+ return await task_screen().input_panel(title, panel)
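A hedged sketch of how the new `InputPanel`/`input_panel` API might be used, assuming a standard textual `Static` widget for the panel body (the `CustomPanel` class and its contents are illustrative, not part of the package):

```python
from textual.app import ComposeResult
from textual.widgets import Static

from inspect_ai.util import InputPanel, input_panel


class CustomPanel(InputPanel):
    def compose(self) -> ComposeResult:
        # the panel body can be any composition of textual widgets
        yield Static("Waiting for input...", id="status")

    def update(self) -> None:
        # called periodically by the task display (roughly once per second)
        self.query_one("#status", Static).update("Still waiting...")


async def show_custom_panel() -> None:
    # requires Inspect's full (textual) task display
    async with await input_panel("Custom", CustomPanel) as panel:
        panel.title = "Custom (active)"
```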
@@ -4,10 +4,8 @@ from .context import sandbox, sandbox_with
  from .docker.docker import DockerSandboxEnvironment # noqa: F401
  from .environment import (
  SandboxConnection,
- SandboxConnectionContainer,
- SandboxConnectionLocal,
- SandboxConnectionSSH,
  SandboxEnvironment,
+ SandboxEnvironmentConfigType,
  SandboxEnvironments,
  SandboxEnvironmentSpec,
  SandboxEnvironmentType,
@@ -20,14 +18,12 @@ from .service import SandboxService, sandbox_service
  __all__ = [
  "OutputLimitExceededError",
  "SandboxEnvironment",
+ "SandboxEnvironmentConfigType",
  "SandboxEnvironmentLimits",
  "SandboxEnvironments",
  "SandboxEnvironmentSpec",
  "SandboxEnvironmentType",
  "SandboxConnection",
- "SandboxConnectionContainer",
- "SandboxConnectionLocal",
- "SandboxConnectionSSH",
  "sandboxenv",
  "sandbox",
  "sandbox_with",
@@ -9,6 +9,7 @@ from .environment import (
  SampleInit,
  SandboxConnection,
  SandboxEnvironment,
+ SandboxEnvironmentConfigType,
  )
  from .registry import registry_find_sandboxenv

@@ -93,7 +94,7 @@ async def sandbox_connections() -> dict[str, SandboxConnection]:
  for name, environment in environments.items():
  try:
  connections[name] = await environment.connection()
- except NotImplementedError:
+ except (NotImplementedError, ConnectionError):
  pass
  return connections
  else:
@@ -110,7 +111,7 @@ def raise_no_sandbox() -> NoReturn:
  async def init_sandbox_environments_sample(
  type: str,
  task_name: str,
- config: str | None,
+ config: SandboxEnvironmentConfigType | None,
  files: dict[str, bytes],
  setup: bytes | None,
  metadata: dict[str, Any],
@@ -149,7 +150,7 @@ async def init_sandbox_environments_sample(
  async def cleanup_sandbox_environments_sample(
  type: str,
  task_name: str,
- config: str | None,
+ config: SandboxEnvironmentConfigType | None,
  environments: dict[str, SandboxEnvironment],
  interrupted: bool,
  ) -> None:
@@ -71,9 +71,15 @@ async def compose_down(project: ComposeProject, quiet: bool = True) -> None:


  async def compose_cp(
- src: str, dest: str, project: ComposeProject, cwd: str | Path | None = None
+ src: str,
+ dest: str,
+ project: ComposeProject,
+ cwd: str | Path | None = None,
+ output_limit: int | None = None,
  ) -> None:
- result = await compose_command(["cp", "--", src, dest], project=project, cwd=cwd)
+ result = await compose_command(
+ ["cp", "--", src, dest], project=project, cwd=cwd, output_limit=output_limit
+ )
  if not result.success:
  msg = f"Failed to copy file from '{src}' to '{dest}': {result.stderr}"
  raise RuntimeError(msg)
@@ -149,6 +155,7 @@ async def compose_exec(
  project: ComposeProject,
  timeout: int | None = None,
  input: str | bytes | None = None,
+ output_limit: int | None = None,
  ) -> ExecResult[str]:
  return await compose_command(
  ["exec"] + command,
@@ -156,6 +163,7 @@
  timeout=timeout,
  input=input,
  forward_env=False,
+ output_limit=output_limit,
  )


@@ -241,6 +249,7 @@ async def compose_command(
  cwd: str | Path | None = None,
  forward_env: bool = True,
  capture_output: bool = True,
+ output_limit: int | None = None,
  ansi: Literal["never", "always", "auto"] | None = None,
  ) -> ExecResult[str]:
  # The base docker compose command
@@ -278,6 +287,7 @@
  env=env,
  timeout=timeout,
  capture_output=capture_output,
+ output_limit=output_limit,
  )
  sandbox_log(f"compose command completed: {shlex.join(compose_command)}")
  return result
@@ -12,10 +12,14 @@ from inspect_ai.util._subprocess import ExecResult

  from ..environment import (
  SandboxConnection,
- SandboxConnectionContainer,
  SandboxEnvironment,
+ SandboxEnvironmentConfigType,
+ )
+ from ..limits import (
+ SandboxEnvironmentLimits,
+ verify_exec_result_size,
+ verify_read_file_size,
  )
- from ..limits import verify_exec_result_size, verify_read_file_size
  from ..registry import sandboxenv
  from .cleanup import (
  cli_cleanup,
@@ -51,7 +55,9 @@ class DockerSandboxEnvironment(SandboxEnvironment):
  return CONFIG_FILES + [DOCKERFILE]

  @classmethod
- async def task_init(cls, task_name: str, config: str | None) -> None:
+ async def task_init(
+ cls, task_name: str, config: SandboxEnvironmentConfigType | None
+ ) -> None:
  # validate prereqs
  await validate_prereqs()

@@ -98,13 +104,16 @@ class DockerSandboxEnvironment(SandboxEnvironment):
  @override
  @classmethod
  async def sample_init(
- cls, task_name: str, config: str | None, metadata: dict[str, str]
+ cls,
+ task_name: str,
+ config: SandboxEnvironmentConfigType | None,
+ metadata: dict[str, str],
  ) -> dict[str, SandboxEnvironment]:
  sandbox_log("setup")

  # create environment variables for sample metadata
  env: dict[str, str] = {}
- if config and Path(config).exists():
+ if isinstance(config, str) and Path(config).exists():
  # read the config file
  with open(config, "r") as f:
  config_text = f.read()
@@ -175,7 +184,7 @@ class DockerSandboxEnvironment(SandboxEnvironment):
  async def sample_cleanup(
  cls,
  task_name: str,
- config: str | None,
+ config: SandboxEnvironmentConfigType | None,
  environments: dict[str, SandboxEnvironment],
  interrupted: bool,
  ) -> None:
@@ -191,7 +200,7 @@ class DockerSandboxEnvironment(SandboxEnvironment):

  @classmethod
  async def task_cleanup(
- cls, task_name: str, config: str | None, cleanup: bool
+ cls, task_name: str, config: SandboxEnvironmentConfigType | None, cleanup: bool
  ) -> None:
  await project_cleanup_shutdown(cleanup)

@@ -241,6 +250,7 @@ class DockerSandboxEnvironment(SandboxEnvironment):
  project=self._project,
  timeout=timeout,
  input=input,
+ output_limit=SandboxEnvironmentLimits.MAX_EXEC_OUTPUT_SIZE,
  )
  verify_exec_result_size(exec_result)
  if exec_result.returncode == 126 and "permission denied" in exec_result.stdout:
@@ -369,6 +379,7 @@ class DockerSandboxEnvironment(SandboxEnvironment):
  dest=os.path.basename(dest_file),
  project=self._project,
  cwd=os.path.dirname(dest_file),
+ output_limit=SandboxEnvironmentLimits.MAX_READ_FILE_SIZE,
  )
  except RuntimeError as ex:
  # extract the message and normalise case
@@ -413,10 +424,9 @@ class DockerSandboxEnvironment(SandboxEnvironment):

  # return container login
  if container:
- return SandboxConnectionContainer(
+ return SandboxConnection(
  command=f"docker exec -it {container} /bin/bash --login",
  container=container,
- working_dir=self._working_dir,
  )

  # error (not currently running)
@@ -7,6 +7,7 @@ from shortuuid import uuid

  from inspect_ai._util.constants import SANDBOX

+ from ..environment import SandboxEnvironmentConfigType
  from .config import (
  COMPOSE_DOCKERFILE_YAML,
  auto_compose_file,
@@ -26,10 +27,17 @@ class ComposeProject:

  @classmethod
  async def create(
- cls, name: str, config: str | None, env: dict[str, str] = {}
+ cls,
+ name: str,
+ config: SandboxEnvironmentConfigType | None,
+ env: dict[str, str] = {},
  ) -> "ComposeProject":
  # resolve config to full path if we have one
- config_path = Path(config).resolve() if config else None
+ config_path = None
+ if isinstance(config, str):
+ config_path = Path(config).resolve()
+ elif config is not None:
+ raise ValueError(f"Unsupported config type: {type(config)}. Expected str.")

  # if its a Dockerfile, then config is the auto-generated .compose.yaml
  if config_path and is_dockerfile(config_path.name):