inspect-ai 0.3.49__py3-none-any.whl → 0.3.51__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/info.py +2 -2
- inspect_ai/_cli/log.py +2 -2
- inspect_ai/_cli/score.py +2 -2
- inspect_ai/_display/core/display.py +19 -0
- inspect_ai/_display/core/panel.py +37 -7
- inspect_ai/_display/core/progress.py +29 -2
- inspect_ai/_display/core/results.py +79 -40
- inspect_ai/_display/core/textual.py +21 -0
- inspect_ai/_display/rich/display.py +28 -8
- inspect_ai/_display/textual/app.py +107 -1
- inspect_ai/_display/textual/display.py +1 -1
- inspect_ai/_display/textual/widgets/samples.py +132 -91
- inspect_ai/_display/textual/widgets/task_detail.py +236 -0
- inspect_ai/_display/textual/widgets/tasks.py +74 -6
- inspect_ai/_display/textual/widgets/toggle.py +32 -0
- inspect_ai/_eval/context.py +2 -0
- inspect_ai/_eval/eval.py +4 -3
- inspect_ai/_eval/loader.py +1 -1
- inspect_ai/_eval/run.py +35 -2
- inspect_ai/_eval/task/log.py +13 -11
- inspect_ai/_eval/task/results.py +12 -3
- inspect_ai/_eval/task/run.py +139 -36
- inspect_ai/_eval/task/sandbox.py +2 -1
- inspect_ai/_util/_async.py +30 -1
- inspect_ai/_util/file.py +31 -4
- inspect_ai/_util/html.py +3 -0
- inspect_ai/_util/logger.py +6 -5
- inspect_ai/_util/platform.py +5 -6
- inspect_ai/_util/registry.py +1 -1
- inspect_ai/_view/server.py +9 -9
- inspect_ai/_view/www/App.css +2 -2
- inspect_ai/_view/www/dist/assets/index.css +2 -2
- inspect_ai/_view/www/dist/assets/index.js +352 -294
- inspect_ai/_view/www/log-schema.json +13 -0
- inspect_ai/_view/www/package.json +1 -0
- inspect_ai/_view/www/src/components/MessageBand.mjs +1 -1
- inspect_ai/_view/www/src/components/Tools.mjs +16 -13
- inspect_ai/_view/www/src/samples/SampleDisplay.mjs +1 -3
- inspect_ai/_view/www/src/samples/SampleScoreView.mjs +52 -77
- inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +38 -13
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +15 -2
- inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.mjs +4 -2
- inspect_ai/_view/www/src/types/log.d.ts +2 -0
- inspect_ai/_view/www/src/workspace/WorkSpace.mjs +2 -0
- inspect_ai/_view/www/yarn.lock +9 -4
- inspect_ai/approval/__init__.py +1 -1
- inspect_ai/approval/_human/approver.py +35 -0
- inspect_ai/approval/_human/console.py +62 -0
- inspect_ai/approval/_human/manager.py +108 -0
- inspect_ai/approval/_human/panel.py +233 -0
- inspect_ai/approval/_human/util.py +51 -0
- inspect_ai/dataset/_sources/hf.py +2 -2
- inspect_ai/dataset/_sources/util.py +1 -1
- inspect_ai/log/_file.py +106 -36
- inspect_ai/log/_recorders/eval.py +226 -158
- inspect_ai/log/_recorders/file.py +9 -6
- inspect_ai/log/_recorders/json.py +35 -12
- inspect_ai/log/_recorders/recorder.py +15 -15
- inspect_ai/log/_samples.py +52 -0
- inspect_ai/model/_model.py +14 -0
- inspect_ai/model/_model_output.py +4 -0
- inspect_ai/model/_providers/azureai.py +1 -1
- inspect_ai/model/_providers/hf.py +106 -4
- inspect_ai/model/_providers/util/__init__.py +2 -0
- inspect_ai/model/_providers/util/hf_handler.py +200 -0
- inspect_ai/scorer/_common.py +1 -1
- inspect_ai/solver/_plan.py +0 -8
- inspect_ai/solver/_task_state.py +18 -1
- inspect_ai/solver/_use_tools.py +9 -1
- inspect_ai/tool/_tool_def.py +2 -2
- inspect_ai/tool/_tool_info.py +14 -2
- inspect_ai/tool/_tool_params.py +2 -1
- inspect_ai/tool/_tools/_execute.py +1 -1
- inspect_ai/tool/_tools/_web_browser/_web_browser.py +6 -0
- inspect_ai/util/__init__.py +5 -6
- inspect_ai/util/_panel.py +91 -0
- inspect_ai/util/_sandbox/__init__.py +2 -6
- inspect_ai/util/_sandbox/context.py +4 -3
- inspect_ai/util/_sandbox/docker/compose.py +12 -2
- inspect_ai/util/_sandbox/docker/docker.py +19 -9
- inspect_ai/util/_sandbox/docker/util.py +10 -2
- inspect_ai/util/_sandbox/environment.py +47 -41
- inspect_ai/util/_sandbox/local.py +15 -10
- inspect_ai/util/_subprocess.py +43 -3
- {inspect_ai-0.3.49.dist-info → inspect_ai-0.3.51.dist-info}/METADATA +2 -2
- {inspect_ai-0.3.49.dist-info → inspect_ai-0.3.51.dist-info}/RECORD +90 -82
- inspect_ai/_view/www/node_modules/flatted/python/flatted.py +0 -149
- inspect_ai/_view/www/node_modules/flatted/python/test.py +0 -63
- inspect_ai/approval/_human.py +0 -123
- {inspect_ai-0.3.49.dist-info → inspect_ai-0.3.51.dist-info}/LICENSE +0 -0
- {inspect_ai-0.3.49.dist-info → inspect_ai-0.3.51.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.49.dist-info → inspect_ai-0.3.51.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.49.dist-info → inspect_ai-0.3.51.dist-info}/top_level.txt +0 -0
inspect_ai/solver/_task_state.py
CHANGED
@@ -273,6 +273,10 @@ class TaskState:
|
|
273
273
|
"""Set limit on total messages allowed per conversation."""
|
274
274
|
self._message_limit = messages
|
275
275
|
|
276
|
+
from inspect_ai.log._samples import set_active_sample_message_limit
|
277
|
+
|
278
|
+
set_active_sample_message_limit(messages)
|
279
|
+
|
276
280
|
@property
|
277
281
|
def token_limit(self) -> int | None:
|
278
282
|
"""Limit on total tokens allowed per conversation."""
|
@@ -283,11 +287,24 @@ class TaskState:
|
|
283
287
|
"""Set limit on total tokens allowed per conversation."""
|
284
288
|
self._token_limit = tokens
|
285
289
|
|
290
|
+
from inspect_ai.log._samples import set_active_sample_token_limit
|
291
|
+
|
292
|
+
set_active_sample_token_limit(tokens)
|
293
|
+
|
294
|
+
@property
|
295
|
+
def token_usage(self) -> int:
|
296
|
+
"""Total tokens used for the current sample."""
|
297
|
+
return sample_total_tokens()
|
298
|
+
|
286
299
|
@property
|
287
300
|
def completed(self) -> bool:
|
288
301
|
"""Is the task completed."""
|
302
|
+
# update messages
|
303
|
+
from inspect_ai.log._samples import set_active_sample_total_messages
|
289
304
|
from inspect_ai.log._transcript import SampleLimitEvent, transcript
|
290
305
|
|
306
|
+
set_active_sample_total_messages(len(self.messages))
|
307
|
+
|
291
308
|
if self._completed:
|
292
309
|
return True
|
293
310
|
elif self.message_limit and len(self.messages) >= self.message_limit:
|
@@ -302,7 +319,7 @@ class TaskState:
|
|
302
319
|
)
|
303
320
|
)
|
304
321
|
return True
|
305
|
-
elif self.token_limit and
|
322
|
+
elif self.token_limit and self.token_usage >= self.token_limit:
|
306
323
|
# log if this is the first time we hit this
|
307
324
|
if not self._token_limit_exceeded:
|
308
325
|
self._token_limit_exceeded = True
|
inspect_ai/solver/_use_tools.py
CHANGED
@@ -9,6 +9,7 @@ from ._task_state import TaskState
|
|
9
9
|
def use_tools(
|
10
10
|
*tools: Tool | list[Tool],
|
11
11
|
tool_choice: ToolChoice | None = "auto",
|
12
|
+
append: bool = False,
|
12
13
|
) -> Solver:
|
13
14
|
"""
|
14
15
|
Inject tools into the task state to be used in generate().
|
@@ -20,6 +21,9 @@ def use_tools(
|
|
20
21
|
tool_choice (ToolChoice | None): Directive indicating which
|
21
22
|
tools the model should use. If `None` is passed, then no
|
22
23
|
change to `tool_choice` is made.
|
24
|
+
append (bool): If `True`, then the passed-in tools are appended
|
25
|
+
to the existing tools; otherwise any existing tools are
|
26
|
+
replaced (the default)
|
23
27
|
|
24
28
|
Returns:
|
25
29
|
A solver that injects the tools and tool_choice into the task state.
|
@@ -42,7 +46,11 @@ def use_tools(
|
|
42
46
|
else:
|
43
47
|
add_tool(tool)
|
44
48
|
if len(tools_update) > 0:
|
45
|
-
|
49
|
+
if append:
|
50
|
+
existing_tools = state.tools
|
51
|
+
state.tools = existing_tools + tools_update
|
52
|
+
else:
|
53
|
+
state.tools = tools_update
|
46
54
|
|
47
55
|
# set tool choice if specified
|
48
56
|
if tool_choice is not None:
|
inspect_ai/tool/_tool_def.py
CHANGED
@@ -190,8 +190,8 @@ def tool_def_fields(tool: Tool) -> ToolDefFields:
|
|
190
190
|
f"{context} not provided for parameter '{param_name}' of tool function '{name}'."
|
191
191
|
)
|
192
192
|
|
193
|
-
if param.type
|
194
|
-
raise_not_provided_error("
|
193
|
+
if param.type is None and not param.anyOf and not param.enum:
|
194
|
+
raise_not_provided_error("Unsupported type or type annotation")
|
195
195
|
elif not param.description:
|
196
196
|
raise_not_provided_error("Description")
|
197
197
|
|
inspect_ai/tool/_tool_info.py
CHANGED
@@ -1,4 +1,6 @@
|
|
1
1
|
import inspect
|
2
|
+
import types
|
3
|
+
import typing
|
2
4
|
from dataclasses import is_dataclass
|
3
5
|
from typing import (
|
4
6
|
Any,
|
@@ -139,12 +141,18 @@ def parse_type(type_hint: Type[Any]) -> ToolParam:
|
|
139
141
|
return ToolParam(type="string")
|
140
142
|
elif type_hint is bool:
|
141
143
|
return ToolParam(type="boolean")
|
144
|
+
elif type_hint is list:
|
145
|
+
return ToolParam(type="array", items=ToolParam())
|
146
|
+
elif type_hint is dict:
|
147
|
+
return ToolParam(type="object", additionalProperties=ToolParam())
|
142
148
|
elif (
|
143
149
|
is_dataclass(type_hint)
|
144
150
|
or is_typeddict(type_hint)
|
145
151
|
or (isinstance(type_hint, type) and issubclass(type_hint, BaseModel))
|
146
152
|
):
|
147
153
|
return parse_object(type_hint)
|
154
|
+
elif type_hint is type(None):
|
155
|
+
return ToolParam(type="null")
|
148
156
|
else:
|
149
157
|
return ToolParam()
|
150
158
|
elif origin is list or origin is List:
|
@@ -156,10 +164,14 @@ def parse_type(type_hint: Type[Any]) -> ToolParam:
|
|
156
164
|
type="object",
|
157
165
|
additionalProperties=parse_type(args[1]) if len(args) > 1 else ToolParam(),
|
158
166
|
)
|
159
|
-
elif origin is Union:
|
167
|
+
elif origin is Union or origin is types.UnionType:
|
160
168
|
return ToolParam(anyOf=[parse_type(arg) for arg in args])
|
161
169
|
elif origin is Optional:
|
162
|
-
return ToolParam(
|
170
|
+
return ToolParam(
|
171
|
+
anyOf=[parse_type(arg) for arg in args] + [ToolParam(type="null")]
|
172
|
+
)
|
173
|
+
elif origin is typing.Literal:
|
174
|
+
return ToolParam(enum=list(args))
|
163
175
|
|
164
176
|
return ToolParam() # Default case if we can't determine the type
|
165
177
|
|
inspect_ai/tool/_tool_params.py
CHANGED
@@ -13,9 +13,10 @@ JSONType = Literal["string", "integer", "number", "boolean", "array", "object",
|
|
13
13
|
class ToolParam(BaseModel):
|
14
14
|
"""Description of tool parameter in JSON Schema format."""
|
15
15
|
|
16
|
-
type: JSONType = Field(default=
|
16
|
+
type: JSONType | None = Field(default=None)
|
17
17
|
description: str | None = Field(default=None)
|
18
18
|
default: Any = Field(default=None)
|
19
|
+
enum: list[Any] | None = Field(default=None)
|
19
20
|
items: Optional["ToolParam"] = Field(default=None)
|
20
21
|
properties: dict[str, "ToolParam"] | None = Field(default=None)
|
21
22
|
additionalProperties: Optional["ToolParam"] | bool | None = Field(default=None)
|
@@ -75,7 +75,7 @@ def python(timeout: int | None = None, user: str | None = None) -> Tool:
|
|
75
75
|
Use the python function to execute Python code.
|
76
76
|
|
77
77
|
The python function will only return you the stdout of the script,
|
78
|
-
|
78
|
+
so make sure to use print to see the output.
|
79
79
|
|
80
80
|
Args:
|
81
81
|
code (str): The python code to execute.
|
@@ -373,6 +373,12 @@ async def web_browser_cmd(cmd: str, *args: str) -> str:
|
|
373
373
|
web_at = (
|
374
374
|
str(response.get("web_at")) or "(no web accessiblity tree available)"
|
375
375
|
)
|
376
|
+
# Remove base64 data from images.
|
377
|
+
web_at_lines = web_at.split("\n")
|
378
|
+
web_at_lines = [
|
379
|
+
line.partition("data:image/png;base64")[0] for line in web_at_lines
|
380
|
+
]
|
381
|
+
web_at = "\n".join(web_at_lines)
|
376
382
|
store().set(WEB_BROWSER_AT, web_at)
|
377
383
|
return web_at
|
378
384
|
elif "error" in response:
|
inspect_ai/util/__init__.py
CHANGED
@@ -1,13 +1,12 @@
|
|
1
1
|
from ._concurrency import concurrency
|
2
2
|
from ._console import input_screen
|
3
|
+
from ._panel import InputPanel, input_panel
|
3
4
|
from ._resource import resource
|
4
5
|
from ._sandbox import (
|
5
6
|
OutputLimitExceededError,
|
6
7
|
SandboxConnection,
|
7
|
-
SandboxConnectionContainer,
|
8
|
-
SandboxConnectionLocal,
|
9
|
-
SandboxConnectionSSH,
|
10
8
|
SandboxEnvironment,
|
9
|
+
SandboxEnvironmentConfigType,
|
11
10
|
SandboxEnvironmentLimits,
|
12
11
|
SandboxEnvironments,
|
13
12
|
SandboxEnvironmentSpec,
|
@@ -29,19 +28,19 @@ from ._trace import trace_enabled, trace_panel
|
|
29
28
|
__all__ = [
|
30
29
|
"ExecResult",
|
31
30
|
"concurrency",
|
31
|
+
"InputPanel",
|
32
|
+
"input_panel",
|
32
33
|
"input_screen",
|
33
34
|
"OutputLimitExceededError",
|
34
35
|
"resource",
|
35
36
|
"subprocess",
|
36
37
|
"SandboxEnvironment",
|
38
|
+
"SandboxEnvironmentConfigType",
|
37
39
|
"SandboxEnvironmentLimits",
|
38
40
|
"SandboxEnvironments",
|
39
41
|
"SandboxEnvironmentSpec",
|
40
42
|
"SandboxEnvironmentType",
|
41
43
|
"SandboxConnection",
|
42
|
-
"SandboxConnectionContainer",
|
43
|
-
"SandboxConnectionLocal",
|
44
|
-
"SandboxConnectionSSH",
|
45
44
|
"sandboxenv",
|
46
45
|
"sandbox",
|
47
46
|
"sandbox_with",
|
@@ -0,0 +1,91 @@
|
|
1
|
+
from typing import Any, Protocol, TypeVar
|
2
|
+
|
3
|
+
from textual.containers import Container
|
4
|
+
|
5
|
+
|
6
|
+
class InputPanel(Container):
|
7
|
+
DEFAULT_CLASSES = "task-input-panel"
|
8
|
+
|
9
|
+
class Host(Protocol):
|
10
|
+
def set_title(self, title: str) -> None: ...
|
11
|
+
def activate(self) -> None: ...
|
12
|
+
def deactivate(self) -> None: ...
|
13
|
+
def close(self) -> None: ...
|
14
|
+
|
15
|
+
def __init__(self, title: str, host: Host) -> None:
|
16
|
+
super().__init__()
|
17
|
+
self._title = title
|
18
|
+
self._host = host
|
19
|
+
|
20
|
+
async def __aenter__(self) -> "InputPanel":
|
21
|
+
self.activate()
|
22
|
+
return self
|
23
|
+
|
24
|
+
async def __aexit__(
|
25
|
+
self,
|
26
|
+
*execinfo: Any,
|
27
|
+
) -> None:
|
28
|
+
self.close()
|
29
|
+
|
30
|
+
@property
|
31
|
+
def title(self) -> str:
|
32
|
+
return self._title
|
33
|
+
|
34
|
+
@title.setter
|
35
|
+
def title(self, title: str) -> None:
|
36
|
+
self._title = title
|
37
|
+
self._host.set_title(title)
|
38
|
+
|
39
|
+
def activate(self) -> None:
|
40
|
+
self._host.activate()
|
41
|
+
|
42
|
+
def deactivate(self) -> None:
|
43
|
+
self._host.deactivate()
|
44
|
+
|
45
|
+
def close(self) -> None:
|
46
|
+
self._host.close()
|
47
|
+
|
48
|
+
def update(self) -> None:
|
49
|
+
"""Update method (called periodically e.g. once every second)"""
|
50
|
+
pass
|
51
|
+
|
52
|
+
|
53
|
+
TP = TypeVar("TP", bound=InputPanel)
|
54
|
+
|
55
|
+
|
56
|
+
async def input_panel(title: str, panel: type[TP]) -> TP:
|
57
|
+
"""Create an input panel in the task display.
|
58
|
+
|
59
|
+
There can only be a single instance of an InputPanel with a given
|
60
|
+
'title' running at once. Therefore, if the panel doesn't exist it
|
61
|
+
is created, otherwise a reference to the existing panel is returned.
|
62
|
+
|
63
|
+
Examples:
|
64
|
+
Create/activate an input panel (the panel will remain after
|
65
|
+
the scope exits -- see below for open/close semantics)
|
66
|
+
|
67
|
+
```python
|
68
|
+
panel = await input_panel("Custom", CustomPanel)
|
69
|
+
panel.activate()
|
70
|
+
```
|
71
|
+
|
72
|
+
Activate and close an input panel using a context manager:
|
73
|
+
|
74
|
+
```python
|
75
|
+
async with await input_panel("Custom", CustomPanel) as panel:
|
76
|
+
...
|
77
|
+
```
|
78
|
+
|
79
|
+
Args:
|
80
|
+
title (str): Input panel title.
|
81
|
+
panel (type[TP]): Type of panel widget (must derive from `InputPanel`)
|
82
|
+
|
83
|
+
Returns:
|
84
|
+
InputPanel: Instance of widget running in the task display.
|
85
|
+
|
86
|
+
Raises:
|
87
|
+
NotImplementedError: If Inspect is not running in display='full' model.
|
88
|
+
"""
|
89
|
+
from inspect_ai._display.core.active import task_screen
|
90
|
+
|
91
|
+
return await task_screen().input_panel(title, panel)
|
@@ -4,10 +4,8 @@ from .context import sandbox, sandbox_with
|
|
4
4
|
from .docker.docker import DockerSandboxEnvironment # noqa: F401
|
5
5
|
from .environment import (
|
6
6
|
SandboxConnection,
|
7
|
-
SandboxConnectionContainer,
|
8
|
-
SandboxConnectionLocal,
|
9
|
-
SandboxConnectionSSH,
|
10
7
|
SandboxEnvironment,
|
8
|
+
SandboxEnvironmentConfigType,
|
11
9
|
SandboxEnvironments,
|
12
10
|
SandboxEnvironmentSpec,
|
13
11
|
SandboxEnvironmentType,
|
@@ -20,14 +18,12 @@ from .service import SandboxService, sandbox_service
|
|
20
18
|
__all__ = [
|
21
19
|
"OutputLimitExceededError",
|
22
20
|
"SandboxEnvironment",
|
21
|
+
"SandboxEnvironmentConfigType",
|
23
22
|
"SandboxEnvironmentLimits",
|
24
23
|
"SandboxEnvironments",
|
25
24
|
"SandboxEnvironmentSpec",
|
26
25
|
"SandboxEnvironmentType",
|
27
26
|
"SandboxConnection",
|
28
|
-
"SandboxConnectionContainer",
|
29
|
-
"SandboxConnectionLocal",
|
30
|
-
"SandboxConnectionSSH",
|
31
27
|
"sandboxenv",
|
32
28
|
"sandbox",
|
33
29
|
"sandbox_with",
|
@@ -9,6 +9,7 @@ from .environment import (
|
|
9
9
|
SampleInit,
|
10
10
|
SandboxConnection,
|
11
11
|
SandboxEnvironment,
|
12
|
+
SandboxEnvironmentConfigType,
|
12
13
|
)
|
13
14
|
from .registry import registry_find_sandboxenv
|
14
15
|
|
@@ -93,7 +94,7 @@ async def sandbox_connections() -> dict[str, SandboxConnection]:
|
|
93
94
|
for name, environment in environments.items():
|
94
95
|
try:
|
95
96
|
connections[name] = await environment.connection()
|
96
|
-
except NotImplementedError:
|
97
|
+
except (NotImplementedError, ConnectionError):
|
97
98
|
pass
|
98
99
|
return connections
|
99
100
|
else:
|
@@ -110,7 +111,7 @@ def raise_no_sandbox() -> NoReturn:
|
|
110
111
|
async def init_sandbox_environments_sample(
|
111
112
|
type: str,
|
112
113
|
task_name: str,
|
113
|
-
config:
|
114
|
+
config: SandboxEnvironmentConfigType | None,
|
114
115
|
files: dict[str, bytes],
|
115
116
|
setup: bytes | None,
|
116
117
|
metadata: dict[str, Any],
|
@@ -149,7 +150,7 @@ async def init_sandbox_environments_sample(
|
|
149
150
|
async def cleanup_sandbox_environments_sample(
|
150
151
|
type: str,
|
151
152
|
task_name: str,
|
152
|
-
config:
|
153
|
+
config: SandboxEnvironmentConfigType | None,
|
153
154
|
environments: dict[str, SandboxEnvironment],
|
154
155
|
interrupted: bool,
|
155
156
|
) -> None:
|
@@ -71,9 +71,15 @@ async def compose_down(project: ComposeProject, quiet: bool = True) -> None:
|
|
71
71
|
|
72
72
|
|
73
73
|
async def compose_cp(
|
74
|
-
src: str,
|
74
|
+
src: str,
|
75
|
+
dest: str,
|
76
|
+
project: ComposeProject,
|
77
|
+
cwd: str | Path | None = None,
|
78
|
+
output_limit: int | None = None,
|
75
79
|
) -> None:
|
76
|
-
result = await compose_command(
|
80
|
+
result = await compose_command(
|
81
|
+
["cp", "--", src, dest], project=project, cwd=cwd, output_limit=output_limit
|
82
|
+
)
|
77
83
|
if not result.success:
|
78
84
|
msg = f"Failed to copy file from '{src}' to '{dest}': {result.stderr}"
|
79
85
|
raise RuntimeError(msg)
|
@@ -149,6 +155,7 @@ async def compose_exec(
|
|
149
155
|
project: ComposeProject,
|
150
156
|
timeout: int | None = None,
|
151
157
|
input: str | bytes | None = None,
|
158
|
+
output_limit: int | None = None,
|
152
159
|
) -> ExecResult[str]:
|
153
160
|
return await compose_command(
|
154
161
|
["exec"] + command,
|
@@ -156,6 +163,7 @@ async def compose_exec(
|
|
156
163
|
timeout=timeout,
|
157
164
|
input=input,
|
158
165
|
forward_env=False,
|
166
|
+
output_limit=output_limit,
|
159
167
|
)
|
160
168
|
|
161
169
|
|
@@ -241,6 +249,7 @@ async def compose_command(
|
|
241
249
|
cwd: str | Path | None = None,
|
242
250
|
forward_env: bool = True,
|
243
251
|
capture_output: bool = True,
|
252
|
+
output_limit: int | None = None,
|
244
253
|
ansi: Literal["never", "always", "auto"] | None = None,
|
245
254
|
) -> ExecResult[str]:
|
246
255
|
# The base docker compose command
|
@@ -278,6 +287,7 @@ async def compose_command(
|
|
278
287
|
env=env,
|
279
288
|
timeout=timeout,
|
280
289
|
capture_output=capture_output,
|
290
|
+
output_limit=output_limit,
|
281
291
|
)
|
282
292
|
sandbox_log(f"compose command completed: {shlex.join(compose_command)}")
|
283
293
|
return result
|
@@ -12,10 +12,14 @@ from inspect_ai.util._subprocess import ExecResult
|
|
12
12
|
|
13
13
|
from ..environment import (
|
14
14
|
SandboxConnection,
|
15
|
-
SandboxConnectionContainer,
|
16
15
|
SandboxEnvironment,
|
16
|
+
SandboxEnvironmentConfigType,
|
17
|
+
)
|
18
|
+
from ..limits import (
|
19
|
+
SandboxEnvironmentLimits,
|
20
|
+
verify_exec_result_size,
|
21
|
+
verify_read_file_size,
|
17
22
|
)
|
18
|
-
from ..limits import verify_exec_result_size, verify_read_file_size
|
19
23
|
from ..registry import sandboxenv
|
20
24
|
from .cleanup import (
|
21
25
|
cli_cleanup,
|
@@ -51,7 +55,9 @@ class DockerSandboxEnvironment(SandboxEnvironment):
|
|
51
55
|
return CONFIG_FILES + [DOCKERFILE]
|
52
56
|
|
53
57
|
@classmethod
|
54
|
-
async def task_init(
|
58
|
+
async def task_init(
|
59
|
+
cls, task_name: str, config: SandboxEnvironmentConfigType | None
|
60
|
+
) -> None:
|
55
61
|
# validate prereqs
|
56
62
|
await validate_prereqs()
|
57
63
|
|
@@ -98,13 +104,16 @@ class DockerSandboxEnvironment(SandboxEnvironment):
|
|
98
104
|
@override
|
99
105
|
@classmethod
|
100
106
|
async def sample_init(
|
101
|
-
cls,
|
107
|
+
cls,
|
108
|
+
task_name: str,
|
109
|
+
config: SandboxEnvironmentConfigType | None,
|
110
|
+
metadata: dict[str, str],
|
102
111
|
) -> dict[str, SandboxEnvironment]:
|
103
112
|
sandbox_log("setup")
|
104
113
|
|
105
114
|
# create environment variables for sample metadata
|
106
115
|
env: dict[str, str] = {}
|
107
|
-
if config and Path(config).exists():
|
116
|
+
if isinstance(config, str) and Path(config).exists():
|
108
117
|
# read the config file
|
109
118
|
with open(config, "r") as f:
|
110
119
|
config_text = f.read()
|
@@ -175,7 +184,7 @@ class DockerSandboxEnvironment(SandboxEnvironment):
|
|
175
184
|
async def sample_cleanup(
|
176
185
|
cls,
|
177
186
|
task_name: str,
|
178
|
-
config:
|
187
|
+
config: SandboxEnvironmentConfigType | None,
|
179
188
|
environments: dict[str, SandboxEnvironment],
|
180
189
|
interrupted: bool,
|
181
190
|
) -> None:
|
@@ -191,7 +200,7 @@ class DockerSandboxEnvironment(SandboxEnvironment):
|
|
191
200
|
|
192
201
|
@classmethod
|
193
202
|
async def task_cleanup(
|
194
|
-
cls, task_name: str, config:
|
203
|
+
cls, task_name: str, config: SandboxEnvironmentConfigType | None, cleanup: bool
|
195
204
|
) -> None:
|
196
205
|
await project_cleanup_shutdown(cleanup)
|
197
206
|
|
@@ -241,6 +250,7 @@ class DockerSandboxEnvironment(SandboxEnvironment):
|
|
241
250
|
project=self._project,
|
242
251
|
timeout=timeout,
|
243
252
|
input=input,
|
253
|
+
output_limit=SandboxEnvironmentLimits.MAX_EXEC_OUTPUT_SIZE,
|
244
254
|
)
|
245
255
|
verify_exec_result_size(exec_result)
|
246
256
|
if exec_result.returncode == 126 and "permission denied" in exec_result.stdout:
|
@@ -369,6 +379,7 @@ class DockerSandboxEnvironment(SandboxEnvironment):
|
|
369
379
|
dest=os.path.basename(dest_file),
|
370
380
|
project=self._project,
|
371
381
|
cwd=os.path.dirname(dest_file),
|
382
|
+
output_limit=SandboxEnvironmentLimits.MAX_READ_FILE_SIZE,
|
372
383
|
)
|
373
384
|
except RuntimeError as ex:
|
374
385
|
# extract the message and normalise case
|
@@ -413,10 +424,9 @@ class DockerSandboxEnvironment(SandboxEnvironment):
|
|
413
424
|
|
414
425
|
# return container login
|
415
426
|
if container:
|
416
|
-
return
|
427
|
+
return SandboxConnection(
|
417
428
|
command=f"docker exec -it {container} /bin/bash --login",
|
418
429
|
container=container,
|
419
|
-
working_dir=self._working_dir,
|
420
430
|
)
|
421
431
|
|
422
432
|
# error (not currently running)
|
@@ -7,6 +7,7 @@ from shortuuid import uuid
|
|
7
7
|
|
8
8
|
from inspect_ai._util.constants import SANDBOX
|
9
9
|
|
10
|
+
from ..environment import SandboxEnvironmentConfigType
|
10
11
|
from .config import (
|
11
12
|
COMPOSE_DOCKERFILE_YAML,
|
12
13
|
auto_compose_file,
|
@@ -26,10 +27,17 @@ class ComposeProject:
|
|
26
27
|
|
27
28
|
@classmethod
|
28
29
|
async def create(
|
29
|
-
cls,
|
30
|
+
cls,
|
31
|
+
name: str,
|
32
|
+
config: SandboxEnvironmentConfigType | None,
|
33
|
+
env: dict[str, str] = {},
|
30
34
|
) -> "ComposeProject":
|
31
35
|
# resolve config to full path if we have one
|
32
|
-
config_path =
|
36
|
+
config_path = None
|
37
|
+
if isinstance(config, str):
|
38
|
+
config_path = Path(config).resolve()
|
39
|
+
elif config is not None:
|
40
|
+
raise ValueError(f"Unsupported config type: {type(config)}. Expected str.")
|
33
41
|
|
34
42
|
# if its a Dockerfile, then config is the auto-generated .compose.yaml
|
35
43
|
if config_path and is_dockerfile(config_path.name):
|