inspect-ai 0.3.55__py3-none-any.whl → 0.3.56__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/__init__.py +1 -0
- inspect_ai/_cli/common.py +1 -1
- inspect_ai/_cli/trace.py +33 -20
- inspect_ai/_display/core/active.py +1 -1
- inspect_ai/_display/core/display.py +1 -1
- inspect_ai/_display/core/footer.py +1 -1
- inspect_ai/_display/core/progress.py +0 -6
- inspect_ai/_display/core/rich.py +1 -1
- inspect_ai/_display/rich/display.py +2 -2
- inspect_ai/_display/textual/app.py +15 -17
- inspect_ai/_display/textual/widgets/clock.py +3 -3
- inspect_ai/_display/textual/widgets/samples.py +6 -13
- inspect_ai/_eval/context.py +9 -1
- inspect_ai/_eval/score.py +4 -10
- inspect_ai/_eval/task/results.py +5 -4
- inspect_ai/_eval/task/run.py +6 -12
- inspect_ai/_eval/task/task.py +10 -0
- inspect_ai/_util/ansi.py +31 -0
- inspect_ai/_util/format.py +7 -0
- inspect_ai/_util/logger.py +12 -12
- inspect_ai/_util/throttle.py +10 -1
- inspect_ai/_util/trace.py +43 -47
- inspect_ai/_util/transcript.py +4 -0
- inspect_ai/_util/vscode.py +51 -0
- inspect_ai/_view/notify.py +2 -1
- inspect_ai/_view/www/App.css +22 -1
- inspect_ai/_view/www/dist/assets/index.css +2374 -2
- inspect_ai/_view/www/dist/assets/index.js +29622 -24424
- inspect_ai/_view/www/log-schema.json +138 -90
- inspect_ai/_view/www/package.json +1 -0
- inspect_ai/_view/www/src/App.mjs +1 -0
- inspect_ai/_view/www/src/appearance/Icons.mjs +2 -0
- inspect_ai/_view/www/src/components/AsciiCinemaPlayer.mjs +74 -0
- inspect_ai/_view/www/src/components/CopyButton.mjs +0 -1
- inspect_ai/_view/www/src/components/HumanBaselineView.mjs +168 -0
- inspect_ai/_view/www/src/components/LightboxCarousel.mjs +217 -0
- inspect_ai/_view/www/src/components/Tools.mjs +11 -3
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +3 -2
- inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +1 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.mjs +56 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +17 -5
- inspect_ai/_view/www/src/types/asciicinema-player.d.ts +26 -0
- inspect_ai/_view/www/src/types/log.d.ts +26 -12
- inspect_ai/_view/www/yarn.lock +44 -0
- inspect_ai/approval/_apply.py +4 -0
- inspect_ai/approval/_human/panel.py +5 -8
- inspect_ai/dataset/_dataset.py +51 -10
- inspect_ai/dataset/_util.py +31 -3
- inspect_ai/log/__init__.py +2 -0
- inspect_ai/log/_log.py +5 -2
- inspect_ai/model/_call_tools.py +4 -2
- inspect_ai/model/_chat_message.py +3 -0
- inspect_ai/model/_model.py +42 -1
- inspect_ai/model/_providers/anthropic.py +4 -0
- inspect_ai/model/_render.py +9 -2
- inspect_ai/scorer/_metric.py +12 -1
- inspect_ai/solver/__init__.py +2 -0
- inspect_ai/solver/_human_agent/agent.py +83 -0
- inspect_ai/solver/_human_agent/commands/__init__.py +36 -0
- inspect_ai/solver/_human_agent/commands/clock.py +70 -0
- inspect_ai/solver/_human_agent/commands/command.py +59 -0
- inspect_ai/solver/_human_agent/commands/instructions.py +74 -0
- inspect_ai/solver/_human_agent/commands/note.py +42 -0
- inspect_ai/solver/_human_agent/commands/score.py +80 -0
- inspect_ai/solver/_human_agent/commands/status.py +62 -0
- inspect_ai/solver/_human_agent/commands/submit.py +151 -0
- inspect_ai/solver/_human_agent/install.py +222 -0
- inspect_ai/solver/_human_agent/panel.py +252 -0
- inspect_ai/solver/_human_agent/service.py +45 -0
- inspect_ai/solver/_human_agent/state.py +55 -0
- inspect_ai/solver/_human_agent/view.py +24 -0
- inspect_ai/solver/_task_state.py +28 -2
- inspect_ai/tool/_tool.py +10 -2
- inspect_ai/tool/_tools/_web_browser/_web_browser.py +13 -10
- inspect_ai/util/__init__.py +8 -4
- inspect_ai/{_util/display.py → util/_display.py} +6 -0
- inspect_ai/util/_panel.py +31 -9
- inspect_ai/util/_sandbox/__init__.py +0 -3
- inspect_ai/util/_sandbox/context.py +5 -1
- inspect_ai/util/_sandbox/docker/compose.py +16 -10
- inspect_ai/util/_sandbox/docker/docker.py +9 -6
- inspect_ai/util/_sandbox/docker/internal.py +1 -1
- inspect_ai/util/_sandbox/docker/util.py +2 -2
- inspect_ai/util/_sandbox/environment.py +6 -5
- inspect_ai/util/_sandbox/local.py +1 -1
- inspect_ai/util/_sandbox/service.py +22 -7
- inspect_ai/util/_store.py +5 -6
- inspect_ai/util/_store_model.py +110 -0
- inspect_ai/util/_throttle.py +32 -0
- {inspect_ai-0.3.55.dist-info → inspect_ai-0.3.56.dist-info}/METADATA +1 -1
- {inspect_ai-0.3.55.dist-info → inspect_ai-0.3.56.dist-info}/RECORD +95 -73
- {inspect_ai-0.3.55.dist-info → inspect_ai-0.3.56.dist-info}/LICENSE +0 -0
- {inspect_ai-0.3.55.dist-info → inspect_ai-0.3.56.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.55.dist-info → inspect_ai-0.3.56.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.55.dist-info → inspect_ai-0.3.56.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,8 @@
|
|
1
1
|
import re
|
2
2
|
from textwrap import dedent
|
3
3
|
|
4
|
+
from pydantic import Field
|
5
|
+
|
4
6
|
from inspect_ai._util.error import PrerequisiteError
|
5
7
|
from inspect_ai.tool._tool import Tool, ToolError, tool
|
6
8
|
from inspect_ai.tool._tool_call import ToolCall, ToolCallContent, ToolCallView
|
@@ -8,7 +10,7 @@ from inspect_ai.tool._tool_info import parse_tool_info
|
|
8
10
|
from inspect_ai.tool._tool_with import tool_with
|
9
11
|
from inspect_ai.util._sandbox import SandboxEnvironment, sandbox_with
|
10
12
|
from inspect_ai.util._sandbox.docker.internal import INSPECT_WEB_BROWSER_IMAGE_DOCKERHUB
|
11
|
-
from inspect_ai.util.
|
13
|
+
from inspect_ai.util._store_model import StoreModel, store_as
|
12
14
|
|
13
15
|
|
14
16
|
def web_browser(interactive: bool = True) -> list[Tool]:
|
@@ -97,12 +99,15 @@ def go_without_interactive_docs(tool: Tool) -> Tool:
|
|
97
99
|
# custom viewer for interactive tool calls that shows a truncated
|
98
100
|
# version of the current web accessibility tree if available
|
99
101
|
|
100
|
-
|
102
|
+
|
103
|
+
class WebBrowserStore(StoreModel):
|
104
|
+
web_at: str = Field(default_factory=str)
|
105
|
+
session_id: str = Field(default_factory=str)
|
101
106
|
|
102
107
|
|
103
108
|
def web_at_viewer(call: ToolCall) -> ToolCallView:
|
104
109
|
# get the web accessibility tree; if we have it, create a view from it
|
105
|
-
web_at =
|
110
|
+
web_at = store_as(WebBrowserStore).web_at
|
106
111
|
element_id = call.arguments.get("element_id", 0)
|
107
112
|
if web_at and element_id:
|
108
113
|
lines = web_at.splitlines()
|
@@ -332,15 +337,14 @@ def web_browser_refresh() -> Tool:
|
|
332
337
|
|
333
338
|
WEB_CLIENT_REQUEST = "/app/web_browser/web_client.py"
|
334
339
|
WEB_CLIENT_NEW_SESSION = "/app/web_browser/web_client_new_session.py"
|
335
|
-
BROWSER_SESSION_ID = "BROWSER_SESSION_ID"
|
336
340
|
|
337
341
|
|
338
342
|
async def web_browser_cmd(cmd: str, *args: str) -> str:
|
339
343
|
sandbox_env = await sandbox_with(WEB_CLIENT_NEW_SESSION)
|
340
344
|
session_flag = ""
|
341
345
|
if sandbox_env:
|
342
|
-
|
343
|
-
if not
|
346
|
+
store = store_as(WebBrowserStore)
|
347
|
+
if not store.session_id:
|
344
348
|
result = await sandbox_env.exec(["python3", WEB_CLIENT_NEW_SESSION])
|
345
349
|
|
346
350
|
if not result.success:
|
@@ -348,10 +352,9 @@ async def web_browser_cmd(cmd: str, *args: str) -> str:
|
|
348
352
|
f"Error creating new web browser session: {result.stderr}"
|
349
353
|
)
|
350
354
|
|
351
|
-
|
352
|
-
store().set(BROWSER_SESSION_ID, browser_session)
|
355
|
+
store.session_id = result.stdout.strip("\n")
|
353
356
|
|
354
|
-
session_flag = f"--session_name={
|
357
|
+
session_flag = f"--session_name={store.session_id}"
|
355
358
|
|
356
359
|
else:
|
357
360
|
sandbox_env = await web_browser_sandbox()
|
@@ -379,7 +382,7 @@ async def web_browser_cmd(cmd: str, *args: str) -> str:
|
|
379
382
|
line.partition("data:image/png;base64")[0] for line in web_at_lines
|
380
383
|
]
|
381
384
|
web_at = "\n".join(web_at_lines)
|
382
|
-
|
385
|
+
store_as(WebBrowserStore).web_at = web_at
|
383
386
|
return web_at
|
384
387
|
elif "error" in response:
|
385
388
|
raise ToolError(str(response.get("error")) or "(unknown error)")
|
inspect_ai/util/__init__.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
from ._concurrency import concurrency
|
2
2
|
from ._console import input_screen
|
3
|
+
from ._display import DisplayType, display_type
|
3
4
|
from ._panel import InputPanel, input_panel
|
4
5
|
from ._resource import resource
|
5
6
|
from ._sandbox import (
|
@@ -11,23 +12,25 @@ from ._sandbox import (
|
|
11
12
|
SandboxEnvironments,
|
12
13
|
SandboxEnvironmentSpec,
|
13
14
|
SandboxEnvironmentType,
|
14
|
-
SandboxService,
|
15
15
|
sandbox,
|
16
|
-
sandbox_service,
|
17
16
|
sandbox_with,
|
18
17
|
sandboxenv,
|
19
18
|
)
|
20
19
|
from ._store import Store, store
|
20
|
+
from ._store_model import StoreModel, store_as
|
21
21
|
from ._subprocess import (
|
22
22
|
ExecResult,
|
23
23
|
subprocess,
|
24
24
|
)
|
25
25
|
from ._subtask import Subtask, subtask
|
26
|
+
from ._throttle import throttle
|
26
27
|
from ._trace import trace_enabled, trace_panel
|
27
28
|
|
28
29
|
__all__ = [
|
29
30
|
"ExecResult",
|
30
31
|
"concurrency",
|
32
|
+
"DisplayType",
|
33
|
+
"display_type",
|
31
34
|
"InputPanel",
|
32
35
|
"input_panel",
|
33
36
|
"input_screen",
|
@@ -44,12 +47,13 @@ __all__ = [
|
|
44
47
|
"sandboxenv",
|
45
48
|
"sandbox",
|
46
49
|
"sandbox_with",
|
47
|
-
"SandboxService",
|
48
|
-
"sandbox_service",
|
49
50
|
"Store",
|
50
51
|
"store",
|
52
|
+
"StoreModel",
|
53
|
+
"store_as",
|
51
54
|
"Subtask",
|
52
55
|
"subtask",
|
56
|
+
"throttle",
|
53
57
|
"trace_enabled",
|
54
58
|
"trace_panel",
|
55
59
|
]
|
@@ -7,6 +7,7 @@ from inspect_ai._util.constants import DEFAULT_DISPLAY
|
|
7
7
|
logger = getLogger(__name__)
|
8
8
|
|
9
9
|
DisplayType = Literal["full", "rich", "plain", "none"]
|
10
|
+
"""Console display type."""
|
10
11
|
|
11
12
|
|
12
13
|
_display_type: DisplayType | None = None
|
@@ -28,6 +29,11 @@ def init_display_type(display: str | None = None) -> DisplayType:
|
|
28
29
|
|
29
30
|
|
30
31
|
def display_type() -> DisplayType:
|
32
|
+
"""Get the current console display type.
|
33
|
+
|
34
|
+
Returns:
|
35
|
+
DisplayType: Display type.
|
36
|
+
"""
|
31
37
|
global _display_type
|
32
38
|
if _display_type:
|
33
39
|
return _display_type
|
inspect_ai/util/_panel.py
CHANGED
@@ -1,23 +1,46 @@
|
|
1
1
|
from typing import Any, Protocol, TypeVar
|
2
2
|
|
3
3
|
from textual.containers import Container
|
4
|
+
from typing_extensions import Self
|
4
5
|
|
5
6
|
|
6
7
|
class InputPanel(Container):
|
8
|
+
DEFAULT_TITLE = "Panel"
|
9
|
+
|
7
10
|
DEFAULT_CLASSES = "task-input-panel"
|
8
11
|
|
12
|
+
DEFAULT_CSS = """
|
13
|
+
InputPanel {
|
14
|
+
padding: 0 1 1 1;
|
15
|
+
}
|
16
|
+
"""
|
17
|
+
|
9
18
|
class Host(Protocol):
|
10
19
|
def set_title(self, title: str) -> None: ...
|
11
20
|
def activate(self) -> None: ...
|
12
21
|
def deactivate(self) -> None: ...
|
13
22
|
def close(self) -> None: ...
|
14
23
|
|
15
|
-
def __init__(self,
|
24
|
+
def __init__(self, host: Host) -> None:
|
25
|
+
"""Initialise the panel.
|
26
|
+
|
27
|
+
Panels are created as required by the input_panel() function so
|
28
|
+
you should NOT override __init__ with your own initialisation (rather,
|
29
|
+
you should define reactive props and/or methods that perform
|
30
|
+
initialisation).
|
31
|
+
|
32
|
+
You should also override the `DEFAULT_TITLE` variable for your panel to
|
33
|
+
provide a default tab title (you can change the title dynamically as
|
34
|
+
required using the `title` property).
|
35
|
+
|
36
|
+
Args:
|
37
|
+
host (InputPanel.Host): Interface to UI host of input panel.
|
38
|
+
"""
|
16
39
|
super().__init__()
|
17
|
-
self._title =
|
40
|
+
self._title = self.DEFAULT_TITLE
|
18
41
|
self._host = host
|
19
42
|
|
20
|
-
async def __aenter__(self) ->
|
43
|
+
async def __aenter__(self) -> Self:
|
21
44
|
self.activate()
|
22
45
|
return self
|
23
46
|
|
@@ -50,10 +73,10 @@ class InputPanel(Container):
|
|
50
73
|
pass
|
51
74
|
|
52
75
|
|
53
|
-
TP = TypeVar("TP", bound=InputPanel)
|
76
|
+
TP = TypeVar("TP", bound=InputPanel, covariant=True)
|
54
77
|
|
55
78
|
|
56
|
-
async def input_panel(
|
79
|
+
async def input_panel(panel: type[TP]) -> TP:
|
57
80
|
"""Create an input panel in the task display.
|
58
81
|
|
59
82
|
There can only be a single instance of an InputPanel with a given
|
@@ -65,19 +88,18 @@ async def input_panel(title: str, panel: type[TP]) -> TP:
|
|
65
88
|
the scope exits -- see below for open/close semantics)
|
66
89
|
|
67
90
|
```python
|
68
|
-
panel = await input_panel(
|
91
|
+
panel = await input_panel(CustomPanel)
|
69
92
|
panel.activate()
|
70
93
|
```
|
71
94
|
|
72
95
|
Activate and close an input panel using a context manager:
|
73
96
|
|
74
97
|
```python
|
75
|
-
async with await input_panel(
|
98
|
+
async with await input_panel(CustomPanel) as panel:
|
76
99
|
...
|
77
100
|
```
|
78
101
|
|
79
102
|
Args:
|
80
|
-
title (str): Input panel title.
|
81
103
|
panel (type[TP]): Type of panel widget (must derive from `InputPanel`)
|
82
104
|
|
83
105
|
Returns:
|
@@ -88,4 +110,4 @@ async def input_panel(title: str, panel: type[TP]) -> TP:
|
|
88
110
|
"""
|
89
111
|
from inspect_ai._display.core.active import task_screen
|
90
112
|
|
91
|
-
return await task_screen().input_panel(
|
113
|
+
return await task_screen().input_panel(panel)
|
@@ -13,7 +13,6 @@ from .environment import (
|
|
13
13
|
from .limits import OutputLimitExceededError, SandboxEnvironmentLimits
|
14
14
|
from .local import LocalSandboxEnvironment # noqa: F401
|
15
15
|
from .registry import sandboxenv
|
16
|
-
from .service import SandboxService, sandbox_service
|
17
16
|
|
18
17
|
__all__ = [
|
19
18
|
"OutputLimitExceededError",
|
@@ -27,6 +26,4 @@ __all__ = [
|
|
27
26
|
"sandboxenv",
|
28
27
|
"sandbox",
|
29
28
|
"sandbox_with",
|
30
|
-
"SandboxService",
|
31
|
-
"sandbox_service",
|
32
29
|
]
|
@@ -24,6 +24,10 @@ def sandbox(name: str | None = None) -> SandboxEnvironment:
|
|
24
24
|
|
25
25
|
Return:
|
26
26
|
SandboxEnvironment instance.
|
27
|
+
|
28
|
+
Raises:
|
29
|
+
ProcessLookupError: If there are no sandboxes available.
|
30
|
+
ValueError: If an invalid sandbox name is specified.
|
27
31
|
"""
|
28
32
|
# verify we have a context
|
29
33
|
environments = sandbox_environments_context_var.get(None)
|
@@ -102,7 +106,7 @@ async def sandbox_connections() -> dict[str, SandboxConnection]:
|
|
102
106
|
|
103
107
|
|
104
108
|
def raise_no_sandbox() -> NoReturn:
|
105
|
-
raise
|
109
|
+
raise ProcessLookupError(
|
106
110
|
"No sandbox environment has been provided for the current sample or task. "
|
107
111
|
+ "Please specify a sandbox for the sample or a global default sandbox for the task"
|
108
112
|
)
|
@@ -8,8 +8,8 @@ from typing import Any, Literal, TypedDict, cast
|
|
8
8
|
import yaml
|
9
9
|
from pydantic import BaseModel
|
10
10
|
|
11
|
-
from inspect_ai._util.display import display_type
|
12
11
|
from inspect_ai._util.error import PrerequisiteError
|
12
|
+
from inspect_ai.util._display import display_type
|
13
13
|
from inspect_ai.util._subprocess import ExecResult, subprocess
|
14
14
|
|
15
15
|
from .prereqs import (
|
@@ -29,6 +29,8 @@ async def compose_up(project: ComposeProject) -> None:
|
|
29
29
|
result = await compose_command(
|
30
30
|
["up", "--detach", "--wait", "--wait-timeout", COMPOSE_WAIT],
|
31
31
|
project=project,
|
32
|
+
# wait up to 5 minutes for container to go up (compose wait + 3 minutes)
|
33
|
+
timeout=300,
|
32
34
|
)
|
33
35
|
if not result.success:
|
34
36
|
msg = (
|
@@ -80,7 +82,11 @@ async def compose_cp(
|
|
80
82
|
output_limit: int | None = None,
|
81
83
|
) -> None:
|
82
84
|
result = await compose_command(
|
83
|
-
["cp", "--", src, dest],
|
85
|
+
["cp", "--", src, dest],
|
86
|
+
project=project,
|
87
|
+
timeout=120, # 2-minute timeout for file copies
|
88
|
+
cwd=cwd,
|
89
|
+
output_limit=output_limit,
|
84
90
|
)
|
85
91
|
if not result.success:
|
86
92
|
msg = f"Failed to copy file from '{src}' to '{dest}': {result.stderr}"
|
@@ -118,7 +124,7 @@ async def compose_ps(
|
|
118
124
|
command.append("--all")
|
119
125
|
if status:
|
120
126
|
command = command + ["--status", status]
|
121
|
-
result = await compose_command(command, project=project)
|
127
|
+
result = await compose_command(command, project=project, timeout=60)
|
122
128
|
if not result.success:
|
123
129
|
msg = f"Error querying for running services: {result.stderr}"
|
124
130
|
raise RuntimeError(msg)
|
@@ -136,6 +142,7 @@ async def compose_build(project: ComposeProject, capture_output: bool = False) -
|
|
136
142
|
result = await compose_command(
|
137
143
|
["build"],
|
138
144
|
project=project,
|
145
|
+
timeout=None, # no timeout for build
|
139
146
|
capture_output=capture_output,
|
140
147
|
)
|
141
148
|
if not result.success:
|
@@ -151,6 +158,7 @@ async def compose_pull(
|
|
151
158
|
return await compose_command(
|
152
159
|
["pull", "--ignore-buildable", "--policy", "missing", service],
|
153
160
|
project=project,
|
161
|
+
timeout=None, # no timeout for pull
|
154
162
|
capture_output=capture_output,
|
155
163
|
)
|
156
164
|
|
@@ -185,7 +193,7 @@ ComposeService = TypedDict(
|
|
185
193
|
|
186
194
|
|
187
195
|
async def compose_services(project: ComposeProject) -> dict[str, ComposeService]:
|
188
|
-
result = await compose_command(["config"], project=project)
|
196
|
+
result = await compose_command(["config"], project=project, timeout=60)
|
189
197
|
if not result.success:
|
190
198
|
raise RuntimeError(f"Error reading docker config: {result.stderr}")
|
191
199
|
return cast(dict[str, ComposeService], yaml.safe_load(result.stdout)["services"])
|
@@ -209,12 +217,13 @@ async def compose_ls() -> list[Project]:
|
|
209
217
|
|
210
218
|
async def compose_cleanup_images(
|
211
219
|
project: ComposeProject,
|
220
|
+
*,
|
212
221
|
cwd: str | None = None,
|
213
|
-
timeout: int | None
|
222
|
+
timeout: int | None,
|
214
223
|
) -> None:
|
215
224
|
# List the images that would be created for this compose
|
216
225
|
images_result = await compose_command(
|
217
|
-
["config", "--images"], project=project, cwd=cwd
|
226
|
+
["config", "--images"], project=project, timeout=timeout, cwd=cwd
|
218
227
|
)
|
219
228
|
|
220
229
|
# Remove those images explicitly
|
@@ -246,14 +255,11 @@ async def compose_cleanup_images(
|
|
246
255
|
logger.warning(msg)
|
247
256
|
|
248
257
|
|
249
|
-
DEFAULT_COMPOSE_TIMEOUT = 60
|
250
|
-
|
251
|
-
|
252
258
|
async def compose_command(
|
253
259
|
command: list[str],
|
254
260
|
*,
|
255
261
|
project: ComposeProject,
|
256
|
-
timeout: int | None
|
262
|
+
timeout: int | None,
|
257
263
|
input: str | bytes | None = None,
|
258
264
|
cwd: str | Path | None = None,
|
259
265
|
forward_env: bool = True,
|
@@ -78,7 +78,7 @@ class DockerSandboxEnvironment(SandboxEnvironment):
|
|
78
78
|
await compose_build(project)
|
79
79
|
|
80
80
|
# cleanup images created during build
|
81
|
-
await compose_cleanup_images(project)
|
81
|
+
await compose_cleanup_images(project, timeout=60)
|
82
82
|
|
83
83
|
services = await compose_services(project)
|
84
84
|
for name, service in services.items():
|
@@ -326,6 +326,7 @@ class DockerSandboxEnvironment(SandboxEnvironment):
|
|
326
326
|
container_tmpfile,
|
327
327
|
],
|
328
328
|
project=self._project,
|
329
|
+
timeout=60,
|
329
330
|
)
|
330
331
|
|
331
332
|
parent = PurePosixPath(file).parent
|
@@ -405,7 +406,7 @@ class DockerSandboxEnvironment(SandboxEnvironment):
|
|
405
406
|
|
406
407
|
# read and return w/ appropriate encoding
|
407
408
|
if text:
|
408
|
-
with open(dest_file, "r", encoding="utf-8") as f:
|
409
|
+
with open(dest_file, "r", newline="", encoding="utf-8") as f:
|
409
410
|
return f.read()
|
410
411
|
else:
|
411
412
|
with open(dest_file, "rb") as f:
|
@@ -424,13 +425,15 @@ class DockerSandboxEnvironment(SandboxEnvironment):
|
|
424
425
|
None,
|
425
426
|
)
|
426
427
|
|
427
|
-
# return container
|
428
|
+
# return container connection
|
428
429
|
if container:
|
429
430
|
return SandboxConnection(
|
430
|
-
command=f"docker exec -it {container}
|
431
|
-
|
431
|
+
command=f"docker exec -it {container} bash -l",
|
432
|
+
vscode_command=[
|
433
|
+
"remote-containers.attachToRunningContainer",
|
434
|
+
container,
|
435
|
+
],
|
432
436
|
)
|
433
|
-
|
434
437
|
# error (not currently running)
|
435
438
|
else:
|
436
439
|
raise ConnectionError(
|
@@ -1,6 +1,6 @@
|
|
1
1
|
from inspect_ai._util.constants import PKG_PATH
|
2
|
-
from inspect_ai._util.display import display_type
|
3
2
|
from inspect_ai._util.error import PrerequisiteError
|
3
|
+
from inspect_ai.util._display import display_type
|
4
4
|
from inspect_ai.util._subprocess import subprocess
|
5
5
|
|
6
6
|
INSPECT_WEB_BROWSER_IMAGE_DOCKERHUB = "aisiuk/inspect-web-browser-tool"
|
@@ -84,10 +84,10 @@ def task_project_name(task: str) -> str:
|
|
84
84
|
if len(task) == 0:
|
85
85
|
task = "task"
|
86
86
|
|
87
|
-
return f"inspect-{task}-i{uuid().lower()}"
|
87
|
+
return f"inspect-{task[:12]}-i{uuid().lower()[:6]}"
|
88
88
|
|
89
89
|
|
90
|
-
inspect_project_pattern = r"^inspect-[a-z\d\-_]*-i[a-z\d]{
|
90
|
+
inspect_project_pattern = r"^inspect-[a-z\d\-_]*-i[a-z\d]{6,}$"
|
91
91
|
|
92
92
|
|
93
93
|
def is_inspect_project(name: str) -> bool:
|
@@ -2,7 +2,7 @@ from __future__ import annotations
|
|
2
2
|
|
3
3
|
import abc
|
4
4
|
from dataclasses import dataclass, field
|
5
|
-
from typing import Awaitable, Callable, Literal, NamedTuple, Union, overload
|
5
|
+
from typing import Any, Awaitable, Callable, Literal, NamedTuple, Union, overload
|
6
6
|
|
7
7
|
from pydantic import BaseModel, Field
|
8
8
|
|
@@ -34,12 +34,9 @@ class SandboxConnection(BaseModel):
|
|
34
34
|
command: str
|
35
35
|
"""Shell command to connect to sandbox."""
|
36
36
|
|
37
|
-
vscode_command: list[
|
37
|
+
vscode_command: list[Any] | None = Field(default=None)
|
38
38
|
"""Optional vscode command (+args) to connect to sandbox."""
|
39
39
|
|
40
|
-
container: str | None = Field(default=None)
|
41
|
-
"""Optional container name (will not apply to all sandboxes)."""
|
42
|
-
|
43
40
|
|
44
41
|
class SandboxEnvironment(abc.ABC):
|
45
42
|
"""Environment for executing arbitrary code from tools.
|
@@ -205,6 +202,10 @@ class SandboxEnvironment(abc.ABC):
|
|
205
202
|
|
206
203
|
File size is limited to 100 MiB.
|
207
204
|
|
205
|
+
When reading text files, implementations should preserve newline constructs
|
206
|
+
(e.g. crlf should be preserved not converted to lf). This is equivalent
|
207
|
+
to specifying `newline=""` in a call to the Python `open()` function.
|
208
|
+
|
208
209
|
Args:
|
209
210
|
file (str): Path to file (relative file paths will resolve to the
|
210
211
|
per-sample working directory).
|
@@ -101,7 +101,7 @@ class LocalSandboxEnvironment(SandboxEnvironment):
|
|
101
101
|
file = self._resolve_file(file)
|
102
102
|
verify_read_file_size(file)
|
103
103
|
if text:
|
104
|
-
with open(file, "r", encoding="utf-8") as f:
|
104
|
+
with open(file, "r", newline="", encoding="utf-8") as f:
|
105
105
|
return f.read()
|
106
106
|
else:
|
107
107
|
with open(file, "rb") as f:
|
@@ -1,5 +1,6 @@
|
|
1
1
|
import asyncio
|
2
2
|
import json
|
3
|
+
from logging import getLogger
|
3
4
|
from pathlib import PurePosixPath
|
4
5
|
from textwrap import dedent
|
5
6
|
from typing import (
|
@@ -14,9 +15,12 @@ from inspect_ai.util._subprocess import ExecResult
|
|
14
15
|
|
15
16
|
from .environment import SandboxEnvironment
|
16
17
|
|
18
|
+
logger = getLogger(__name__)
|
19
|
+
|
20
|
+
|
17
21
|
REQUESTS_DIR = "requests"
|
18
22
|
RESPONSES_DIR = "responses"
|
19
|
-
SERVICES_DIR = "/tmp/
|
23
|
+
SERVICES_DIR = "/var/tmp/sandbox-services"
|
20
24
|
|
21
25
|
ID = "id"
|
22
26
|
METHOD = "method"
|
@@ -70,7 +74,7 @@ class SandboxService:
|
|
70
74
|
|
71
75
|
```python
|
72
76
|
import sys
|
73
|
-
sys.path.append("/tmp/
|
77
|
+
sys.path.append("/var/tmp/sandbox-services/foo")
|
74
78
|
import foo
|
75
79
|
```
|
76
80
|
|
@@ -79,7 +83,7 @@ class SandboxService:
|
|
79
83
|
```python
|
80
84
|
import importlib.util
|
81
85
|
spec = importlib.util.spec_from_file_location(
|
82
|
-
"foo", "/tmp/
|
86
|
+
"foo", "/var/tmp/sandbox-services/foo/foo.py"
|
83
87
|
)
|
84
88
|
foo = importlib.util.module_from_spec(spec)
|
85
89
|
spec.loader.exec_module(foo)
|
@@ -150,8 +154,14 @@ class SandboxService:
|
|
150
154
|
f"Error reading request for service {self._name}: '{read_request}' ({result.stderr})"
|
151
155
|
)
|
152
156
|
|
153
|
-
# parse request
|
154
|
-
|
157
|
+
# parse request (decode error could occur if it's incomplete so bypass this)
|
158
|
+
try:
|
159
|
+
request_data = json.loads(result.stdout)
|
160
|
+
except json.JSONDecodeError:
|
161
|
+
logger.warning(
|
162
|
+
f"JSON decoding error reading service request: {result.stdout}"
|
163
|
+
)
|
164
|
+
return None
|
155
165
|
if not isinstance(request_data, dict):
|
156
166
|
raise TypeError(f"Service request is not a dict (type={request_data})")
|
157
167
|
|
@@ -275,7 +285,7 @@ class SandboxService:
|
|
275
285
|
return request_id
|
276
286
|
|
277
287
|
def _read_{self._name}_response(request_id: str) -> tuple[bool, Any]:
|
278
|
-
from json import load
|
288
|
+
from json import JSONDecodeError, load
|
279
289
|
from pathlib import Path
|
280
290
|
|
281
291
|
responses_dir = Path("{SERVICES_DIR}", "{self._name}", "{RESPONSES_DIR}")
|
@@ -283,7 +293,12 @@ class SandboxService:
|
|
283
293
|
if response_path.exists():
|
284
294
|
# read and remove the file
|
285
295
|
with open(response_path, "r") as f:
|
286
|
-
|
296
|
+
# it's possible the file is still being written so
|
297
|
+
# just catch and wait for another retry if this occurs
|
298
|
+
try:
|
299
|
+
response = load(f)
|
300
|
+
except JSONDecodeError:
|
301
|
+
return False, None
|
287
302
|
response_path.unlink()
|
288
303
|
|
289
304
|
# raise error if we have one
|
inspect_ai/util/_store.py
CHANGED
@@ -38,14 +38,10 @@ class Store:
|
|
38
38
|
self._data: dict[str, Any] = {}
|
39
39
|
|
40
40
|
@overload
|
41
|
-
def get(self, key: str, default: None = None) -> Any:
|
42
|
-
return self._data.get(key, default)
|
41
|
+
def get(self, key: str, default: None = None) -> Any: ...
|
43
42
|
|
44
43
|
@overload
|
45
|
-
def get(self, key: str, default: VT) -> VT:
|
46
|
-
if key not in self._data.keys():
|
47
|
-
self._data[key] = default
|
48
|
-
return cast(VT, self._data.get(key, default))
|
44
|
+
def get(self, key: str, default: VT) -> VT: ...
|
49
45
|
|
50
46
|
def get(self, key: str, default: VT | None = None) -> VT | Any:
|
51
47
|
"""Get a value from the store.
|
@@ -60,6 +56,9 @@ class Store:
|
|
60
56
|
Returns:
|
61
57
|
Value if it exists, otherwise default.
|
62
58
|
"""
|
59
|
+
if default is not None:
|
60
|
+
if key not in self._data.keys():
|
61
|
+
self._data[key] = default
|
63
62
|
return cast(VT, self._data.get(key, default))
|
64
63
|
|
65
64
|
def set(self, key: str, value: Any) -> None:
|