inspect-ai 0.3.55__py3-none-any.whl → 0.3.56__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95)
  1. inspect_ai/__init__.py +1 -0
  2. inspect_ai/_cli/common.py +1 -1
  3. inspect_ai/_cli/trace.py +33 -20
  4. inspect_ai/_display/core/active.py +1 -1
  5. inspect_ai/_display/core/display.py +1 -1
  6. inspect_ai/_display/core/footer.py +1 -1
  7. inspect_ai/_display/core/progress.py +0 -6
  8. inspect_ai/_display/core/rich.py +1 -1
  9. inspect_ai/_display/rich/display.py +2 -2
  10. inspect_ai/_display/textual/app.py +15 -17
  11. inspect_ai/_display/textual/widgets/clock.py +3 -3
  12. inspect_ai/_display/textual/widgets/samples.py +6 -13
  13. inspect_ai/_eval/context.py +9 -1
  14. inspect_ai/_eval/score.py +4 -10
  15. inspect_ai/_eval/task/results.py +5 -4
  16. inspect_ai/_eval/task/run.py +6 -12
  17. inspect_ai/_eval/task/task.py +10 -0
  18. inspect_ai/_util/ansi.py +31 -0
  19. inspect_ai/_util/format.py +7 -0
  20. inspect_ai/_util/logger.py +12 -12
  21. inspect_ai/_util/throttle.py +10 -1
  22. inspect_ai/_util/trace.py +43 -47
  23. inspect_ai/_util/transcript.py +4 -0
  24. inspect_ai/_util/vscode.py +51 -0
  25. inspect_ai/_view/notify.py +2 -1
  26. inspect_ai/_view/www/App.css +22 -1
  27. inspect_ai/_view/www/dist/assets/index.css +2374 -2
  28. inspect_ai/_view/www/dist/assets/index.js +29622 -24424
  29. inspect_ai/_view/www/log-schema.json +138 -90
  30. inspect_ai/_view/www/package.json +1 -0
  31. inspect_ai/_view/www/src/App.mjs +1 -0
  32. inspect_ai/_view/www/src/appearance/Icons.mjs +2 -0
  33. inspect_ai/_view/www/src/components/AsciiCinemaPlayer.mjs +74 -0
  34. inspect_ai/_view/www/src/components/CopyButton.mjs +0 -1
  35. inspect_ai/_view/www/src/components/HumanBaselineView.mjs +168 -0
  36. inspect_ai/_view/www/src/components/LightboxCarousel.mjs +217 -0
  37. inspect_ai/_view/www/src/components/Tools.mjs +11 -3
  38. inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +3 -2
  39. inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +1 -0
  40. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.mjs +56 -0
  41. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +17 -5
  42. inspect_ai/_view/www/src/types/asciicinema-player.d.ts +26 -0
  43. inspect_ai/_view/www/src/types/log.d.ts +26 -12
  44. inspect_ai/_view/www/yarn.lock +44 -0
  45. inspect_ai/approval/_apply.py +4 -0
  46. inspect_ai/approval/_human/panel.py +5 -8
  47. inspect_ai/dataset/_dataset.py +51 -10
  48. inspect_ai/dataset/_util.py +31 -3
  49. inspect_ai/log/__init__.py +2 -0
  50. inspect_ai/log/_log.py +5 -2
  51. inspect_ai/model/_call_tools.py +4 -2
  52. inspect_ai/model/_chat_message.py +3 -0
  53. inspect_ai/model/_model.py +42 -1
  54. inspect_ai/model/_providers/anthropic.py +4 -0
  55. inspect_ai/model/_render.py +9 -2
  56. inspect_ai/scorer/_metric.py +12 -1
  57. inspect_ai/solver/__init__.py +2 -0
  58. inspect_ai/solver/_human_agent/agent.py +83 -0
  59. inspect_ai/solver/_human_agent/commands/__init__.py +36 -0
  60. inspect_ai/solver/_human_agent/commands/clock.py +70 -0
  61. inspect_ai/solver/_human_agent/commands/command.py +59 -0
  62. inspect_ai/solver/_human_agent/commands/instructions.py +74 -0
  63. inspect_ai/solver/_human_agent/commands/note.py +42 -0
  64. inspect_ai/solver/_human_agent/commands/score.py +80 -0
  65. inspect_ai/solver/_human_agent/commands/status.py +62 -0
  66. inspect_ai/solver/_human_agent/commands/submit.py +151 -0
  67. inspect_ai/solver/_human_agent/install.py +222 -0
  68. inspect_ai/solver/_human_agent/panel.py +252 -0
  69. inspect_ai/solver/_human_agent/service.py +45 -0
  70. inspect_ai/solver/_human_agent/state.py +55 -0
  71. inspect_ai/solver/_human_agent/view.py +24 -0
  72. inspect_ai/solver/_task_state.py +28 -2
  73. inspect_ai/tool/_tool.py +10 -2
  74. inspect_ai/tool/_tools/_web_browser/_web_browser.py +13 -10
  75. inspect_ai/util/__init__.py +8 -4
  76. inspect_ai/{_util/display.py → util/_display.py} +6 -0
  77. inspect_ai/util/_panel.py +31 -9
  78. inspect_ai/util/_sandbox/__init__.py +0 -3
  79. inspect_ai/util/_sandbox/context.py +5 -1
  80. inspect_ai/util/_sandbox/docker/compose.py +16 -10
  81. inspect_ai/util/_sandbox/docker/docker.py +9 -6
  82. inspect_ai/util/_sandbox/docker/internal.py +1 -1
  83. inspect_ai/util/_sandbox/docker/util.py +2 -2
  84. inspect_ai/util/_sandbox/environment.py +6 -5
  85. inspect_ai/util/_sandbox/local.py +1 -1
  86. inspect_ai/util/_sandbox/service.py +22 -7
  87. inspect_ai/util/_store.py +5 -6
  88. inspect_ai/util/_store_model.py +110 -0
  89. inspect_ai/util/_throttle.py +32 -0
  90. {inspect_ai-0.3.55.dist-info → inspect_ai-0.3.56.dist-info}/METADATA +1 -1
  91. {inspect_ai-0.3.55.dist-info → inspect_ai-0.3.56.dist-info}/RECORD +95 -73
  92. {inspect_ai-0.3.55.dist-info → inspect_ai-0.3.56.dist-info}/LICENSE +0 -0
  93. {inspect_ai-0.3.55.dist-info → inspect_ai-0.3.56.dist-info}/WHEEL +0 -0
  94. {inspect_ai-0.3.55.dist-info → inspect_ai-0.3.56.dist-info}/entry_points.txt +0 -0
  95. {inspect_ai-0.3.55.dist-info → inspect_ai-0.3.56.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,8 @@
1
1
  import re
2
2
  from textwrap import dedent
3
3
 
4
+ from pydantic import Field
5
+
4
6
  from inspect_ai._util.error import PrerequisiteError
5
7
  from inspect_ai.tool._tool import Tool, ToolError, tool
6
8
  from inspect_ai.tool._tool_call import ToolCall, ToolCallContent, ToolCallView
@@ -8,7 +10,7 @@ from inspect_ai.tool._tool_info import parse_tool_info
8
10
  from inspect_ai.tool._tool_with import tool_with
9
11
  from inspect_ai.util._sandbox import SandboxEnvironment, sandbox_with
10
12
  from inspect_ai.util._sandbox.docker.internal import INSPECT_WEB_BROWSER_IMAGE_DOCKERHUB
11
- from inspect_ai.util._store import store
13
+ from inspect_ai.util._store_model import StoreModel, store_as
12
14
 
13
15
 
14
16
  def web_browser(interactive: bool = True) -> list[Tool]:
@@ -97,12 +99,15 @@ def go_without_interactive_docs(tool: Tool) -> Tool:
97
99
  # custom viewer for interactive tool calls that shows a truncated
98
100
  # version of the current web accessibility tree if available
99
101
 
100
- WEB_BROWSER_AT = "web_browser:at"
102
+
103
+ class WebBrowserStore(StoreModel):
104
+ web_at: str = Field(default_factory=str)
105
+ session_id: str = Field(default_factory=str)
101
106
 
102
107
 
103
108
  def web_at_viewer(call: ToolCall) -> ToolCallView:
104
109
  # get the web accessibility tree; if we have it, create a view from it
105
- web_at = store().get(WEB_BROWSER_AT, "")
110
+ web_at = store_as(WebBrowserStore).web_at
106
111
  element_id = call.arguments.get("element_id", 0)
107
112
  if web_at and element_id:
108
113
  lines = web_at.splitlines()
@@ -332,15 +337,14 @@ def web_browser_refresh() -> Tool:
332
337
 
333
338
  WEB_CLIENT_REQUEST = "/app/web_browser/web_client.py"
334
339
  WEB_CLIENT_NEW_SESSION = "/app/web_browser/web_client_new_session.py"
335
- BROWSER_SESSION_ID = "BROWSER_SESSION_ID"
336
340
 
337
341
 
338
342
  async def web_browser_cmd(cmd: str, *args: str) -> str:
339
343
  sandbox_env = await sandbox_with(WEB_CLIENT_NEW_SESSION)
340
344
  session_flag = ""
341
345
  if sandbox_env:
342
- browser_session = store().get(BROWSER_SESSION_ID, "")
343
- if not browser_session:
346
+ store = store_as(WebBrowserStore)
347
+ if not store.session_id:
344
348
  result = await sandbox_env.exec(["python3", WEB_CLIENT_NEW_SESSION])
345
349
 
346
350
  if not result.success:
@@ -348,10 +352,9 @@ async def web_browser_cmd(cmd: str, *args: str) -> str:
348
352
  f"Error creating new web browser session: {result.stderr}"
349
353
  )
350
354
 
351
- browser_session = result.stdout.strip("\n")
352
- store().set(BROWSER_SESSION_ID, browser_session)
355
+ store.session_id = result.stdout.strip("\n")
353
356
 
354
- session_flag = f"--session_name={browser_session}"
357
+ session_flag = f"--session_name={store.session_id}"
355
358
 
356
359
  else:
357
360
  sandbox_env = await web_browser_sandbox()
@@ -379,7 +382,7 @@ async def web_browser_cmd(cmd: str, *args: str) -> str:
379
382
  line.partition("data:image/png;base64")[0] for line in web_at_lines
380
383
  ]
381
384
  web_at = "\n".join(web_at_lines)
382
- store().set(WEB_BROWSER_AT, web_at)
385
+ store_as(WebBrowserStore).web_at = web_at
383
386
  return web_at
384
387
  elif "error" in response:
385
388
  raise ToolError(str(response.get("error")) or "(unknown error)")
@@ -1,5 +1,6 @@
1
1
  from ._concurrency import concurrency
2
2
  from ._console import input_screen
3
+ from ._display import DisplayType, display_type
3
4
  from ._panel import InputPanel, input_panel
4
5
  from ._resource import resource
5
6
  from ._sandbox import (
@@ -11,23 +12,25 @@ from ._sandbox import (
11
12
  SandboxEnvironments,
12
13
  SandboxEnvironmentSpec,
13
14
  SandboxEnvironmentType,
14
- SandboxService,
15
15
  sandbox,
16
- sandbox_service,
17
16
  sandbox_with,
18
17
  sandboxenv,
19
18
  )
20
19
  from ._store import Store, store
20
+ from ._store_model import StoreModel, store_as
21
21
  from ._subprocess import (
22
22
  ExecResult,
23
23
  subprocess,
24
24
  )
25
25
  from ._subtask import Subtask, subtask
26
+ from ._throttle import throttle
26
27
  from ._trace import trace_enabled, trace_panel
27
28
 
28
29
  __all__ = [
29
30
  "ExecResult",
30
31
  "concurrency",
32
+ "DisplayType",
33
+ "display_type",
31
34
  "InputPanel",
32
35
  "input_panel",
33
36
  "input_screen",
@@ -44,12 +47,13 @@ __all__ = [
44
47
  "sandboxenv",
45
48
  "sandbox",
46
49
  "sandbox_with",
47
- "SandboxService",
48
- "sandbox_service",
49
50
  "Store",
50
51
  "store",
52
+ "StoreModel",
53
+ "store_as",
51
54
  "Subtask",
52
55
  "subtask",
56
+ "throttle",
53
57
  "trace_enabled",
54
58
  "trace_panel",
55
59
  ]
@@ -7,6 +7,7 @@ from inspect_ai._util.constants import DEFAULT_DISPLAY
7
7
  logger = getLogger(__name__)
8
8
 
9
9
  DisplayType = Literal["full", "rich", "plain", "none"]
10
+ """Console display type."""
10
11
 
11
12
 
12
13
  _display_type: DisplayType | None = None
@@ -28,6 +29,11 @@ def init_display_type(display: str | None = None) -> DisplayType:
28
29
 
29
30
 
30
31
  def display_type() -> DisplayType:
32
+ """Get the current console display type.
33
+
34
+ Returns:
35
+ DisplayType: Display type.
36
+ """
31
37
  global _display_type
32
38
  if _display_type:
33
39
  return _display_type
inspect_ai/util/_panel.py CHANGED
@@ -1,23 +1,46 @@
1
1
  from typing import Any, Protocol, TypeVar
2
2
 
3
3
  from textual.containers import Container
4
+ from typing_extensions import Self
4
5
 
5
6
 
6
7
  class InputPanel(Container):
8
+ DEFAULT_TITLE = "Panel"
9
+
7
10
  DEFAULT_CLASSES = "task-input-panel"
8
11
 
12
+ DEFAULT_CSS = """
13
+ InputPanel {
14
+ padding: 0 1 1 1;
15
+ }
16
+ """
17
+
9
18
  class Host(Protocol):
10
19
  def set_title(self, title: str) -> None: ...
11
20
  def activate(self) -> None: ...
12
21
  def deactivate(self) -> None: ...
13
22
  def close(self) -> None: ...
14
23
 
15
- def __init__(self, title: str, host: Host) -> None:
24
+ def __init__(self, host: Host) -> None:
25
+ """Initialise the panel.
26
+
27
+ Panels are created as required by the input_panel() function so
28
+ you should NOT override __init__ with your own initialisation (rather,
29
+ you should define reactive props and/or methods that perform
30
+ initialisation).
31
+
32
+ You should also override the `DEFAULT_TITLE` variable for your panel to
33
+ provide a default tab title (you can change the title dynamically as
34
+ required using the `title` property).
35
+
36
+ Args:
37
+ host (InputPanel.Host): Interface to UI host of input panel.
38
+ """
16
39
  super().__init__()
17
- self._title = title
40
+ self._title = self.DEFAULT_TITLE
18
41
  self._host = host
19
42
 
20
- async def __aenter__(self) -> "InputPanel":
43
+ async def __aenter__(self) -> Self:
21
44
  self.activate()
22
45
  return self
23
46
 
@@ -50,10 +73,10 @@ class InputPanel(Container):
50
73
  pass
51
74
 
52
75
 
53
- TP = TypeVar("TP", bound=InputPanel)
76
+ TP = TypeVar("TP", bound=InputPanel, covariant=True)
54
77
 
55
78
 
56
- async def input_panel(title: str, panel: type[TP]) -> TP:
79
+ async def input_panel(panel: type[TP]) -> TP:
57
80
  """Create an input panel in the task display.
58
81
 
59
82
  There can only be a single instance of an InputPanel with a given
@@ -65,19 +88,18 @@ async def input_panel(title: str, panel: type[TP]) -> TP:
65
88
  the scope exits -- see below for open/close semantics)
66
89
 
67
90
  ```python
68
- panel = await input_panel("Custom", CustomPanel)
91
+ panel = await input_panel(CustomPanel)
69
92
  panel.activate()
70
93
  ```
71
94
 
72
95
  Activate and close an input panel using a context manager:
73
96
 
74
97
  ```python
75
- async with await input_panel("Custom", CustomPanel) as panel:
98
+ async with await input_panel(CustomPanel) as panel:
76
99
  ...
77
100
  ```
78
101
 
79
102
  Args:
80
- title (str): Input panel title.
81
103
  panel (type[TP]): Type of panel widget (must derive from `InputPanel`)
82
104
 
83
105
  Returns:
@@ -88,4 +110,4 @@ async def input_panel(title: str, panel: type[TP]) -> TP:
88
110
  """
89
111
  from inspect_ai._display.core.active import task_screen
90
112
 
91
- return await task_screen().input_panel(title, panel)
113
+ return await task_screen().input_panel(panel)
@@ -13,7 +13,6 @@ from .environment import (
13
13
  from .limits import OutputLimitExceededError, SandboxEnvironmentLimits
14
14
  from .local import LocalSandboxEnvironment # noqa: F401
15
15
  from .registry import sandboxenv
16
- from .service import SandboxService, sandbox_service
17
16
 
18
17
  __all__ = [
19
18
  "OutputLimitExceededError",
@@ -27,6 +26,4 @@ __all__ = [
27
26
  "sandboxenv",
28
27
  "sandbox",
29
28
  "sandbox_with",
30
- "SandboxService",
31
- "sandbox_service",
32
29
  ]
@@ -24,6 +24,10 @@ def sandbox(name: str | None = None) -> SandboxEnvironment:
24
24
 
25
25
  Return:
26
26
  SandboxEnvironment instance.
27
+
28
+ Raises:
29
+ ProcessLookupError: If there are no sandboxes available.
30
+ ValueError: If an invalid sandbox name is specified.
27
31
  """
28
32
  # verify we have a context
29
33
  environments = sandbox_environments_context_var.get(None)
@@ -102,7 +106,7 @@ async def sandbox_connections() -> dict[str, SandboxConnection]:
102
106
 
103
107
 
104
108
  def raise_no_sandbox() -> NoReturn:
105
- raise RuntimeError(
109
+ raise ProcessLookupError(
106
110
  "No sandbox environment has been provided for the current sample or task. "
107
111
  + "Please specify a sandbox for the sample or a global default sandbox for the task"
108
112
  )
@@ -8,8 +8,8 @@ from typing import Any, Literal, TypedDict, cast
8
8
  import yaml
9
9
  from pydantic import BaseModel
10
10
 
11
- from inspect_ai._util.display import display_type
12
11
  from inspect_ai._util.error import PrerequisiteError
12
+ from inspect_ai.util._display import display_type
13
13
  from inspect_ai.util._subprocess import ExecResult, subprocess
14
14
 
15
15
  from .prereqs import (
@@ -29,6 +29,8 @@ async def compose_up(project: ComposeProject) -> None:
29
29
  result = await compose_command(
30
30
  ["up", "--detach", "--wait", "--wait-timeout", COMPOSE_WAIT],
31
31
  project=project,
32
+ # wait up to 5 minutes for container to go up (compose wait + 3 minutes)
33
+ timeout=300,
32
34
  )
33
35
  if not result.success:
34
36
  msg = (
@@ -80,7 +82,11 @@ async def compose_cp(
80
82
  output_limit: int | None = None,
81
83
  ) -> None:
82
84
  result = await compose_command(
83
- ["cp", "--", src, dest], project=project, cwd=cwd, output_limit=output_limit
85
+ ["cp", "--", src, dest],
86
+ project=project,
87
+ timeout=120, # 2-minute timeout for file copies
88
+ cwd=cwd,
89
+ output_limit=output_limit,
84
90
  )
85
91
  if not result.success:
86
92
  msg = f"Failed to copy file from '{src}' to '{dest}': {result.stderr}"
@@ -118,7 +124,7 @@ async def compose_ps(
118
124
  command.append("--all")
119
125
  if status:
120
126
  command = command + ["--status", status]
121
- result = await compose_command(command, project=project)
127
+ result = await compose_command(command, project=project, timeout=60)
122
128
  if not result.success:
123
129
  msg = f"Error querying for running services: {result.stderr}"
124
130
  raise RuntimeError(msg)
@@ -136,6 +142,7 @@ async def compose_build(project: ComposeProject, capture_output: bool = False) -
136
142
  result = await compose_command(
137
143
  ["build"],
138
144
  project=project,
145
+ timeout=None, # no timeout for build
139
146
  capture_output=capture_output,
140
147
  )
141
148
  if not result.success:
@@ -151,6 +158,7 @@ async def compose_pull(
151
158
  return await compose_command(
152
159
  ["pull", "--ignore-buildable", "--policy", "missing", service],
153
160
  project=project,
161
+ timeout=None, # no timeout for pull
154
162
  capture_output=capture_output,
155
163
  )
156
164
 
@@ -185,7 +193,7 @@ ComposeService = TypedDict(
185
193
 
186
194
 
187
195
  async def compose_services(project: ComposeProject) -> dict[str, ComposeService]:
188
- result = await compose_command(["config"], project=project)
196
+ result = await compose_command(["config"], project=project, timeout=60)
189
197
  if not result.success:
190
198
  raise RuntimeError(f"Error reading docker config: {result.stderr}")
191
199
  return cast(dict[str, ComposeService], yaml.safe_load(result.stdout)["services"])
@@ -209,12 +217,13 @@ async def compose_ls() -> list[Project]:
209
217
 
210
218
  async def compose_cleanup_images(
211
219
  project: ComposeProject,
220
+ *,
212
221
  cwd: str | None = None,
213
- timeout: int | None = None,
222
+ timeout: int | None,
214
223
  ) -> None:
215
224
  # List the images that would be created for this compose
216
225
  images_result = await compose_command(
217
- ["config", "--images"], project=project, cwd=cwd
226
+ ["config", "--images"], project=project, timeout=timeout, cwd=cwd
218
227
  )
219
228
 
220
229
  # Remove those images explicitly
@@ -246,14 +255,11 @@ async def compose_cleanup_images(
246
255
  logger.warning(msg)
247
256
 
248
257
 
249
- DEFAULT_COMPOSE_TIMEOUT = 60
250
-
251
-
252
258
  async def compose_command(
253
259
  command: list[str],
254
260
  *,
255
261
  project: ComposeProject,
256
- timeout: int | None = DEFAULT_COMPOSE_TIMEOUT,
262
+ timeout: int | None,
257
263
  input: str | bytes | None = None,
258
264
  cwd: str | Path | None = None,
259
265
  forward_env: bool = True,
@@ -78,7 +78,7 @@ class DockerSandboxEnvironment(SandboxEnvironment):
78
78
  await compose_build(project)
79
79
 
80
80
  # cleanup images created during build
81
- await compose_cleanup_images(project)
81
+ await compose_cleanup_images(project, timeout=60)
82
82
 
83
83
  services = await compose_services(project)
84
84
  for name, service in services.items():
@@ -326,6 +326,7 @@ class DockerSandboxEnvironment(SandboxEnvironment):
326
326
  container_tmpfile,
327
327
  ],
328
328
  project=self._project,
329
+ timeout=60,
329
330
  )
330
331
 
331
332
  parent = PurePosixPath(file).parent
@@ -405,7 +406,7 @@ class DockerSandboxEnvironment(SandboxEnvironment):
405
406
 
406
407
  # read and return w/ appropriate encoding
407
408
  if text:
408
- with open(dest_file, "r", encoding="utf-8") as f:
409
+ with open(dest_file, "r", newline="", encoding="utf-8") as f:
409
410
  return f.read()
410
411
  else:
411
412
  with open(dest_file, "rb") as f:
@@ -424,13 +425,15 @@ class DockerSandboxEnvironment(SandboxEnvironment):
424
425
  None,
425
426
  )
426
427
 
427
- # return container login
428
+ # return container connection
428
429
  if container:
429
430
  return SandboxConnection(
430
- command=f"docker exec -it {container} /bin/bash --login",
431
- container=container,
431
+ command=f"docker exec -it {container} bash -l",
432
+ vscode_command=[
433
+ "remote-containers.attachToRunningContainer",
434
+ container,
435
+ ],
432
436
  )
433
-
434
437
  # error (not currently running)
435
438
  else:
436
439
  raise ConnectionError(
@@ -1,6 +1,6 @@
1
1
  from inspect_ai._util.constants import PKG_PATH
2
- from inspect_ai._util.display import display_type
3
2
  from inspect_ai._util.error import PrerequisiteError
3
+ from inspect_ai.util._display import display_type
4
4
  from inspect_ai.util._subprocess import subprocess
5
5
 
6
6
  INSPECT_WEB_BROWSER_IMAGE_DOCKERHUB = "aisiuk/inspect-web-browser-tool"
@@ -84,10 +84,10 @@ def task_project_name(task: str) -> str:
84
84
  if len(task) == 0:
85
85
  task = "task"
86
86
 
87
- return f"inspect-{task}-i{uuid().lower()}"
87
+ return f"inspect-{task[:12]}-i{uuid().lower()[:6]}"
88
88
 
89
89
 
90
- inspect_project_pattern = r"^inspect-[a-z\d\-_]*-i[a-z\d]{22}$"
90
+ inspect_project_pattern = r"^inspect-[a-z\d\-_]*-i[a-z\d]{6,}$"
91
91
 
92
92
 
93
93
  def is_inspect_project(name: str) -> bool:
@@ -2,7 +2,7 @@ from __future__ import annotations
2
2
 
3
3
  import abc
4
4
  from dataclasses import dataclass, field
5
- from typing import Awaitable, Callable, Literal, NamedTuple, Union, overload
5
+ from typing import Any, Awaitable, Callable, Literal, NamedTuple, Union, overload
6
6
 
7
7
  from pydantic import BaseModel, Field
8
8
 
@@ -34,12 +34,9 @@ class SandboxConnection(BaseModel):
34
34
  command: str
35
35
  """Shell command to connect to sandbox."""
36
36
 
37
- vscode_command: list[str] | None = Field(default=None)
37
+ vscode_command: list[Any] | None = Field(default=None)
38
38
  """Optional vscode command (+args) to connect to sandbox."""
39
39
 
40
- container: str | None = Field(default=None)
41
- """Optional container name (will not apply to all sandboxes)."""
42
-
43
40
 
44
41
  class SandboxEnvironment(abc.ABC):
45
42
  """Environment for executing arbitrary code from tools.
@@ -205,6 +202,10 @@ class SandboxEnvironment(abc.ABC):
205
202
 
206
203
  File size is limited to 100 MiB.
207
204
 
205
+ When reading text files, implementations should preserve newline constructs
206
+ (e.g. crlf should be preserved not converted to lf). This is equivalent
207
+ to specifying `newline=""` in a call to the Python `open()` function.
208
+
208
209
  Args:
209
210
  file (str): Path to file (relative file paths will resolve to the
210
211
  per-sample working directory).
@@ -101,7 +101,7 @@ class LocalSandboxEnvironment(SandboxEnvironment):
101
101
  file = self._resolve_file(file)
102
102
  verify_read_file_size(file)
103
103
  if text:
104
- with open(file, "r", encoding="utf-8") as f:
104
+ with open(file, "r", newline="", encoding="utf-8") as f:
105
105
  return f.read()
106
106
  else:
107
107
  with open(file, "rb") as f:
@@ -1,5 +1,6 @@
1
1
  import asyncio
2
2
  import json
3
+ from logging import getLogger
3
4
  from pathlib import PurePosixPath
4
5
  from textwrap import dedent
5
6
  from typing import (
@@ -14,9 +15,12 @@ from inspect_ai.util._subprocess import ExecResult
14
15
 
15
16
  from .environment import SandboxEnvironment
16
17
 
18
+ logger = getLogger(__name__)
19
+
20
+
17
21
  REQUESTS_DIR = "requests"
18
22
  RESPONSES_DIR = "responses"
19
- SERVICES_DIR = "/tmp/inspect-sandbox-services"
23
+ SERVICES_DIR = "/var/tmp/sandbox-services"
20
24
 
21
25
  ID = "id"
22
26
  METHOD = "method"
@@ -70,7 +74,7 @@ class SandboxService:
70
74
 
71
75
  ```python
72
76
  import sys
73
- sys.path.append("/tmp/inspect-sandbox-services/foo")
77
+ sys.path.append("/var/tmp/sandbox-services/foo")
74
78
  import foo
75
79
  ```
76
80
 
@@ -79,7 +83,7 @@ class SandboxService:
79
83
  ```python
80
84
  import importlib.util
81
85
  spec = importlib.util.spec_from_file_location(
82
- "foo", "/tmp/inspect-sandbox-services/foo/foo.py"
86
+ "foo", "/var/tmp/sandbox-services/foo/foo.py"
83
87
  )
84
88
  foo = importlib.util.module_from_spec(spec)
85
89
  spec.loader.exec_module(foo)
@@ -150,8 +154,14 @@ class SandboxService:
150
154
  f"Error reading request for service {self._name}: '{read_request}' ({result.stderr})"
151
155
  )
152
156
 
153
- # parse request
154
- request_data = json.loads(result.stdout)
157
+ # parse request (decode error could occur if it's incomplete, so bypass this)
158
+ try:
159
+ request_data = json.loads(result.stdout)
160
+ except json.JSONDecodeError:
161
+ logger.warning(
162
+ f"JSON decoding error reading service request: {result.stdout}"
163
+ )
164
+ return None
155
165
  if not isinstance(request_data, dict):
156
166
  raise TypeError(f"Service request is not a dict (type={request_data})")
157
167
 
@@ -275,7 +285,7 @@ class SandboxService:
275
285
  return request_id
276
286
 
277
287
  def _read_{self._name}_response(request_id: str) -> tuple[bool, Any]:
278
- from json import load
288
+ from json import JSONDecodeError, load
279
289
  from pathlib import Path
280
290
 
281
291
  responses_dir = Path("{SERVICES_DIR}", "{self._name}", "{RESPONSES_DIR}")
@@ -283,7 +293,12 @@ class SandboxService:
283
293
  if response_path.exists():
284
294
  # read and remove the file
285
295
  with open(response_path, "r") as f:
286
- response = load(f)
296
+ # it's possible the file is still being written so
297
+ # just catch and wait for another retry if this occurs
298
+ try:
299
+ response = load(f)
300
+ except JSONDecodeError:
301
+ return False, None
287
302
  response_path.unlink()
288
303
 
289
304
  # raise error if we have one
inspect_ai/util/_store.py CHANGED
@@ -38,14 +38,10 @@ class Store:
38
38
  self._data: dict[str, Any] = {}
39
39
 
40
40
  @overload
41
- def get(self, key: str, default: None = None) -> Any:
42
- return self._data.get(key, default)
41
+ def get(self, key: str, default: None = None) -> Any: ...
43
42
 
44
43
  @overload
45
- def get(self, key: str, default: VT) -> VT:
46
- if key not in self._data.keys():
47
- self._data[key] = default
48
- return cast(VT, self._data.get(key, default))
44
+ def get(self, key: str, default: VT) -> VT: ...
49
45
 
50
46
  def get(self, key: str, default: VT | None = None) -> VT | Any:
51
47
  """Get a value from the store.
@@ -60,6 +56,9 @@ class Store:
60
56
  Returns:
61
57
  Value if it exists, otherwise default.
62
58
  """
59
+ if default is not None:
60
+ if key not in self._data.keys():
61
+ self._data[key] = default
63
62
  return cast(VT, self._data.get(key, default))
64
63
 
65
64
  def set(self, key: str, value: Any) -> None: