inspect-ai 0.3.68__py3-none-any.whl → 0.3.70__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. inspect_ai/_cli/eval.py +13 -1
  2. inspect_ai/_display/plain/display.py +9 -11
  3. inspect_ai/_display/textual/app.py +5 -5
  4. inspect_ai/_display/textual/widgets/samples.py +47 -18
  5. inspect_ai/_display/textual/widgets/transcript.py +25 -12
  6. inspect_ai/_eval/eval.py +14 -2
  7. inspect_ai/_eval/evalset.py +6 -1
  8. inspect_ai/_eval/run.py +6 -0
  9. inspect_ai/_eval/task/run.py +44 -15
  10. inspect_ai/_eval/task/task.py +26 -3
  11. inspect_ai/_util/interrupt.py +15 -0
  12. inspect_ai/_util/logger.py +23 -0
  13. inspect_ai/_util/rich.py +7 -8
  14. inspect_ai/_util/text.py +301 -1
  15. inspect_ai/_util/transcript.py +10 -2
  16. inspect_ai/_util/working.py +46 -0
  17. inspect_ai/_view/www/dist/assets/index.css +56 -12
  18. inspect_ai/_view/www/dist/assets/index.js +905 -751
  19. inspect_ai/_view/www/log-schema.json +337 -2
  20. inspect_ai/_view/www/node_modules/flatted/python/flatted.py +149 -0
  21. inspect_ai/_view/www/node_modules/flatted/python/test.py +63 -0
  22. inspect_ai/_view/www/src/appearance/icons.ts +3 -1
  23. inspect_ai/_view/www/src/metadata/RenderedContent.tsx +0 -1
  24. inspect_ai/_view/www/src/samples/SampleDisplay.module.css +9 -1
  25. inspect_ai/_view/www/src/samples/SampleDisplay.tsx +28 -1
  26. inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +4 -0
  27. inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +23 -2
  28. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +1 -1
  29. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +4 -0
  30. inspect_ai/_view/www/src/samples/transcript/SandboxEventView.module.css +32 -0
  31. inspect_ai/_view/www/src/samples/transcript/SandboxEventView.tsx +152 -0
  32. inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +9 -2
  33. inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +19 -1
  34. inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +6 -3
  35. inspect_ai/_view/www/src/samples/transcript/types.ts +3 -1
  36. inspect_ai/_view/www/src/types/log.d.ts +188 -108
  37. inspect_ai/_view/www/src/utils/format.ts +7 -4
  38. inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +9 -6
  39. inspect_ai/log/__init__.py +2 -0
  40. inspect_ai/log/_condense.py +1 -0
  41. inspect_ai/log/_log.py +72 -12
  42. inspect_ai/log/_samples.py +5 -5
  43. inspect_ai/log/_transcript.py +31 -1
  44. inspect_ai/model/_call_tools.py +1 -1
  45. inspect_ai/model/_conversation.py +1 -1
  46. inspect_ai/model/_model.py +35 -16
  47. inspect_ai/model/_model_call.py +10 -3
  48. inspect_ai/model/_providers/anthropic.py +13 -2
  49. inspect_ai/model/_providers/bedrock.py +7 -0
  50. inspect_ai/model/_providers/cloudflare.py +20 -7
  51. inspect_ai/model/_providers/google.py +358 -302
  52. inspect_ai/model/_providers/groq.py +57 -23
  53. inspect_ai/model/_providers/hf.py +6 -0
  54. inspect_ai/model/_providers/mistral.py +81 -52
  55. inspect_ai/model/_providers/openai.py +9 -0
  56. inspect_ai/model/_providers/providers.py +6 -6
  57. inspect_ai/model/_providers/util/tracker.py +92 -0
  58. inspect_ai/model/_providers/vllm.py +13 -5
  59. inspect_ai/solver/_basic_agent.py +1 -3
  60. inspect_ai/solver/_bridge/patch.py +0 -2
  61. inspect_ai/solver/_limit.py +4 -4
  62. inspect_ai/solver/_plan.py +3 -3
  63. inspect_ai/solver/_solver.py +3 -0
  64. inspect_ai/solver/_task_state.py +10 -1
  65. inspect_ai/tool/_tools/_web_search.py +3 -3
  66. inspect_ai/util/_concurrency.py +14 -8
  67. inspect_ai/util/_sandbox/context.py +15 -0
  68. inspect_ai/util/_sandbox/docker/cleanup.py +8 -3
  69. inspect_ai/util/_sandbox/docker/compose.py +5 -9
  70. inspect_ai/util/_sandbox/docker/docker.py +20 -6
  71. inspect_ai/util/_sandbox/docker/util.py +10 -1
  72. inspect_ai/util/_sandbox/environment.py +32 -1
  73. inspect_ai/util/_sandbox/events.py +149 -0
  74. inspect_ai/util/_sandbox/local.py +3 -3
  75. inspect_ai/util/_sandbox/self_check.py +2 -1
  76. inspect_ai/util/_subprocess.py +4 -1
  77. {inspect_ai-0.3.68.dist-info → inspect_ai-0.3.70.dist-info}/METADATA +5 -5
  78. {inspect_ai-0.3.68.dist-info → inspect_ai-0.3.70.dist-info}/RECORD +82 -74
  79. {inspect_ai-0.3.68.dist-info → inspect_ai-0.3.70.dist-info}/LICENSE +0 -0
  80. {inspect_ai-0.3.68.dist-info → inspect_ai-0.3.70.dist-info}/WHEEL +0 -0
  81. {inspect_ai-0.3.68.dist-info → inspect_ai-0.3.70.dist-info}/entry_points.txt +0 -0
  82. {inspect_ai-0.3.68.dist-info → inspect_ai-0.3.70.dist-info}/top_level.txt +0 -0
@@ -2,6 +2,7 @@ import inspect
2
2
  from logging import getLogger
3
3
  from typing import Any, Awaitable, Callable, TypeVar, cast
4
4
 
5
+ from inspect_ai._util.interrupt import check_sample_interrupt
5
6
  from inspect_ai._util.registry import (
6
7
  RegistryInfo,
7
8
  is_registry_object,
@@ -115,15 +116,14 @@ class Plan(Solver):
115
116
  with solver_transcript(self.finish, state) as st:
116
117
  state = await self.finish(state, generate)
117
118
  st.complete(state)
118
-
119
- # mark completed
120
- state.completed = True
119
+ check_sample_interrupt()
121
120
 
122
121
  finally:
123
122
  # always do cleanup if we have one
124
123
  if self.cleanup:
125
124
  try:
126
125
  await self.cleanup(state)
126
+ check_sample_interrupt()
127
127
  except Exception as ex:
128
128
  logger.warning(f"Exception occurred during plan cleanup: {ex}")
129
129
 
@@ -15,6 +15,7 @@ from typing import (
15
15
  from typing_extensions import Unpack
16
16
 
17
17
  from inspect_ai._util._async import is_callable_coroutine
18
+ from inspect_ai._util.interrupt import check_sample_interrupt
18
19
  from inspect_ai._util.registry import (
19
20
  RegistryInfo,
20
21
  registry_add,
@@ -200,6 +201,7 @@ def solver(
200
201
  state: TaskState, generate: Generate
201
202
  ) -> TaskState:
202
203
  state = await original_call(state, generate)
204
+ check_sample_interrupt()
203
205
  set_sample_state(state)
204
206
  return state
205
207
 
@@ -215,6 +217,7 @@ def solver(
215
217
  state: TaskState, generate: Generate
216
218
  ) -> TaskState:
217
219
  state = await solver(state, generate)
220
+ check_sample_interrupt()
218
221
  set_sample_state(state)
219
222
  return state
220
223
 
@@ -7,7 +7,9 @@ from random import Random
7
7
  from typing import Any, Iterable, SupportsIndex, Type, Union, cast, overload
8
8
 
9
9
  from pydantic_core import to_jsonable_python
10
+ from shortuuid import uuid
10
11
 
12
+ from inspect_ai._util.interrupt import check_sample_interrupt
11
13
  from inspect_ai.dataset._dataset import MT, Sample, metadata_as
12
14
  from inspect_ai.model import (
13
15
  ChatMessage,
@@ -164,6 +166,7 @@ class TaskState:
164
166
  self._token_limit = token_limit
165
167
  self._completed = completed
166
168
  self._store = Store()
169
+ self._uuid = uuid()
167
170
 
168
171
  if choices:
169
172
  self.choices = Choices(choices)
@@ -333,7 +336,7 @@ class TaskState:
333
336
  def completed(self) -> bool:
334
337
  """Is the task completed.
335
338
 
336
- Additionally, checks message and token limits and raises if they are exceeded.
339
+ Additionally, checks message and token limits and raises if they are exceeded, and also checks for an operator interrupt of the sample.
337
340
  """
338
341
  from inspect_ai.log._samples import set_active_sample_total_messages
339
342
 
@@ -356,6 +359,7 @@ class TaskState:
356
359
  "token", value=self.token_usage, limit=self.token_limit, state=self
357
360
  )
358
361
  else:
362
+ check_sample_interrupt()
359
363
  return self._completed
360
364
 
361
365
  @completed.setter
@@ -371,6 +375,11 @@ class TaskState:
371
375
  scores: dict[str, Score] | None = None
372
376
  """Scores yielded by running task."""
373
377
 
378
+ @property
379
+ def uuid(self) -> str:
380
+ """Globally unique identifier for sample run."""
381
+ return self._uuid
382
+
374
383
  def metadata_as(self, metadata_cls: Type[MT]) -> MT:
375
384
  """Pydantic model interface to metadata.
376
385
 
@@ -1,6 +1,6 @@
1
1
  import asyncio
2
2
  import os
3
- from typing import Literal, Protocol, cast, runtime_checkable
3
+ from typing import Literal, Protocol, runtime_checkable
4
4
 
5
5
  import httpx
6
6
  from bs4 import BeautifulSoup, NavigableString
@@ -90,8 +90,8 @@ def web_search(
90
90
  return_exceptions=True,
91
91
  )
92
92
  for page, link in zip(pages, links):
93
- if page and not isinstance(page, Exception):
94
- page_contents.append(cast(str, page))
93
+ if page and not isinstance(page, BaseException):
94
+ page_contents.append(page)
95
95
  urls.append(link.url)
96
96
  snippets.append(link.snippet)
97
97
  search_calls += 1
@@ -1,13 +1,19 @@
1
1
  import asyncio
2
+ import contextlib
3
+ import time
2
4
  from dataclasses import dataclass
5
+ from typing import AsyncIterator
3
6
 
7
+ from inspect_ai._util.working import report_sample_waiting_time
4
8
 
5
- def concurrency(
9
+
10
+ @contextlib.asynccontextmanager
11
+ async def concurrency(
6
12
  name: str,
7
13
  concurrency: int,
8
14
  key: str | None = None,
9
- ) -> asyncio.Semaphore:
10
- """Obtain a concurrency context.
15
+ ) -> AsyncIterator[None]:
16
+ """Concurrency context manager.
11
17
 
12
18
  A concurrency context can be used to limit the number of coroutines
13
19
  executing a block of code (e.g calling an API). For example, here
@@ -32,9 +38,6 @@ def concurrency(
32
38
  Used if the unique key isn't human readable -- e.g. includes
33
39
  api tokens or account ids so that the more readable `name`
34
40
  can be presented to users e.g. in console UI.
35
-
36
- Returns:
37
- Asyncio Semaphore for concurrency context.
38
41
  """
39
42
  # sort out key
40
43
  key = key if key else name
@@ -47,8 +50,11 @@ def concurrency(
47
50
  )
48
51
  _concurrency_semaphores[key] = semaphore
49
52
 
50
- # return the semaphore
51
- return semaphore.semaphore
53
+ # wait and yield to protected code
54
+ start_wait = time.monotonic()
55
+ async with semaphore.semaphore:
56
+ report_sample_waiting_time(time.monotonic() - start_wait)
57
+ yield
52
58
 
53
59
 
54
60
  def concurrency_status() -> dict[str, tuple[int, int]]:
@@ -5,6 +5,7 @@ from typing import Any, NoReturn, cast
5
5
  from shortuuid import uuid
6
6
 
7
7
  from inspect_ai._util.constants import SANDBOX_SETUP_TIMEOUT
8
+ from inspect_ai.util._sandbox.events import SandboxEnvironmentProxy
8
9
 
9
10
  from .environment import (
10
11
  SampleCleanup,
@@ -132,6 +133,9 @@ async def init_sandbox_environments_sample(
132
133
  # verify that there is at least one environment and a 'default' env
133
134
  validate_sandbox_environments(sandboxenv_type, environments)
134
135
 
136
+ # proxy environments (for recording SandboxEvent)
137
+ environments = {k: SandboxEnvironmentProxy(v) for k, v in environments.items()}
138
+
135
139
  try:
136
140
  # copy files into environments
137
141
  await copy_sandbox_environment_files(files, environments)
@@ -148,6 +152,7 @@ async def init_sandbox_environments_sample(
148
152
  return environments
149
153
 
150
154
  except Exception as ex:
155
+ environments = unproxy_environments(environments)
151
156
  await sample_cleanup(task_name, config, environments, True)
152
157
  raise ex
153
158
 
@@ -161,9 +166,19 @@ async def cleanup_sandbox_environments_sample(
161
166
  ) -> None:
162
167
  sandboxenv_type = registry_find_sandboxenv(type)
163
168
  sample_cleanup = cast(SampleCleanup, getattr(sandboxenv_type, "sample_cleanup"))
169
+ environments = unproxy_environments(environments)
164
170
  await sample_cleanup(task_name, config, environments, interrupted)
165
171
 
166
172
 
173
def unproxy_environments(
    environments: dict[str, SandboxEnvironment],
) -> dict[str, SandboxEnvironment]:
    """Return a new mapping holding the sandbox wrapped by each proxy.

    Args:
        environments: Mapping of environment name to sandbox. Every value
            is treated as a SandboxEnvironmentProxy (no runtime check is
            made), so a non-proxy value fails on the `_sandbox` lookup.

    Returns:
        A new dict with the same keys whose values are the `_sandbox`
        attribute of the corresponding proxy.
    """
    # the cast only informs the type checker; it does nothing at runtime
    proxies = cast(dict[str, SandboxEnvironmentProxy], environments)

    unwrapped: dict[str, SandboxEnvironment] = {}
    for env_name, proxy in proxies.items():
        unwrapped[env_name] = proxy._sandbox
    return unwrapped
180
+
181
+
167
182
  async def copy_sandbox_environment_files(
168
183
  files: dict[str, bytes], environments: dict[str, SandboxEnvironment]
169
184
  ) -> None:
@@ -56,17 +56,22 @@ async def project_cleanup_shutdown(cleanup: bool) -> None:
56
56
  title_style="bold",
57
57
  title_justify="left",
58
58
  )
59
+ table.add_column("Sample ID")
60
+ table.add_column("Epoch")
59
61
  table.add_column("Container(s)", no_wrap=True)
60
- table.add_column("Cleanup")
61
62
  for project in shutdown_projects:
62
63
  containers = await compose_ps(project, all=True)
63
64
  table.add_row(
65
+ str(project.sample_id) if project.sample_id is not None else "",
66
+ str(project.epoch if project.epoch is not None else ""),
64
67
  "\n".join(container["Name"] for container in containers),
65
- f"[blue]inspect sandbox cleanup docker {project.name}[/blue]",
66
68
  )
67
69
  print(table)
68
70
  print(
69
- "\nCleanup all environments with: [blue]inspect sandbox cleanup docker[/blue]\n"
71
+ "\n"
72
+ "Cleanup all containers : [blue]inspect sandbox cleanup docker[/blue]\n"
73
+ "Cleanup single container: [blue]inspect sandbox cleanup docker <container-id>[/blue]",
74
+ "\n",
70
75
  )
71
76
 
72
77
  # remove auto-compose files
@@ -28,7 +28,7 @@ COMPOSE_WAIT = 120
28
28
 
29
29
  async def compose_up(
30
30
  project: ComposeProject, services: dict[str, ComposeService]
31
- ) -> None:
31
+ ) -> ExecResult[str]:
32
32
  # compute the maximum amount of time we will
33
33
  up_command = ["up", "--detach", "--wait"]
34
34
 
@@ -49,7 +49,8 @@ async def compose_up(
49
49
  # passing the --wait flag (see https://github.com/docker/compose/issues/10596).
50
50
  # In practice, we will catch any errors when calling compose_check_running()
51
51
  # immediately after we call compose_up().
52
- await compose_command(up_command, project=project, timeout=timeout)
52
+ result = await compose_command(up_command, project=project, timeout=timeout)
53
+ return result
53
54
 
54
55
 
55
56
  async def compose_down(project: ComposeProject, quiet: bool = True) -> None:
@@ -121,14 +122,9 @@ async def compose_check_running(
121
122
  unhealthy_services = services
122
123
  for successful_service in successful_services:
123
124
  unhealthy_services.remove(successful_service["Service"])
124
-
125
- msg = (
126
- "One or more docker containers failed to start from "
127
- f"{project.config}: {','.join(unhealthy_services)}"
128
- )
129
- raise RuntimeError(msg)
125
+ return []
130
126
  else:
131
- raise RuntimeError("No services started")
127
+ return []
132
128
 
133
129
  return [service["Service"] for service in running_services]
134
130
 
@@ -5,7 +5,7 @@ import os
5
5
  import tempfile
6
6
  from logging import getLogger
7
7
  from pathlib import Path, PurePosixPath
8
- from typing import Literal, Union, cast, overload
8
+ from typing import Literal, Union, overload
9
9
 
10
10
  from typing_extensions import override
11
11
 
@@ -139,8 +139,15 @@ class DockerSandboxEnvironment(SandboxEnvironment):
139
139
  env[key] = str(value)
140
140
 
141
141
  # create project
142
+ from inspect_ai.log._samples import sample_active
143
+
144
+ sample = sample_active()
142
145
  project = await ComposeProject.create(
143
- name=task_project_name(task_name), config=config, env=env
146
+ name=task_project_name(task_name),
147
+ config=config,
148
+ sample_id=sample.sample.id if sample is not None else None,
149
+ epoch=sample.epoch if sample is not None else None,
150
+ env=env,
144
151
  )
145
152
 
146
153
  try:
@@ -148,13 +155,18 @@ class DockerSandboxEnvironment(SandboxEnvironment):
148
155
  services = await compose_services(project)
149
156
 
150
157
  # start the services
151
- await compose_up(project, services)
158
+ result = await compose_up(project, services)
152
159
 
153
160
  # check to ensure that the services are running
154
161
  running_services = await compose_check_running(
155
162
  list(services.keys()), project=project
156
163
  )
157
164
 
165
+ if not running_services:
166
+ raise RuntimeError(
167
+ f"No services started.\nCompose up stderr: {result.stderr}"
168
+ )
169
+
158
170
  # note that the project is running
159
171
  project_startup(project)
160
172
 
@@ -209,9 +221,11 @@ class DockerSandboxEnvironment(SandboxEnvironment):
209
221
  # (this enables us to show output for the cleanup operation)
210
222
  if not interrupted:
211
223
  # extract project from first environment
212
- project = cast(
213
- DockerSandboxEnvironment, next(iter(environments.values()))
214
- )._project
224
+ project = (
225
+ next(iter(environments.values()))
226
+ .as_type(DockerSandboxEnvironment)
227
+ ._project
228
+ )
215
229
  # cleanup the project
216
230
  await project_cleanup(project=project, quiet=True)
217
231
 
@@ -21,6 +21,8 @@ logger = getLogger(__name__)
21
21
  class ComposeProject:
22
22
  name: str
23
23
  config: str | None
24
+ sample_id: int | str | None
25
+ epoch: int | None
24
26
  env: dict[str, str] | None
25
27
 
26
28
  @classmethod
@@ -28,6 +30,9 @@ class ComposeProject:
28
30
  cls,
29
31
  name: str,
30
32
  config: SandboxEnvironmentConfigType | None,
33
+ *,
34
+ sample_id: int | str | None = None,
35
+ epoch: int | None = None,
31
36
  env: dict[str, str] = {},
32
37
  ) -> "ComposeProject":
33
38
  # resolve config to full path if we have one
@@ -58,16 +63,20 @@ class ComposeProject:
58
63
  ensure_auto_compose_file(config)
59
64
 
60
65
  # return project
61
- return ComposeProject(name, config, env)
66
+ return ComposeProject(name, config, sample_id=sample_id, epoch=epoch, env=env)
62
67
 
63
68
  def __init__(
64
69
  self,
65
70
  name: str,
66
71
  config: str | None,
72
+ sample_id: int | str | None,
73
+ epoch: int | None,
67
74
  env: dict[str, str],
68
75
  ) -> None:
69
76
  self.name = name
70
77
  self.config = config
78
+ self.sample_id = sample_id
79
+ self.epoch = epoch
71
80
  self.env = env
72
81
 
73
82
  def __eq__(self, other: object) -> bool:
@@ -2,12 +2,24 @@ from __future__ import annotations
2
2
 
3
3
  import abc
4
4
  from dataclasses import dataclass, field
5
- from typing import Any, Awaitable, Callable, Literal, NamedTuple, Union, overload
5
+ from typing import (
6
+ Any,
7
+ Awaitable,
8
+ Callable,
9
+ Literal,
10
+ NamedTuple,
11
+ Type,
12
+ TypeVar,
13
+ Union,
14
+ overload,
15
+ )
6
16
 
7
17
  from pydantic import BaseModel, Field
8
18
 
9
19
  from .._subprocess import ExecResult
10
20
 
21
+ ST = TypeVar("ST", bound="SandboxEnvironment")
22
+
11
23
  TaskInit = Callable[[str, Union["SandboxEnvironmentConfigType", None]], Awaitable[None]]
12
24
  TaskCleanup = Callable[
13
25
  [str, Union["SandboxEnvironmentConfigType", None], bool], Awaitable[None]
@@ -180,6 +192,25 @@ class SandboxEnvironment(abc.ABC):
180
192
  """
181
193
  raise NotImplementedError("connection not implemented")
182
194
 
195
+ def as_type(self, sandbox_cls: Type[ST]) -> ST:
196
+ """Verify and return a reference to a subclass of SandboxEnvironment.
197
+
198
+ Args:
199
+ sandbox_cls: Class of sandbox (subclass of SandboxEnvironment)
200
+
201
+ Returns:
202
+ Reference to the sandbox using the requested type.
203
+
204
+ Raises:
205
+ TypeError: If the sandbox is not of the requested type.
206
+ """
207
+ if isinstance(self, sandbox_cls):
208
+ return self
209
+ else:
210
+ raise TypeError(
211
+ f"Expected instance of {sandbox_cls.__name__}, got {type(self).__name__}"
212
+ )
213
+
183
214
  @classmethod
184
215
  def config_files(cls) -> list[str]:
185
216
  """Standard config files for this provider (used for automatic discovery)"""
@@ -0,0 +1,149 @@
1
+ import shlex
2
+ from typing import Literal, Type, Union, overload
3
+
4
+ from pydantic import JsonValue
5
+ from pydantic_core import to_jsonable_python
6
+ from typing_extensions import override
7
+
8
+ from inspect_ai._util.text import truncate_lines
9
+ from inspect_ai.util._subprocess import ExecResult
10
+
11
+ from .environment import (
12
+ ST,
13
+ SandboxConnection,
14
+ SandboxEnvironment,
15
+ SandboxEnvironmentConfigType,
16
+ )
17
+
18
+
19
class SandboxEnvironmentProxy(SandboxEnvironment):
    """Sandbox wrapper that records a SandboxEvent for each operation.

    `exec`, `write_file` and `read_file` are forwarded to the wrapped
    sandbox and, once the wrapped call has returned, an event describing
    the operation is added to the current transcript. `connection` is
    forwarded without recording anything.
    """

    def __init__(self, sandbox: SandboxEnvironment) -> None:
        # the underlying sandbox that every call is delegated to
        self._sandbox = sandbox

    @override
    async def exec(
        self,
        cmd: list[str],
        input: str | bytes | None = None,
        cwd: str | None = None,
        env: dict[str, str] = {},
        user: str | None = None,
        timeout: int | None = None,
        timeout_retry: bool = True,
    ) -> ExecResult[str]:
        """Run `cmd` in the wrapped sandbox and record an "exec" event.

        All arguments are passed through unchanged. The event is recorded
        only after the wrapped call returns, so nothing is logged when the
        wrapped call raises.

        Returns:
            The ExecResult produced by the wrapped sandbox.
        """
        # function-scope import (presumably avoids a circular import -- confirm)
        from inspect_ai.log._transcript import SandboxEvent, transcript

        # delegate to the wrapped sandbox first
        outcome = await self._sandbox.exec(
            cmd, input, cwd, env, user, timeout, timeout_retry
        )

        # record only the options that differ from their defaults
        recorded: dict[str, JsonValue] = {}
        if cwd:
            recorded["cwd"] = cwd
        if env:
            recorded["env"] = to_jsonable_python(env)
        if user:
            recorded["user"] = user
        if timeout is not None:
            recorded["timeout"] = timeout
        if timeout_retry is not True:
            recorded["timeout_retry"] = timeout_retry

        record = transcript()._event

        # shell-quoted, space-separated rendering of the argv list
        command_line = shlex.join(cmd)
        shown_input = None if input is None else content_display(input)

        # stderr (when present) is shown ahead of stdout
        if outcome.stderr:
            raw_output = f"{outcome.stderr}\n\n{outcome.stdout}"
        else:
            raw_output = outcome.stdout

        record(
            SandboxEvent(
                action="exec",
                cmd=command_line,
                input=shown_input,
                options=recorded,
                result=outcome.returncode,
                output=content_display(raw_output),
            )
        )

        return outcome

    @override
    async def write_file(self, file: str, contents: str | bytes) -> None:
        """Write `contents` to `file` in the wrapped sandbox and record it.

        The "write_file" event is recorded only after the wrapped call
        returns; its input is the display form of `contents`.
        """
        from inspect_ai.log._transcript import SandboxEvent, transcript

        # delegate to the wrapped sandbox
        await self._sandbox.write_file(file, contents)

        # log what was written
        record = transcript()._event
        shown_contents = content_display(contents)
        record(SandboxEvent(action="write_file", file=file, input=shown_contents))

    @overload
    async def read_file(self, file: str, text: Literal[True] = True) -> str: ...

    @overload
    async def read_file(self, file: str, text: Literal[False]) -> bytes: ...

    @override
    async def read_file(self, file: str, text: bool = True) -> Union[str | bytes]:
        """Read `file` from the wrapped sandbox and record a "read_file" event.

        Args:
            file: Path passed through to the wrapped sandbox.
            text: When exactly `True`, the wrapped sandbox is called with
                `True`; any other value results in a call with `False`.

        Returns:
            Whatever the wrapped sandbox returned.
        """
        from inspect_ai.log._transcript import SandboxEvent, transcript

        # literal True/False arguments keep each call on a single overload
        data: str | bytes
        if text is not True:
            data = await self._sandbox.read_file(file, False)
        else:
            data = await self._sandbox.read_file(file, True)

        # log what was read
        record = transcript()._event
        shown_data = content_display(data)
        record(SandboxEvent(action="read_file", file=file, output=shown_data))

        return data

    @override
    async def connection(self) -> SandboxConnection:
        """Return the wrapped sandbox's connection (no event is recorded)."""
        wrapped_connection = await self._sandbox.connection()
        return wrapped_connection

    @override
    def as_type(self, sandbox_cls: Type[ST]) -> ST:
        """Return the wrapped sandbox if it is an instance of `sandbox_cls`.

        The check is made against the wrapped sandbox, not the proxy, and
        the wrapped sandbox itself is what gets returned.

        Raises:
            TypeError: If the wrapped sandbox is not a `sandbox_cls`.
        """
        wrapped = self._sandbox
        if not isinstance(wrapped, sandbox_cls):
            raise TypeError(
                f"Expected instance of {sandbox_cls.__name__}, got {type(self._sandbox).__name__}"
            )
        return wrapped

    @classmethod
    async def sample_cleanup(
        cls,
        task_name: str,
        config: SandboxEnvironmentConfigType | None,
        environments: dict[str, SandboxEnvironment],
        interrupted: bool,
    ) -> None:
        """Do nothing; the proxy performs no cleanup of its own."""
        return None
131
+
132
+
133
def content_display(content: str | bytes) -> str:
    """Build the string shown in a sandbox event for `content`.

    Args:
        content: Text or raw bytes passed to or returned from a sandbox call.

    Returns:
        For bytes, a "binary (<size>)" placeholder built from the byte
        count. For text, the result of `truncate_lines(content, 20)`,
        followed by a truncation notice when that helper reports a
        truthy second value.
    """
    # bytes are never rendered, only summarized by size
    if not isinstance(content, str):
        return f"binary ({pretty_size(len(content))})"

    # NOTE(review): the second value is presumably the number of lines
    # dropped beyond the 20-line limit -- confirm against truncate_lines
    text, dropped = truncate_lines(content, 20)
    if not dropped:
        return text
    return f"{text}\n\nOutput truncated ({dropped} additional lines)"
141
+
142
+
143
def pretty_size(size: int) -> str:
    """Format a byte count as a short human-readable string.

    Args:
        size: Number of bytes.

    Returns:
        `"<size> B"` for values below 1024; otherwise the value scaled by
        powers of 1024 to KB, MB or GB with two decimals. Values of
        1 GiB and above are reported in GB, not as thousands of MB.
    """
    if size < 1024:
        return f"{size} B"

    # step up one unit at a time while the scaled value is still >= 1024
    scaled = size / 1024
    for unit in ("KB", "MB"):
        if scaled < 1024:
            return f"{scaled:.2f} {unit}"
        scaled /= 1024

    # GB is the largest unit used
    return f"{scaled:.2f} GB"
@@ -1,7 +1,7 @@
1
1
  import tempfile
2
2
  import warnings
3
3
  from pathlib import Path
4
- from typing import Literal, Union, cast, overload
4
+ from typing import Literal, Union, overload
5
5
 
6
6
  from typing_extensions import override
7
7
 
@@ -40,8 +40,8 @@ class LocalSandboxEnvironment(SandboxEnvironment):
40
40
  interrupted: bool,
41
41
  ) -> None:
42
42
  for environment in environments.values():
43
- env = cast(LocalSandboxEnvironment, environment)
44
- env.directory.cleanup()
43
+ sandbox = environment.as_type(LocalSandboxEnvironment)
44
+ sandbox.directory.cleanup()
45
45
 
46
46
  def __init__(self) -> None:
47
47
  self.directory = tempfile.TemporaryDirectory(ignore_cleanup_errors=True)
@@ -445,7 +445,8 @@ async def test_exec_stdout_is_limited(sandbox_env: SandboxEnvironment) -> None:
445
445
  assert "limit of 10 MiB was exceeded" in str(e_info.value)
446
446
  truncated_output = e_info.value.truncated_output
447
447
  # `yes` outputs 'y\n' (ASCII) so the size equals the string length.
448
- assert truncated_output and len(truncated_output) == 10 * 1024**2
448
+ # some shells additionally output 'canceled\n' so we add fudge factor for that
449
+ assert truncated_output and (len(truncated_output) - 10 * 1024**2) < 10
449
450
 
450
451
 
451
452
  async def test_exec_stderr_is_limited(sandbox_env: SandboxEnvironment) -> None:
@@ -199,7 +199,10 @@ async def subprocess(
199
199
  else:
200
200
  result = await asyncio.wait_for(anext(rc), timeout=timeout)
201
201
  return cast(Union[ExecResult[str], ExecResult[bytes]], result)
202
- except asyncio.exceptions.TimeoutError:
202
+ # wait_for raises asyncio.TimeoutError under Python 3.10, but TimeoutError
203
+ # under Python >= 3.11! asyncio.timeout (introduced in Python 3.11) always
204
+ # raises the standard TimeoutError
205
+ except (TimeoutError, asyncio.exceptions.TimeoutError):
203
206
  # terminate timed out process -- try for graceful termination
204
207
  # then be more forceful if required
205
208
  try:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: inspect_ai
3
- Version: 0.3.68
3
+ Version: 0.3.70
4
4
  Summary: Framework for large language model evaluations
5
5
  Author: UK AI Security Institute
6
6
  License: MIT License
@@ -26,7 +26,7 @@ Requires-Dist: beautifulsoup4
26
26
  Requires-Dist: click>=8.1.3
27
27
  Requires-Dist: debugpy
28
28
  Requires-Dist: docstring-parser>=0.16
29
- Requires-Dist: fsspec>=2021.09.0
29
+ Requires-Dist: fsspec<=2024.12.0,>=2023.1.0
30
30
  Requires-Dist: httpx
31
31
  Requires-Dist: ijson>=3.2.0
32
32
  Requires-Dist: jsonlines>=3.0.0
@@ -45,7 +45,7 @@ Requires-Dist: s3fs>=2023
45
45
  Requires-Dist: semver>=3.0.0
46
46
  Requires-Dist: shortuuid
47
47
  Requires-Dist: tenacity
48
- Requires-Dist: textual<=1.0.0,>=0.86.2
48
+ Requires-Dist: textual>=0.86.2
49
49
  Requires-Dist: typing_extensions>=4.9.0
50
50
  Requires-Dist: zipp>=3.19.1
51
51
  Provides-Extra: dev
@@ -53,7 +53,7 @@ Requires-Dist: anthropic; extra == "dev"
53
53
  Requires-Dist: aioboto3; extra == "dev"
54
54
  Requires-Dist: azure-ai-inference; extra == "dev"
55
55
  Requires-Dist: google-cloud-aiplatform; extra == "dev"
56
- Requires-Dist: google-generativeai; extra == "dev"
56
+ Requires-Dist: google-genai; extra == "dev"
57
57
  Requires-Dist: goodfire; extra == "dev"
58
58
  Requires-Dist: griffe; extra == "dev"
59
59
  Requires-Dist: groq; extra == "dev"
@@ -71,7 +71,7 @@ Requires-Dist: pytest-asyncio; extra == "dev"
71
71
  Requires-Dist: pytest-cov; extra == "dev"
72
72
  Requires-Dist: pytest-dotenv; extra == "dev"
73
73
  Requires-Dist: pytest-xdist; extra == "dev"
74
- Requires-Dist: ruff==0.9.5; extra == "dev"
74
+ Requires-Dist: ruff==0.9.6; extra == "dev"
75
75
  Requires-Dist: textual-dev>=0.86.2; extra == "dev"
76
76
  Requires-Dist: types-Markdown; extra == "dev"
77
77
  Requires-Dist: types-PyYAML; extra == "dev"