inspect-ai 0.3.53__py3-none-any.whl → 0.3.55__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in that registry.
Files changed (37)
  1. inspect_ai/_cli/eval.py +26 -1
  2. inspect_ai/_cli/main.py +2 -0
  3. inspect_ai/_cli/trace.py +244 -0
  4. inspect_ai/_display/textual/app.py +5 -1
  5. inspect_ai/_display/textual/widgets/tasks.py +13 -3
  6. inspect_ai/_eval/eval.py +17 -0
  7. inspect_ai/_eval/task/images.py +4 -14
  8. inspect_ai/_eval/task/log.py +2 -1
  9. inspect_ai/_eval/task/run.py +26 -10
  10. inspect_ai/_util/constants.py +3 -3
  11. inspect_ai/_util/display.py +1 -0
  12. inspect_ai/_util/logger.py +34 -8
  13. inspect_ai/_util/trace.py +275 -0
  14. inspect_ai/log/_log.py +3 -0
  15. inspect_ai/log/_message.py +2 -2
  16. inspect_ai/log/_recorders/eval.py +6 -17
  17. inspect_ai/log/_recorders/json.py +19 -17
  18. inspect_ai/model/_cache.py +22 -16
  19. inspect_ai/model/_call_tools.py +9 -1
  20. inspect_ai/model/_generate_config.py +2 -2
  21. inspect_ai/model/_model.py +11 -12
  22. inspect_ai/model/_providers/bedrock.py +1 -1
  23. inspect_ai/model/_providers/openai.py +11 -1
  24. inspect_ai/tool/_tools/_web_browser/_web_browser.py +1 -1
  25. inspect_ai/util/_sandbox/context.py +6 -1
  26. inspect_ai/util/_sandbox/docker/compose.py +58 -19
  27. inspect_ai/util/_sandbox/docker/docker.py +11 -11
  28. inspect_ai/util/_sandbox/docker/util.py +0 -6
  29. inspect_ai/util/_sandbox/service.py +17 -7
  30. inspect_ai/util/_subprocess.py +6 -1
  31. inspect_ai/util/_subtask.py +8 -2
  32. {inspect_ai-0.3.53.dist-info → inspect_ai-0.3.55.dist-info}/METADATA +7 -7
  33. {inspect_ai-0.3.53.dist-info → inspect_ai-0.3.55.dist-info}/RECORD +37 -35
  34. {inspect_ai-0.3.53.dist-info → inspect_ai-0.3.55.dist-info}/LICENSE +0 -0
  35. {inspect_ai-0.3.53.dist-info → inspect_ai-0.3.55.dist-info}/WHEEL +0 -0
  36. {inspect_ai-0.3.53.dist-info → inspect_ai-0.3.55.dist-info}/entry_points.txt +0 -0
  37. {inspect_ai-0.3.53.dist-info → inspect_ai-0.3.55.dist-info}/top_level.txt +0 -0
@@ -58,7 +58,7 @@ class GenerateConfigArgs(TypedDict, total=False):
     """How many chat completion choices to generate for each input message. OpenAI, Grok, Google, and TogetherAI only."""

     logprobs: bool | None
-    """Return log probabilities of the output tokens. OpenAI, Google, Grok, TogetherAI, and Huggingface only."""
+    """Return log probabilities of the output tokens. OpenAI, Google, Grok, TogetherAI, Huggingface, llama-cpp-python, and vLLM only."""

     top_logprobs: int | None
     """Number of most likely tokens (0-20) to return at each token position, each with an associated log probability. OpenAI, Google, Grok, and Huggingface only."""
@@ -128,7 +128,7 @@ class GenerateConfig(BaseModel):
     """How many chat completion choices to generate for each input message. OpenAI, Grok, Google, TogetherAI, and vLLM only."""

     logprobs: bool | None = Field(default=None)
-    """Return log probabilities of the output tokens. OpenAI, Google, Grok, TogetherAI, Huggingface, and vLLM only."""
+    """Return log probabilities of the output tokens. OpenAI, Google, Grok, TogetherAI, Huggingface, llama-cpp-python, and vLLM only."""

     top_logprobs: int | None = Field(default=None)
     """Number of most likely tokens (0-20) to return at each token position, each with an associated log probability. OpenAI, Google, Grok, Huggingface, and vLLM only."""
@@ -9,7 +9,6 @@ from contextvars import ContextVar
 from copy import deepcopy
 from typing import Any, Callable, Literal, Type, cast

-from shortuuid import uuid
 from tenacity import (
     retry,
     retry_if_exception,
@@ -30,6 +29,7 @@ from inspect_ai._util.registry import (
     registry_unqualified_name,
 )
 from inspect_ai._util.retry import log_rate_limit_retry
+from inspect_ai._util.trace import trace_action
 from inspect_ai.tool import Tool, ToolChoice, ToolFunction, ToolInfo
 from inspect_ai.tool._tool_def import ToolDef, tool_defs
 from inspect_ai.util import concurrency
@@ -363,17 +363,16 @@ class Model:
                 cache="write" if cache else None,
             )

-            generate_id = uuid()
-            logger.debug(f"model generate {generate_id} ({str(self)})")
-            time_start = time.perf_counter()
-            result = await self.api.generate(
-                input=input,
-                tools=tools,
-                tool_choice=tool_choice,
-                config=config,
-            )
-            time_elapsed = time.perf_counter() - time_start
-            logger.debug(f"model generate {generate_id} (completed)")
+            with trace_action(logger, "Model", f"generate ({str(self)})"):
+                time_start = time.perf_counter()
+                result = await self.api.generate(
+                    input=input,
+                    tools=tools,
+                    tool_choice=tool_choice,
+                    config=config,
+                )
+                time_elapsed = time.perf_counter() - time_start
+
             if isinstance(result, tuple):
                 output, call = result
             else:
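Note: the hunk above (and the `_subprocess.py` and `_subtask.py` hunks later in this diff) replaces ad-hoc `logger.debug()` bookkeeping with the new `trace_action` context manager from the newly added `inspect_ai/_util/trace.py`. A minimal usage sketch, based only on the call sites visible in this diff; exactly what records `trace_action` writes (presumably start/completion/error entries surfaced by the new `_cli/trace.py` command) is an assumption:

```python
import asyncio
from logging import getLogger

from inspect_ai._util.trace import trace_action  # module added in this release

logger = getLogger(__name__)


async def slow_operation() -> str:
    await asyncio.sleep(1)
    return "done"


async def main() -> None:
    # the category ("Demo") and message are free-form strings; an exception
    # raised inside the block propagates after being recorded for the action
    with trace_action(logger, "Demo", "slow_operation"):
        await slow_operation()


asyncio.run(main())
```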
@@ -312,7 +312,7 @@ class BedrockAPI(ModelAPI):
         from botocore.exceptions import ClientError

         # The bedrock client
-        async with self.session.client(
+        async with self.session.client(  # type: ignore[call-overload]
             service_name="bedrock-runtime",
             endpoint_url=self.base_url,
             config=Config(
@@ -1,5 +1,6 @@
 import json
 import os
+from logging import getLogger
 from typing import Any

 from openai import (
@@ -36,6 +37,7 @@ from inspect_ai._util.constants import DEFAULT_MAX_RETRIES
 from inspect_ai._util.content import Content
 from inspect_ai._util.error import PrerequisiteError
 from inspect_ai._util.images import image_as_data_uri
+from inspect_ai._util.logger import warn_once
 from inspect_ai._util.url import is_data_uri, is_http_url
 from inspect_ai.tool import ToolCall, ToolChoice, ToolFunction, ToolInfo

@@ -58,6 +60,8 @@ from .util import (
     parse_tool_call,
 )

+logger = getLogger(__name__)
+
 OPENAI_API_KEY = "OPENAI_API_KEY"
 AZURE_OPENAI_API_KEY = "AZURE_OPENAI_API_KEY"
 AZUREAI_OPENAI_API_KEY = "AZUREAI_OPENAI_API_KEY"
@@ -270,7 +274,13 @@ class OpenAIAPI(ModelAPI):
         if config.seed is not None:
             params["seed"] = config.seed
         if config.temperature is not None:
-            params["temperature"] = config.temperature
+            if self.is_o1():
+                warn_once(
+                    logger,
+                    "o1 models do not support the 'temperature' parameter (temperature is always 1).",
+                )
+            else:
+                params["temperature"] = config.temperature
         # TogetherAPI requires temperature w/ num_choices
         elif config.num_choices is not None:
             params["temperature"] = 1
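Note: the temperature change relies on `warn_once` from `inspect_ai._util.logger` (extended in this release, +34 -8 in the file list above). Its implementation is not shown in this diff; below is a hedged sketch of the de-duplication behaviour the call site appears to assume, not the package's actual code:

```python
# Illustrative only -- the real inspect_ai._util.logger.warn_once may differ.
# Idea: emit a given warning message at most once per process so that repeated
# generate() calls against an o1 model do not flood the log with the same warning.
from logging import Logger

_warned: set[str] = set()


def warn_once(logger: Logger, message: str) -> None:
    if message not in _warned:
        _warned.add(message)
        logger.warning(message)
```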
@@ -362,7 +362,7 @@ async def web_browser_cmd(cmd: str, *args: str) -> str:
     else:
         arg_list = ["python3", WEB_CLIENT_REQUEST, cmd] + list(args)

-    result = await sandbox_env.exec(arg_list)
+    result = await sandbox_env.exec(arg_list, timeout=180)
     if not result.success:
         raise RuntimeError(
             f"Error executing web browser command {cmd}({', '.join(args)}): {result.stderr}"
@@ -191,7 +191,12 @@ async def setup_sandbox_environment(

     # chmod, execute, and remove
     async def exec(cmd: list[str]) -> None:
-        result = await env.exec(cmd)
+        try:
+            result = await env.exec(cmd, timeout=30)
+        except TimeoutError:
+            raise RuntimeError(
+                f"Timed out executing command {' '.join(cmd)} in sandbox"
+            )

         if not result.success:
             raise RuntimeError(
@@ -16,7 +16,7 @@ from .prereqs import (
     DOCKER_COMPOSE_REQUIRED_VERSION_PULL_POLICY,
     validate_docker_compose,
 )
-from .util import ComposeProject, is_inspect_project, sandbox_log
+from .util import ComposeProject, is_inspect_project

 logger = getLogger(__name__)

@@ -31,7 +31,9 @@ async def compose_up(project: ComposeProject) -> None:
         project=project,
     )
     if not result.success:
-        msg = f"Failed to start docker services {result.stderr}"
+        msg = (
+            f"Failed to start docker services for {project.config}: " f"{result.stderr}"
+        )
         raise RuntimeError(msg)


@@ -94,7 +96,10 @@ async def compose_check_running(services: list[str], project: ComposeProject) ->
         for running_service in running_services:
             unhealthy_services.remove(running_service["Service"])

-        msg = f"One or more docker containers failed to start {','.join(unhealthy_services)}"
+        msg = (
+            "One or more docker containers failed to start from "
+            f"{project.config}: {','.join(unhealthy_services)}"
+        )
         raise RuntimeError(msg)
     else:
         raise RuntimeError("No services started")
@@ -152,8 +157,9 @@ async def compose_pull(

 async def compose_exec(
     command: list[str],
+    *,
     project: ComposeProject,
-    timeout: int | None = None,
+    timeout: int | None,
     input: str | bytes | None = None,
     output_limit: int | None = None,
 ) -> ExecResult[str]:
@@ -206,7 +212,6 @@ async def compose_cleanup_images(
     cwd: str | None = None,
     timeout: int | None = None,
 ) -> None:
-    sandbox_log("Removing images")
     # List the images that would be created for this compose
     images_result = await compose_command(
         ["config", "--images"], project=project, cwd=cwd
@@ -241,10 +246,14 @@ async def compose_cleanup_images(
             logger.warning(msg)


+DEFAULT_COMPOSE_TIMEOUT = 60
+
+
 async def compose_command(
     command: list[str],
+    *,
     project: ComposeProject,
-    timeout: int | None = None,
+    timeout: int | None = DEFAULT_COMPOSE_TIMEOUT,
     input: str | bytes | None = None,
     cwd: str | Path | None = None,
     forward_env: bool = True,
@@ -278,16 +287,46 @@ async def compose_command(
     # build final command
     compose_command = compose_command + command

-    # Execute the command
-    sandbox_log(f"compose command: {shlex.join(compose_command)}")
-    result = await subprocess(
-        compose_command,
-        input=input,
-        cwd=cwd,
-        env=env,
-        timeout=timeout,
-        capture_output=capture_output,
-        output_limit=output_limit,
-    )
-    sandbox_log(f"compose command completed: {shlex.join(compose_command)}")
-    return result
+    # function to run command
+    async def run_command(command_timeout: int | None) -> ExecResult[str]:
+        result = await subprocess(
+            compose_command,
+            input=input,
+            cwd=cwd,
+            env=env,
+            timeout=command_timeout,
+            capture_output=capture_output,
+            output_limit=output_limit,
+        )
+        return result
+
+    # we have observed underlying unreliability in docker compose in some linux
+    # environments on EC2 -- this exhibits in very simple commands (e.g. compose config)
+    # simply never returning. this tends to happen when we know there is a large
+    # number of commands in flight (task/sample init) so could be some sort of
+    # timing issue / race condition in the docker daemon. we've also observed that
+    # these same commands succeed if you just retry them. therefore, we add some
+    # extra resiliance by retrying commands with a timeout once. we were observing
+    # commands hanging at a rate of ~ 1/1000, so we retry up to twice (tweaking the
+    # retry time down) to make the odds of hanging vanishingly small
+
+    if timeout is not None:
+        MAX_RETRIES = 2
+        retries = 0
+        while True:
+            try:
+                command_timeout = (
+                    timeout if retries == 0 else (min(timeout, 60) // retries)
+                )
+                return await run_command(command_timeout)
+            except TimeoutError:
+                retries += 1
+                if retries <= MAX_RETRIES:
+                    logger.info(
+                        f"Retrying docker compose command: {shlex.join(compose_command)}"
+                    )
+                else:
+                    raise
+
+    else:
+        return await run_command(timeout)
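Note: the comment block above gives the motivation; the control flow amounts to "run with the full timeout, on TimeoutError retry up to twice with a reduced timeout, then re-raise". A standalone sketch of that pattern follows (the helper name and the use of `asyncio.wait_for` are illustrative; the real code passes the timeout through to `subprocess()`):

```python
import asyncio
from typing import Awaitable, Callable, TypeVar

T = TypeVar("T")

MAX_RETRIES = 2


async def run_with_retries(run_once: Callable[[], Awaitable[T]], timeout: int) -> T:
    retries = 0
    while True:
        try:
            # first attempt gets the full timeout; retries get a reduced one
            attempt_timeout = timeout if retries == 0 else min(timeout, 60) // retries
            return await asyncio.wait_for(run_once(), attempt_timeout)
        except (TimeoutError, asyncio.TimeoutError):  # same class on Python 3.11+
            retries += 1
            if retries > MAX_RETRIES:
                raise
```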
@@ -42,7 +42,7 @@ from .compose import (
 from .config import CONFIG_FILES, DOCKERFILE
 from .internal import build_internal_image, is_internal_image
 from .prereqs import validate_prereqs
-from .util import ComposeProject, sandbox_log, task_project_name
+from .util import ComposeProject, task_project_name

 logger = getLogger(__name__)

@@ -113,8 +113,6 @@ class DockerSandboxEnvironment(SandboxEnvironment):
         config: SandboxEnvironmentConfigType | None,
         metadata: dict[str, str],
     ) -> dict[str, SandboxEnvironment]:
-        sandbox_log("setup")
-
         # create environment variables for sample metadata
         env: dict[str, str] = {}
         if isinstance(config, str) and Path(config).exists():
@@ -264,7 +262,9 @@ class DockerSandboxEnvironment(SandboxEnvironment):

     @override
     async def write_file(self, file: str, contents: str | bytes) -> None:
-        sandbox_log(f"write_file: {file}")
+        # exec function w/ timeout
+        async def exec(cmd: list[str]) -> ExecResult[str]:
+            return await self.exec(cmd, timeout=60)

         # resolve relative file paths
         file = self.container_file(file)
@@ -311,8 +311,8 @@ class DockerSandboxEnvironment(SandboxEnvironment):
             local_tmpfile.close()  # this will also delete the file

         if not hasattr(self, "_docker_user"):
-            uid = (await self.exec(["id", "-u"])).stdout.strip()
-            gid = (await self.exec(["id", "-g"])).stdout.strip()
+            uid = (await exec(["id", "-u"])).stdout.strip()
+            gid = (await exec(["id", "-g"])).stdout.strip()
             self._docker_user = (uid, gid)

         await compose_command(
@@ -331,7 +331,7 @@ class DockerSandboxEnvironment(SandboxEnvironment):
         parent = PurePosixPath(file).parent

         # We do these steps in a shell script for efficiency to avoid round-trips to docker.
-        res_cp = await self.exec(
+        res_cp = await exec(
             [
                 "sh",
                 "-e",
@@ -346,7 +346,7 @@ class DockerSandboxEnvironment(SandboxEnvironment):

         if res_cp.returncode != 0:
             if "Permission denied" in res_cp.stderr:
-                ls_result = await self.exec(["ls", "-la", "."])
+                ls_result = await exec(["ls", "-la", "."])
                 error_string = f"Permission was denied. Error details: {res_cp.stderr}; ls -la: {ls_result.stdout}; {self._docker_user=}"
                 raise PermissionError(error_string)
             elif (
@@ -367,8 +367,6 @@ class DockerSandboxEnvironment(SandboxEnvironment):

     @override
     async def read_file(self, file: str, text: bool = True) -> Union[str, bytes]:
-        sandbox_log(f"read_file: {file}")
-
         # Write the contents to a temp file
         with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as temp_dir:
             # resolve relative file paths
@@ -449,7 +447,9 @@ class DockerSandboxEnvironment(SandboxEnvironment):
 async def container_working_dir(
     service: str, project: ComposeProject, default: str = "/"
 ) -> str:
-    result = await compose_exec([service, "sh", "-c", "pwd"], project)
+    result = await compose_exec(
+        [service, "sh", "-c", "pwd"], timeout=60, project=project
+    )
     if result.success:
         return result.stdout.strip()
     else:
@@ -5,8 +5,6 @@ from pathlib import Path

 from shortuuid import uuid

-from inspect_ai._util.constants import SANDBOX
-
 from ..environment import SandboxEnvironmentConfigType
 from .config import (
     COMPOSE_DOCKERFILE_YAML,
@@ -94,7 +92,3 @@ inspect_project_pattern = r"^inspect-[a-z\d\-_]*-i[a-z\d]{22}$"

 def is_inspect_project(name: str) -> bool:
     return re.match(inspect_project_pattern, name) is not None
-
-
-def sandbox_log(msg: str) -> None:
-    logger.log(SANDBOX, f"DOCKER: {msg}")
@@ -10,6 +10,8 @@ from typing import (

 from pydantic import JsonValue

+from inspect_ai.util._subprocess import ExecResult
+
 from .environment import SandboxEnvironment

 REQUESTS_DIR = "requests"
@@ -129,9 +131,9 @@ class SandboxService:
         """Handle all pending service requests."""
         # list pending requests
         list_requests = f"ls -1 {self._requests_dir}/*.json"
-        result = await self._sandbox.exec(["bash", "-c", list_requests])
+        result = await self._exec(["bash", "-c", list_requests])

-        # process reqests
+        # process requests
         if result.success:
             request_files = result.stdout.strip().splitlines()
             if request_files:
@@ -142,7 +144,7 @@ class SandboxService:
     async def _handle_request(self, request_file: str) -> None:
         # read request
         read_request = f"cat {request_file}"
-        result = await self._sandbox.exec(["bash", "-c", read_request])
+        result = await self._exec(["bash", "-c", read_request])
         if not result.success:
             raise RuntimeError(
                 f"Error reading request for service {self._name}: '{read_request}' ({result.stderr})"
@@ -181,7 +183,7 @@ class SandboxService:
         await self._write_text_file(response_path, json.dumps(response_data))

         # remove request file
-        exec_rm = await self._sandbox.exec(["rm", "-f", request_file])
+        exec_rm = await self._exec(["rm", "-f", request_file])
         if not exec_rm.success:
             raise RuntimeError(
                 f"Error removing request file '{request_file}': {exec_rm.stderr}"
@@ -215,8 +217,8 @@ class SandboxService:

     async def _create_rpc_dir(self, name: str) -> str:
         rpc_dir = PurePosixPath(self._service_dir, name).as_posix()
-        result = await self._sandbox.exec(["rm", "-rf", rpc_dir])
-        result = await self._sandbox.exec(["mkdir", "-p", rpc_dir])
+        result = await self._exec(["rm", "-rf", rpc_dir])
+        result = await self._exec(["mkdir", "-p", rpc_dir])
         if not result.success:
             raise RuntimeError(
                 f"Error creating rpc directory '{name}' for sandbox '{self._name}': {result.stderr}"
@@ -224,11 +226,19 @@ class SandboxService:
         return rpc_dir

     async def _write_text_file(self, file: str, contents: str) -> None:
-        result = await self._sandbox.exec(["tee", "--", file], input=contents)
+        result = await self._exec(["tee", "--", file], input=contents)
         if not result.success:
             msg = f"Failed to write file '{file}' into container: {result.stderr}"
             raise RuntimeError(msg)

+    async def _exec(self, cmd: list[str], input: str | None = None) -> ExecResult[str]:
+        try:
+            return await self._sandbox.exec(cmd, input=input, timeout=30)
+        except TimeoutError:
+            raise RuntimeError(
+                f"Timed out executing command {' '.join(cmd)} in sandbox"
+            )
+
     def _generate_client(self) -> str:
         return dedent(f"""
             from typing import Any
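Note: the `_exec` helper above is the same wrap-exec-with-a-timeout pattern used elsewhere in this release (`setup_sandbox_environment` and the docker `write_file` changes). A generic sketch, assuming, as these call sites do, that `SandboxEnvironment.exec` raises `TimeoutError` when its timeout expires; the function name is illustrative:

```python
from inspect_ai.util import SandboxEnvironment
from inspect_ai.util._subprocess import ExecResult


async def exec_with_timeout(
    sandbox: SandboxEnvironment, cmd: list[str], timeout: int = 30
) -> ExecResult[str]:
    try:
        return await sandbox.exec(cmd, timeout=timeout)
    except TimeoutError:
        # surface a hang as an actionable error instead of waiting indefinitely
        raise RuntimeError(
            f"Timed out executing command {' '.join(cmd)} in sandbox"
        ) from None
```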
@@ -1,5 +1,6 @@
 import asyncio
 import os
+import shlex
 import sys
 from asyncio.subprocess import Process
 from contextvars import ContextVar
@@ -8,6 +9,8 @@ from logging import getLogger
 from pathlib import Path
 from typing import AsyncGenerator, Generic, Literal, TypeVar, Union, cast, overload

+from inspect_ai._util.trace import trace_action
+
 from ._concurrency import concurrency

 logger = getLogger(__name__)
@@ -217,7 +220,9 @@ async def subprocess(

     # run command
     async with concurrency("subprocesses", max_subprocesses_context_var.get()):
-        return await run_command_timeout()
+        message = args if isinstance(args, str) else shlex.join(args)
+        with trace_action(logger, "Subprocess", message):
+            return await run_command_timeout()


 def init_max_subprocesses(max_subprocesses: int | None = None) -> None:
@@ -1,6 +1,7 @@
 import asyncio
 import inspect
 from functools import wraps
+from logging import getLogger
 from typing import (
     Any,
     Callable,
@@ -13,6 +14,7 @@ from typing import (

 from inspect_ai._util._async import is_callable_coroutine
 from inspect_ai._util.content import Content
+from inspect_ai._util.trace import trace_action
 from inspect_ai.util._store import Store, dict_jsonable, init_subtask_store

 SubtaskResult = str | int | float | bool | list[Content]
@@ -20,6 +22,9 @@ SubtaskResult = str | int | float | bool | list[Content]
 RT = TypeVar("RT", SubtaskResult, Any)


+logger = getLogger(__name__)
+
+
 @runtime_checkable
 class Subtask(Protocol):
     """Subtask with distinct `Store` and `Transcript`.
@@ -118,8 +123,9 @@ def subtask(
                init_subtask(subtask_name, store if store else Store())

                # run the subtask
-               with track_store_changes():  # type: ignore
-                   result = await func(*args, **kwargs)
+               with trace_action(logger, "Subtask", subtask_name):
+                   with track_store_changes():  # type: ignore
+                       result = await func(*args, **kwargs)

                # return result and event
                return result, list(transcript().events)
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: inspect_ai
-Version: 0.3.53
+Version: 0.3.55
 Summary: Framework for large language model evaluations
 Author: UK AI Safety Institute
 License: MIT License
@@ -67,7 +67,7 @@ Requires-Dist: pytest-asyncio; extra == "dev"
 Requires-Dist: pytest-cov; extra == "dev"
 Requires-Dist: pytest-dotenv; extra == "dev"
 Requires-Dist: pytest-xdist; extra == "dev"
-Requires-Dist: ruff==0.8.3; extra == "dev"
+Requires-Dist: ruff==0.8.4; extra == "dev"
 Requires-Dist: textual-dev>=0.86.2; extra == "dev"
 Requires-Dist: types-PyYAML; extra == "dev"
 Requires-Dist: types-beautifulsoup4; extra == "dev"
@@ -96,22 +96,22 @@ To get started with Inspect, please see the documentation at <https://inspect.ai

 ***

-
-
 To work on development of Inspect, clone the repository and install with the `-e` flag and `[dev]` optional dependencies:

 ```bash
-$ git clone https://github.com/UKGovernmentBEIS/inspect_ai.git
-$ cd inspect_ai
-$ pip install -e ".[dev]"
+git clone https://github.com/UKGovernmentBEIS/inspect_ai.git
+cd inspect_ai
+pip install -e ".[dev]"
 ```

 Optionally install pre-commit hooks via
+
 ```bash
 make hooks
 ```

 Run linting, formatting, and tests via
+
 ```bash
 make check
 make test