inspect-ai 0.3.103__py3-none-any.whl → 0.3.105__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134)
  1. inspect_ai/_cli/common.py +2 -1
  2. inspect_ai/_cli/eval.py +2 -2
  3. inspect_ai/_display/core/active.py +3 -0
  4. inspect_ai/_display/core/config.py +1 -0
  5. inspect_ai/_display/core/panel.py +21 -13
  6. inspect_ai/_display/core/results.py +3 -7
  7. inspect_ai/_display/core/rich.py +3 -5
  8. inspect_ai/_display/log/__init__.py +0 -0
  9. inspect_ai/_display/log/display.py +173 -0
  10. inspect_ai/_display/plain/display.py +2 -2
  11. inspect_ai/_display/rich/display.py +2 -4
  12. inspect_ai/_display/textual/app.py +1 -6
  13. inspect_ai/_display/textual/widgets/task_detail.py +3 -14
  14. inspect_ai/_display/textual/widgets/tasks.py +1 -1
  15. inspect_ai/_eval/eval.py +1 -1
  16. inspect_ai/_eval/evalset.py +3 -3
  17. inspect_ai/_eval/registry.py +6 -1
  18. inspect_ai/_eval/run.py +5 -1
  19. inspect_ai/_eval/task/constants.py +1 -0
  20. inspect_ai/_eval/task/log.py +2 -0
  21. inspect_ai/_eval/task/run.py +65 -39
  22. inspect_ai/_util/citation.py +88 -0
  23. inspect_ai/_util/content.py +24 -2
  24. inspect_ai/_util/json.py +17 -2
  25. inspect_ai/_util/registry.py +19 -4
  26. inspect_ai/_view/schema.py +0 -6
  27. inspect_ai/_view/server.py +17 -0
  28. inspect_ai/_view/www/dist/assets/index.css +93 -31
  29. inspect_ai/_view/www/dist/assets/index.js +10639 -10011
  30. inspect_ai/_view/www/log-schema.json +418 -1
  31. inspect_ai/_view/www/node_modules/flatted/python/flatted.py +149 -0
  32. inspect_ai/_view/www/node_modules/katex/src/fonts/generate_fonts.py +58 -0
  33. inspect_ai/_view/www/node_modules/katex/src/metrics/extract_tfms.py +114 -0
  34. inspect_ai/_view/www/node_modules/katex/src/metrics/extract_ttfs.py +122 -0
  35. inspect_ai/_view/www/node_modules/katex/src/metrics/format_json.py +28 -0
  36. inspect_ai/_view/www/node_modules/katex/src/metrics/parse_tfm.py +211 -0
  37. inspect_ai/_view/www/package.json +2 -2
  38. inspect_ai/_view/www/src/@types/log.d.ts +140 -39
  39. inspect_ai/_view/www/src/app/content/RecordTree.tsx +13 -0
  40. inspect_ai/_view/www/src/app/log-view/LogView.tsx +1 -1
  41. inspect_ai/_view/www/src/app/routing/logNavigation.ts +31 -0
  42. inspect_ai/_view/www/src/app/routing/{navigationHooks.ts → sampleNavigation.ts} +39 -86
  43. inspect_ai/_view/www/src/app/samples/SampleDialog.tsx +1 -1
  44. inspect_ai/_view/www/src/app/samples/SampleDisplay.tsx +1 -1
  45. inspect_ai/_view/www/src/app/samples/chat/ChatMessage.module.css +4 -0
  46. inspect_ai/_view/www/src/app/samples/chat/ChatMessage.tsx +17 -0
  47. inspect_ai/_view/www/src/app/samples/chat/MessageCitations.module.css +16 -0
  48. inspect_ai/_view/www/src/app/samples/chat/MessageCitations.tsx +63 -0
  49. inspect_ai/_view/www/src/app/samples/chat/MessageContent.module.css +6 -0
  50. inspect_ai/_view/www/src/app/samples/chat/MessageContent.tsx +174 -25
  51. inspect_ai/_view/www/src/app/samples/chat/MessageContents.tsx +21 -3
  52. inspect_ai/_view/www/src/app/samples/chat/content-data/ContentDataView.module.css +7 -0
  53. inspect_ai/_view/www/src/app/samples/chat/content-data/ContentDataView.tsx +111 -0
  54. inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearch.module.css +10 -0
  55. inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearch.tsx +14 -0
  56. inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearchResults.module.css +19 -0
  57. inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearchResults.tsx +49 -0
  58. inspect_ai/_view/www/src/app/samples/chat/messages.ts +7 -1
  59. inspect_ai/_view/www/src/app/samples/chat/tools/ToolCallView.tsx +12 -2
  60. inspect_ai/_view/www/src/app/samples/chat/types.ts +4 -0
  61. inspect_ai/_view/www/src/app/samples/list/SampleList.tsx +1 -1
  62. inspect_ai/_view/www/src/app/samples/sample-tools/filters.ts +26 -0
  63. inspect_ai/_view/www/src/app/samples/sample-tools/sample-filter/SampleFilter.tsx +14 -3
  64. inspect_ai/_view/www/src/app/samples/sample-tools/sample-filter/completions.ts +359 -7
  65. inspect_ai/_view/www/src/app/samples/sample-tools/sample-filter/language.ts +6 -0
  66. inspect_ai/_view/www/src/app/samples/sampleLimit.ts +2 -2
  67. inspect_ai/_view/www/src/app/samples/transcript/ModelEventView.tsx +1 -1
  68. inspect_ai/_view/www/src/app/samples/transcript/SampleLimitEventView.tsx +4 -4
  69. inspect_ai/_view/www/src/app/samples/transcript/outline/OutlineRow.tsx +1 -1
  70. inspect_ai/_view/www/src/app/samples/transcript/outline/TranscriptOutline.tsx +1 -1
  71. inspect_ai/_view/www/src/client/api/api-browser.ts +25 -0
  72. inspect_ai/_view/www/src/client/api/api-http.ts +3 -0
  73. inspect_ai/_view/www/src/client/api/api-vscode.ts +6 -0
  74. inspect_ai/_view/www/src/client/api/client-api.ts +3 -0
  75. inspect_ai/_view/www/src/client/api/jsonrpc.ts +1 -0
  76. inspect_ai/_view/www/src/client/api/types.ts +3 -0
  77. inspect_ai/_view/www/src/components/MarkdownDiv.tsx +15 -2
  78. inspect_ai/_view/www/src/state/samplePolling.ts +17 -1
  79. inspect_ai/_view/www/src/tests/README.md +2 -2
  80. inspect_ai/_view/www/src/utils/git.ts +3 -1
  81. inspect_ai/_view/www/src/utils/html.ts +6 -0
  82. inspect_ai/agent/_handoff.py +8 -5
  83. inspect_ai/agent/_react.py +5 -5
  84. inspect_ai/dataset/_dataset.py +1 -1
  85. inspect_ai/log/_condense.py +5 -0
  86. inspect_ai/log/_file.py +4 -1
  87. inspect_ai/log/_log.py +9 -4
  88. inspect_ai/log/_recorders/json.py +4 -2
  89. inspect_ai/log/_samples.py +5 -0
  90. inspect_ai/log/_util.py +2 -0
  91. inspect_ai/model/__init__.py +14 -0
  92. inspect_ai/model/_call_tools.py +17 -8
  93. inspect_ai/model/_chat_message.py +3 -0
  94. inspect_ai/model/_openai_responses.py +80 -34
  95. inspect_ai/model/_providers/_anthropic_citations.py +158 -0
  96. inspect_ai/model/_providers/_google_citations.py +100 -0
  97. inspect_ai/model/_providers/anthropic.py +219 -36
  98. inspect_ai/model/_providers/google.py +98 -22
  99. inspect_ai/model/_providers/mistral.py +20 -7
  100. inspect_ai/model/_providers/openai.py +11 -10
  101. inspect_ai/model/_providers/openai_compatible.py +3 -2
  102. inspect_ai/model/_providers/openai_responses.py +2 -5
  103. inspect_ai/model/_providers/perplexity.py +123 -0
  104. inspect_ai/model/_providers/providers.py +13 -2
  105. inspect_ai/model/_providers/vertex.py +3 -0
  106. inspect_ai/model/_trim.py +5 -0
  107. inspect_ai/tool/__init__.py +14 -0
  108. inspect_ai/tool/_mcp/_mcp.py +5 -2
  109. inspect_ai/tool/_mcp/sampling.py +19 -3
  110. inspect_ai/tool/_mcp/server.py +1 -1
  111. inspect_ai/tool/_tool.py +10 -1
  112. inspect_ai/tool/_tools/_web_search/_base_http_provider.py +104 -0
  113. inspect_ai/tool/_tools/_web_search/_exa.py +78 -0
  114. inspect_ai/tool/_tools/_web_search/_google.py +22 -25
  115. inspect_ai/tool/_tools/_web_search/_tavily.py +47 -65
  116. inspect_ai/tool/_tools/_web_search/_web_search.py +83 -36
  117. inspect_ai/tool/_tools/_web_search/_web_search_provider.py +7 -0
  118. inspect_ai/util/__init__.py +8 -0
  119. inspect_ai/util/_background.py +64 -0
  120. inspect_ai/util/_display.py +11 -2
  121. inspect_ai/util/_limit.py +72 -5
  122. inspect_ai/util/_sandbox/__init__.py +2 -0
  123. inspect_ai/util/_sandbox/docker/compose.py +2 -2
  124. inspect_ai/util/_sandbox/service.py +28 -7
  125. inspect_ai/util/_span.py +12 -1
  126. inspect_ai/util/_subprocess.py +51 -38
  127. {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.105.dist-info}/METADATA +2 -2
  128. {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.105.dist-info}/RECORD +134 -109
  129. /inspect_ai/model/{_openai_computer_use.py → _providers/_openai_computer_use.py} +0 -0
  130. /inspect_ai/model/{_openai_web_search.py → _providers/_openai_web_search.py} +0 -0
  131. {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.105.dist-info}/WHEEL +0 -0
  132. {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.105.dist-info}/entry_points.txt +0 -0
  133. {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.105.dist-info}/licenses/LICENSE +0 -0
  134. {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.105.dist-info}/top_level.txt +0 -0
inspect_ai/util/_limit.py CHANGED
@@ -4,6 +4,7 @@ import abc
 import logging
 from contextlib import ExitStack, contextmanager
 from contextvars import ContextVar
+from dataclasses import dataclass
 from types import TracebackType
 from typing import TYPE_CHECKING, Generic, Iterator, Literal, TypeVar
 
@@ -88,12 +89,31 @@ class Limit(abc.ABC):
     ) -> None:
         pass
 
+    @property
+    @abc.abstractmethod
+    def limit(self) -> float | None:
+        """The value of the limit being applied.
+
+        Can be None which represents no limit.
+        """
+        pass
+
     @property
     @abc.abstractmethod
     def usage(self) -> float:
         """The current usage of the resource being limited."""
         pass
 
+    @property
+    def remaining(self) -> float | None:
+        """The remaining "unused" amount of the resource being limited.
+
+        Returns None if the limit is None.
+        """
+        if self.limit is None:
+            return None
+        return self.limit - self.usage
+
     def _check_reuse(self) -> None:
         if self._entered:
             raise RuntimeError(
@@ -152,6 +172,46 @@ class LimitScope:
         self.limit_error: LimitExceededError | None = None
 
 
+@dataclass
+class SampleLimits:
+    """Data class to hold the limits applied to a Sample.
+
+    This is used to return the limits from `sample_limits()`.
+    """
+
+    token: Limit
+    """Token limit."""
+
+    message: Limit
+    """Message limit."""
+
+    working: Limit
+    """Working limit."""
+
+    time: Limit
+    """Time limit."""
+
+
+def sample_limits() -> SampleLimits:
+    """Get the top-level limits applied to the current `Sample`."""
+
+    def get_root_node(node: TNode | None, name: str) -> TNode:
+        if node is None:
+            raise RuntimeError(
+                f"No {name} limit node found. Is there a running sample?"
+            )
+        while node.parent is not None:
+            node = node.parent
+        return node
+
+    return SampleLimits(
+        token=get_root_node(token_limit_tree.get(), "token"),
+        message=get_root_node(message_limit_tree.get(), "message"),
+        working=get_root_node(working_limit_tree.get(), "working"),
+        time=get_root_node(time_limit_tree.get(), "time"),
+    )
+
+
 def token_limit(limit: int | None) -> _TokenLimit:
     """Limits the total number of tokens which can be used.
 
@@ -319,10 +379,9 @@ class _Tree(Generic[TNode]):
 
 
 token_limit_tree: _Tree[_TokenLimit] = _Tree("token_limit_tree")
-# Store the message limit leaf node so that we know which limit to check in
-# check_message_limit().
 message_limit_tree: _Tree[_MessageLimit] = _Tree("message_limit_tree")
 working_limit_tree: _Tree[_WorkingLimit] = _Tree("working_limit_tree")
+time_limit_tree: _Tree[_TimeLimit] = _Tree("time_limit_tree")
 
 
 class _Node:
@@ -497,7 +556,7 @@ class _MessageLimit(Limit, _Node):
         )
 
 
-class _TimeLimit(Limit):
+class _TimeLimit(Limit, _Node):
     def __init__(self, limit: float | None) -> None:
         super().__init__()
         _validate_time_limit("Time", limit)
@@ -507,8 +566,7 @@
 
     def __enter__(self) -> Limit:
         super()._check_reuse()
-        # Unlike the other limits, this one is not stored in a tree. Anyio handles all
-        # of the state.
+        time_limit_tree.push(self)
         self._cancel_scope = anyio.move_on_after(self._limit)
         self._cancel_scope.__enter__()
         self._start_time = anyio.current_time()
@@ -524,6 +582,7 @@
 
         self._cancel_scope.__exit__(exc_type, exc_val, exc_tb)
         self._end_time = anyio.current_time()
+        self._pop_and_check_identity(time_limit_tree)
         if self._cancel_scope.cancel_called and self._limit is not None:
             message = f"Time limit exceeded. limit: {self._limit} seconds"
             assert self._start_time is not None
@@ -541,6 +600,10 @@
                 source=self,
             ) from exc_val
 
+    @property
+    def limit(self) -> float | None:
+        return self._limit
+
     @property
     def usage(self) -> float:
         if self._start_time is None:
@@ -575,6 +638,10 @@ class _WorkingLimit(Limit, _Node):
         self._end_time = anyio.current_time()
         self._pop_and_check_identity(working_limit_tree)
 
+    @property
+    def limit(self) -> float | None:
+        return self._limit
+
     @property
     def usage(self) -> float:
         if self._start_time is None:
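The new `Limit.limit` and `Limit.remaining` properties, together with `sample_limits()`, let task code inspect how much of a sample's budget is left. Below is an illustrative sketch only: it assumes `sample_limits` is re-exported from `inspect_ai.util`, and the solver name and token threshold are made up.

```python
# Illustrative only: a solver that stops early when the token budget runs low.
# Assumes sample_limits() is exported from inspect_ai.util (assumed import path).
from inspect_ai.solver import Generate, TaskState, solver
from inspect_ai.util import sample_limits


@solver
def budget_aware_solver():
    async def solve(state: TaskState, generate: Generate) -> TaskState:
        limits = sample_limits()
        # remaining is None when no token limit was set for the sample
        remaining = limits.token.remaining
        if remaining is not None and remaining < 1_000:  # illustrative threshold
            state.completed = True
            return state
        return await generate(state)

    return solve
```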
inspect_ai/util/_sandbox/__init__.py CHANGED
@@ -13,6 +13,7 @@ from .environment import (
 from .limits import OutputLimitExceededError, SandboxEnvironmentLimits
 from .local import LocalSandboxEnvironment  # noqa: F401
 from .registry import sandboxenv
+from .service import sandbox_service
 
 __all__ = [
     "OutputLimitExceededError",
@@ -27,4 +28,5 @@ __all__ = [
     "sandbox",
     "sandbox_with",
     "sandbox_default",
+    "sandbox_service",
 ]
inspect_ai/util/_sandbox/docker/compose.py CHANGED
@@ -11,7 +11,7 @@ from pydantic import BaseModel
 from inspect_ai._util.error import PrerequisiteError
 from inspect_ai._util.trace import trace_message
 from inspect_ai.util._concurrency import concurrency
-from inspect_ai.util._display import display_type
+from inspect_ai.util._display import display_type, display_type_plain
 from inspect_ai.util._subprocess import ExecResult, subprocess
 
 from .prereqs import (
@@ -285,7 +285,7 @@ async def compose_command(
     env = project.env if (project.env and forward_env) else {}
 
     # ansi (apply global override)
-    if display_type() == "plain":
+    if display_type_plain():
         ansi = "never"
     if ansi:
         compose_command = compose_command + ["--ansi", ansi]
inspect_ai/util/_sandbox/service.py CHANGED
@@ -44,14 +44,35 @@
 ) -> None:
     """Run a service that is callable from within a sandbox.
 
+    The service makes available a set of methods to a sandbox
+    for calling back into the main Inspect process.
+
+    To use the service from within a sandbox, either add it to the sys path
+    or use importlib. For example, if the service is named 'foo':
+
+    ```python
+    import sys
+    sys.path.append("/var/tmp/sandbox-services/foo")
+    import foo
+    ```
+
+    Or:
+
+    ```python
+    import importlib.util
+    spec = importlib.util.spec_from_file_location(
+        "foo", "/var/tmp/sandbox-services/foo/foo.py"
+    )
+    foo = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(foo)
+    ```
+
     Args:
-        name (str): Service name
-        methods (dict[str, SandboxServiceMethod]): Service methods.
-        until (Callable[[], bool]): Function used to check whether
-            the service should stop.
-        sandbox (SandboxEnvironment): Sandbox to publish service to.
-        user (str | None): User to login as. Defaults to the sandbox environment's
-            default user.
+        name: Service name
+        methods: Service methods.
+        until: Function used to check whether the service should stop.
+        sandbox: Sandbox to publish service to.
+        user: User to login as. Defaults to the sandbox environment's default user.
     """
     # setup and start service
     service = SandboxService(name, sandbox, user)
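For the host side of the call, the docstring above implies a signature of `sandbox_service(name, methods, until, sandbox, user=None)`. The sketch below publishes a service under those assumptions; the service name, method, and completion flag are illustrative, and importing `sandbox_service` from `inspect_ai.util` assumes the new re-export shown in the `__init__.py` change above.

```python
# Illustrative sketch of publishing a sandbox service from the Inspect side.
# The "scoring"/"submit" names and the completion flag are made up for the example.
from inspect_ai.util import sandbox, sandbox_service


async def run_scoring_service() -> None:
    done = False

    async def submit(answer: str) -> bool:
        # record an answer submitted from inside the sandbox
        nonlocal done
        done = True
        return True

    await sandbox_service(
        name="scoring",
        methods={"submit": submit},
        until=lambda: done,
        sandbox=sandbox(),
    )
```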
inspect_ai/util/_span.py CHANGED
@@ -1,8 +1,12 @@
 import contextlib
+import inspect
 from contextvars import ContextVar
+from logging import getLogger
 from typing import AsyncIterator
 from uuid import uuid4
 
+logger = getLogger(__name__)
+
 
 @contextlib.asynccontextmanager
 async def span(name: str, *, type: str | None = None) -> AsyncIterator[None]:
@@ -22,6 +26,10 @@ async def span(name: str, *, type: str | None = None) -> AsyncIterator[None]:
     # span id
     id = uuid4().hex
 
+    # span caller context
+    frame = inspect.stack()[1]
+    caller = f"{frame.function}() [{frame.filename}:{frame.lineno}]"
+
     # capture parent id
     parent_id = _current_span_id.get()
 
@@ -48,7 +56,10 @@ async def span(name: str, *, type: str | None = None) -> AsyncIterator[None]:
         # send end event
         transcript()._event(SpanEndEvent(id=id))
 
-        _current_span_id.reset(token)
+        try:
+            _current_span_id.reset(token)
+        except ValueError:
+            logger.warning(f"Exiting span created in another context: {caller}")
 
 
 def current_span_id() -> str | None:
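The new `try/except ValueError` around `_current_span_id.reset(token)` guards against resetting a `ContextVar` token from a different `Context` than the one it was created in, which can happen when a span is entered and exited in different task contexts. A minimal, standalone illustration of that CPython behavior (not inspect_ai code; the exact error message varies by Python version):

```python
# Demonstrates that ContextVar.reset() rejects a token created in another Context.
import contextvars

var = contextvars.ContextVar("var", default=None)
tokens = {}


def set_in_copied_context() -> None:
    # The token returned by set() is bound to the Context it was created in.
    tokens["t"] = var.set("value")


# Set the variable inside a copied Context...
contextvars.copy_context().run(set_in_copied_context)

try:
    # ...then try to reset it from the original Context.
    var.reset(tokens["t"])
except ValueError as ex:
    print(f"reset failed: {ex}")
```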
inspect_ai/util/_subprocess.py CHANGED
@@ -2,16 +2,15 @@ import functools
 import io
 import os
 import shlex
-from contextlib import aclosing
 from contextvars import ContextVar
 from dataclasses import dataclass
 from logging import getLogger
 from pathlib import Path
 from subprocess import DEVNULL, PIPE
-from typing import AsyncGenerator, Generic, Literal, TypeVar, Union, cast, overload
+from typing import Generic, Literal, TypeVar, Union, overload
 
 import anyio
-from anyio import open_process
+from anyio import ClosedResourceError, create_task_group, open_process
 from anyio.abc import ByteReceiveStream, Process
 
 from inspect_ai._util._async import tg_collect
@@ -114,9 +113,7 @@ async def subprocess(
         else None
     )
 
-    async def run_command() -> AsyncGenerator[
-        Union[Process, ExecResult[str], ExecResult[bytes]], None
-    ]:
+    async def run_command() -> Union[ExecResult[str], ExecResult[bytes]]:
         process = await open_process(
             args,
             stdin=PIPE if input else DEVNULL,
@@ -126,9 +123,6 @@
             env={**os.environ, **env},
         )
         try:
-            # yield the process so the caller has a handle to it
-            yield process
-
             # write to stdin (convert input to bytes)
             if process.stdin and input:
                 await process.stdin.send(input)
@@ -161,19 +155,23 @@
             returncode = await process.wait()
             success = returncode == 0
             if text:
-                yield ExecResult[str](
+                return ExecResult[str](
                     success=success,
                     returncode=returncode,
                     stdout=stdout.decode() if capture_output else "",
                     stderr=stderr.decode() if capture_output else "",
                 )
             else:
-                yield ExecResult[bytes](
+                return ExecResult[bytes](
                     success=success,
                     returncode=returncode,
                     stdout=stdout if capture_output else bytes(),
                     stderr=stderr if capture_output else bytes(),
                 )
+        # Handle cancellation before aclose() is called to avoid deadlock.
+        except anyio.get_cancelled_exc_class():
+            await gracefully_terminate_cancelled_subprocess(process)
+            raise
         finally:
             try:
                 await process.aclose()
@@ -186,33 +184,13 @@
 
     # wrapper for run command that implements timeout
     async def run_command_timeout() -> Union[ExecResult[str], ExecResult[bytes]]:
-        # run the command and capture the process handle
-        async with aclosing(run_command()) as rc:
-            proc = cast(Process, await anext(rc))
-
-            # await result wrapped in timeout handler if requested
-            if timeout is not None:
-                try:
-                    with anyio.fail_after(timeout):
-                        result = await anext(rc)
-                        return cast(Union[ExecResult[str], ExecResult[bytes]], result)
-                except TimeoutError:
-                    # terminate timed out process -- try for graceful termination
-                    # then be more forceful if requied
-                    with anyio.CancelScope(shield=True):
-                        try:
-                            proc.terminate()
-                            await anyio.sleep(2)
-                            if proc.returncode is None:
-                                proc.kill()
-                        except Exception:
-                            pass
-                    raise
-
-            # await result without timeout
-            else:
-                result = await anext(rc)
-                return cast(Union[ExecResult[str], ExecResult[bytes]], result)
+        # wrap in timeout handler if requested
+        if timeout is not None:
+            with anyio.fail_after(timeout):
+                # run_command() handles terminating the process if it is cancelled.
+                return await run_command()
+        else:
+            return await run_command()
 
     # run command
     async with concurrency("subprocesses", max_subprocesses_context_var.get()):
@@ -233,6 +211,41 @@ def default_max_subprocesses() -> int:
     return cpus if cpus else 1
 
 
+async def gracefully_terminate_cancelled_subprocess(process: Process) -> None:
+    with anyio.CancelScope(shield=True):
+        try:
+            # Terminate timed out process -- try for graceful termination then kill if
+            # required.
+            process.terminate()
+            await anyio.sleep(2)
+            if process.returncode is None:
+                process.kill()
+            # With anyio's asyncio backend, process.aclose() calls process.wait() which
+            # can deadlock if the process generates so much output that it blocks
+            # waiting for the OS pipe buffer to accept more data. See
+            # https://docs.python.org/3/library/asyncio-subprocess.html#asyncio.subprocess.Process.wait
+            # Therefore, we need to ensure that the process's stdout and stderr streams
+            # are drained before we call process.wait() in aclose().
+            async with create_task_group() as tg:
+                tg.start_soon(drain_stream, process.stdout)
+                tg.start_soon(drain_stream, process.stderr)
+            # Wait for the process to exit. Will be called again by aclose().
+            await process.wait()
+        # The process may have already exited, in which case we can ignore the error.
+        except ProcessLookupError:
+            pass
+
+
+async def drain_stream(stream: ByteReceiveStream | None) -> None:
+    if stream is None:
+        return
+    try:
+        async for _ in stream:
+            pass
+    except ClosedResourceError:
+        pass
+
+
 max_subprocesses_context_var = ContextVar[int](
     "max_subprocesses", default=default_max_subprocesses()
 )
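From the caller's perspective the refactor keeps the public behavior of `subprocess()`: a `timeout` still surfaces as `TimeoutError`, but cancellation now terminates the child and drains its pipes inside `run_command()` before the error propagates. An illustrative usage sketch (the command and timeout value are arbitrary; typically you would call `subprocess()` from a tool or solver rather than a standalone script):

```python
# Illustrative use of the public subprocess() helper with a timeout.
import anyio

from inspect_ai.util import ExecResult, subprocess


async def main() -> None:
    try:
        result: ExecResult[str] = await subprocess(
            ["python", "-c", "import time; time.sleep(60)"],
            timeout=5,
        )
        print(result.returncode, result.stdout)
    except TimeoutError:
        # the child has already been terminated and its pipes drained
        print("command timed out")


anyio.run(main)
```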
{inspect_ai-0.3.103.dist-info → inspect_ai-0.3.105.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: inspect_ai
-Version: 0.3.103
+Version: 0.3.105
 Summary: Framework for large language model evaluations
 Author: UK AI Security Institute
 License: MIT License
@@ -63,7 +63,7 @@ Requires-Dist: groq; extra == "dev"
 Requires-Dist: ipython; extra == "dev"
 Requires-Dist: jsonpath-ng; extra == "dev"
 Requires-Dist: markdown; extra == "dev"
-Requires-Dist: mcp; extra == "dev"
+Requires-Dist: mcp>=1.9.4; extra == "dev"
 Requires-Dist: mistralai; extra == "dev"
 Requires-Dist: moto[server]; extra == "dev"
 Requires-Dist: mypy>=1.16.0; extra == "dev"