inspect-ai 0.3.103__py3-none-any.whl → 0.3.105__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/common.py +2 -1
- inspect_ai/_cli/eval.py +2 -2
- inspect_ai/_display/core/active.py +3 -0
- inspect_ai/_display/core/config.py +1 -0
- inspect_ai/_display/core/panel.py +21 -13
- inspect_ai/_display/core/results.py +3 -7
- inspect_ai/_display/core/rich.py +3 -5
- inspect_ai/_display/log/__init__.py +0 -0
- inspect_ai/_display/log/display.py +173 -0
- inspect_ai/_display/plain/display.py +2 -2
- inspect_ai/_display/rich/display.py +2 -4
- inspect_ai/_display/textual/app.py +1 -6
- inspect_ai/_display/textual/widgets/task_detail.py +3 -14
- inspect_ai/_display/textual/widgets/tasks.py +1 -1
- inspect_ai/_eval/eval.py +1 -1
- inspect_ai/_eval/evalset.py +3 -3
- inspect_ai/_eval/registry.py +6 -1
- inspect_ai/_eval/run.py +5 -1
- inspect_ai/_eval/task/constants.py +1 -0
- inspect_ai/_eval/task/log.py +2 -0
- inspect_ai/_eval/task/run.py +65 -39
- inspect_ai/_util/citation.py +88 -0
- inspect_ai/_util/content.py +24 -2
- inspect_ai/_util/json.py +17 -2
- inspect_ai/_util/registry.py +19 -4
- inspect_ai/_view/schema.py +0 -6
- inspect_ai/_view/server.py +17 -0
- inspect_ai/_view/www/dist/assets/index.css +93 -31
- inspect_ai/_view/www/dist/assets/index.js +10639 -10011
- inspect_ai/_view/www/log-schema.json +418 -1
- inspect_ai/_view/www/node_modules/flatted/python/flatted.py +149 -0
- inspect_ai/_view/www/node_modules/katex/src/fonts/generate_fonts.py +58 -0
- inspect_ai/_view/www/node_modules/katex/src/metrics/extract_tfms.py +114 -0
- inspect_ai/_view/www/node_modules/katex/src/metrics/extract_ttfs.py +122 -0
- inspect_ai/_view/www/node_modules/katex/src/metrics/format_json.py +28 -0
- inspect_ai/_view/www/node_modules/katex/src/metrics/parse_tfm.py +211 -0
- inspect_ai/_view/www/package.json +2 -2
- inspect_ai/_view/www/src/@types/log.d.ts +140 -39
- inspect_ai/_view/www/src/app/content/RecordTree.tsx +13 -0
- inspect_ai/_view/www/src/app/log-view/LogView.tsx +1 -1
- inspect_ai/_view/www/src/app/routing/logNavigation.ts +31 -0
- inspect_ai/_view/www/src/app/routing/{navigationHooks.ts → sampleNavigation.ts} +39 -86
- inspect_ai/_view/www/src/app/samples/SampleDialog.tsx +1 -1
- inspect_ai/_view/www/src/app/samples/SampleDisplay.tsx +1 -1
- inspect_ai/_view/www/src/app/samples/chat/ChatMessage.module.css +4 -0
- inspect_ai/_view/www/src/app/samples/chat/ChatMessage.tsx +17 -0
- inspect_ai/_view/www/src/app/samples/chat/MessageCitations.module.css +16 -0
- inspect_ai/_view/www/src/app/samples/chat/MessageCitations.tsx +63 -0
- inspect_ai/_view/www/src/app/samples/chat/MessageContent.module.css +6 -0
- inspect_ai/_view/www/src/app/samples/chat/MessageContent.tsx +174 -25
- inspect_ai/_view/www/src/app/samples/chat/MessageContents.tsx +21 -3
- inspect_ai/_view/www/src/app/samples/chat/content-data/ContentDataView.module.css +7 -0
- inspect_ai/_view/www/src/app/samples/chat/content-data/ContentDataView.tsx +111 -0
- inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearch.module.css +10 -0
- inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearch.tsx +14 -0
- inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearchResults.module.css +19 -0
- inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearchResults.tsx +49 -0
- inspect_ai/_view/www/src/app/samples/chat/messages.ts +7 -1
- inspect_ai/_view/www/src/app/samples/chat/tools/ToolCallView.tsx +12 -2
- inspect_ai/_view/www/src/app/samples/chat/types.ts +4 -0
- inspect_ai/_view/www/src/app/samples/list/SampleList.tsx +1 -1
- inspect_ai/_view/www/src/app/samples/sample-tools/filters.ts +26 -0
- inspect_ai/_view/www/src/app/samples/sample-tools/sample-filter/SampleFilter.tsx +14 -3
- inspect_ai/_view/www/src/app/samples/sample-tools/sample-filter/completions.ts +359 -7
- inspect_ai/_view/www/src/app/samples/sample-tools/sample-filter/language.ts +6 -0
- inspect_ai/_view/www/src/app/samples/sampleLimit.ts +2 -2
- inspect_ai/_view/www/src/app/samples/transcript/ModelEventView.tsx +1 -1
- inspect_ai/_view/www/src/app/samples/transcript/SampleLimitEventView.tsx +4 -4
- inspect_ai/_view/www/src/app/samples/transcript/outline/OutlineRow.tsx +1 -1
- inspect_ai/_view/www/src/app/samples/transcript/outline/TranscriptOutline.tsx +1 -1
- inspect_ai/_view/www/src/client/api/api-browser.ts +25 -0
- inspect_ai/_view/www/src/client/api/api-http.ts +3 -0
- inspect_ai/_view/www/src/client/api/api-vscode.ts +6 -0
- inspect_ai/_view/www/src/client/api/client-api.ts +3 -0
- inspect_ai/_view/www/src/client/api/jsonrpc.ts +1 -0
- inspect_ai/_view/www/src/client/api/types.ts +3 -0
- inspect_ai/_view/www/src/components/MarkdownDiv.tsx +15 -2
- inspect_ai/_view/www/src/state/samplePolling.ts +17 -1
- inspect_ai/_view/www/src/tests/README.md +2 -2
- inspect_ai/_view/www/src/utils/git.ts +3 -1
- inspect_ai/_view/www/src/utils/html.ts +6 -0
- inspect_ai/agent/_handoff.py +8 -5
- inspect_ai/agent/_react.py +5 -5
- inspect_ai/dataset/_dataset.py +1 -1
- inspect_ai/log/_condense.py +5 -0
- inspect_ai/log/_file.py +4 -1
- inspect_ai/log/_log.py +9 -4
- inspect_ai/log/_recorders/json.py +4 -2
- inspect_ai/log/_samples.py +5 -0
- inspect_ai/log/_util.py +2 -0
- inspect_ai/model/__init__.py +14 -0
- inspect_ai/model/_call_tools.py +17 -8
- inspect_ai/model/_chat_message.py +3 -0
- inspect_ai/model/_openai_responses.py +80 -34
- inspect_ai/model/_providers/_anthropic_citations.py +158 -0
- inspect_ai/model/_providers/_google_citations.py +100 -0
- inspect_ai/model/_providers/anthropic.py +219 -36
- inspect_ai/model/_providers/google.py +98 -22
- inspect_ai/model/_providers/mistral.py +20 -7
- inspect_ai/model/_providers/openai.py +11 -10
- inspect_ai/model/_providers/openai_compatible.py +3 -2
- inspect_ai/model/_providers/openai_responses.py +2 -5
- inspect_ai/model/_providers/perplexity.py +123 -0
- inspect_ai/model/_providers/providers.py +13 -2
- inspect_ai/model/_providers/vertex.py +3 -0
- inspect_ai/model/_trim.py +5 -0
- inspect_ai/tool/__init__.py +14 -0
- inspect_ai/tool/_mcp/_mcp.py +5 -2
- inspect_ai/tool/_mcp/sampling.py +19 -3
- inspect_ai/tool/_mcp/server.py +1 -1
- inspect_ai/tool/_tool.py +10 -1
- inspect_ai/tool/_tools/_web_search/_base_http_provider.py +104 -0
- inspect_ai/tool/_tools/_web_search/_exa.py +78 -0
- inspect_ai/tool/_tools/_web_search/_google.py +22 -25
- inspect_ai/tool/_tools/_web_search/_tavily.py +47 -65
- inspect_ai/tool/_tools/_web_search/_web_search.py +83 -36
- inspect_ai/tool/_tools/_web_search/_web_search_provider.py +7 -0
- inspect_ai/util/__init__.py +8 -0
- inspect_ai/util/_background.py +64 -0
- inspect_ai/util/_display.py +11 -2
- inspect_ai/util/_limit.py +72 -5
- inspect_ai/util/_sandbox/__init__.py +2 -0
- inspect_ai/util/_sandbox/docker/compose.py +2 -2
- inspect_ai/util/_sandbox/service.py +28 -7
- inspect_ai/util/_span.py +12 -1
- inspect_ai/util/_subprocess.py +51 -38
- {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.105.dist-info}/METADATA +2 -2
- {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.105.dist-info}/RECORD +134 -109
- /inspect_ai/model/{_openai_computer_use.py → _providers/_openai_computer_use.py} +0 -0
- /inspect_ai/model/{_openai_web_search.py → _providers/_openai_web_search.py} +0 -0
- {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.105.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.105.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.105.dist-info}/licenses/LICENSE +0 -0
- {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.105.dist-info}/top_level.txt +0 -0
inspect_ai/util/_limit.py
CHANGED
@@ -4,6 +4,7 @@ import abc
|
|
4
4
|
import logging
|
5
5
|
from contextlib import ExitStack, contextmanager
|
6
6
|
from contextvars import ContextVar
|
7
|
+
from dataclasses import dataclass
|
7
8
|
from types import TracebackType
|
8
9
|
from typing import TYPE_CHECKING, Generic, Iterator, Literal, TypeVar
|
9
10
|
|
@@ -88,12 +89,31 @@ class Limit(abc.ABC):
|
|
88
89
|
) -> None:
|
89
90
|
pass
|
90
91
|
|
92
|
+
@property
|
93
|
+
@abc.abstractmethod
|
94
|
+
def limit(self) -> float | None:
|
95
|
+
"""The value of the limit being applied.
|
96
|
+
|
97
|
+
Can be None which represents no limit.
|
98
|
+
"""
|
99
|
+
pass
|
100
|
+
|
91
101
|
@property
|
92
102
|
@abc.abstractmethod
|
93
103
|
def usage(self) -> float:
|
94
104
|
"""The current usage of the resource being limited."""
|
95
105
|
pass
|
96
106
|
|
107
|
+
@property
|
108
|
+
def remaining(self) -> float | None:
|
109
|
+
"""The remaining "unused" amount of the resource being limited.
|
110
|
+
|
111
|
+
Returns None if the limit is None.
|
112
|
+
"""
|
113
|
+
if self.limit is None:
|
114
|
+
return None
|
115
|
+
return self.limit - self.usage
|
116
|
+
|
97
117
|
def _check_reuse(self) -> None:
|
98
118
|
if self._entered:
|
99
119
|
raise RuntimeError(
|
@@ -152,6 +172,46 @@ class LimitScope:
|
|
152
172
|
self.limit_error: LimitExceededError | None = None
|
153
173
|
|
154
174
|
|
175
|
+
@dataclass
|
176
|
+
class SampleLimits:
|
177
|
+
"""Data class to hold the limits applied to a Sample.
|
178
|
+
|
179
|
+
This is used to return the limits from `sample_limits()`.
|
180
|
+
"""
|
181
|
+
|
182
|
+
token: Limit
|
183
|
+
"""Token limit."""
|
184
|
+
|
185
|
+
message: Limit
|
186
|
+
"""Message limit."""
|
187
|
+
|
188
|
+
working: Limit
|
189
|
+
"""Working limit."""
|
190
|
+
|
191
|
+
time: Limit
|
192
|
+
"""Time limit."""
|
193
|
+
|
194
|
+
|
195
|
+
def sample_limits() -> SampleLimits:
|
196
|
+
"""Get the top-level limits applied to the current `Sample`."""
|
197
|
+
|
198
|
+
def get_root_node(node: TNode | None, name: str) -> TNode:
|
199
|
+
if node is None:
|
200
|
+
raise RuntimeError(
|
201
|
+
f"No {name} limit node found. Is there a running sample?"
|
202
|
+
)
|
203
|
+
while node.parent is not None:
|
204
|
+
node = node.parent
|
205
|
+
return node
|
206
|
+
|
207
|
+
return SampleLimits(
|
208
|
+
token=get_root_node(token_limit_tree.get(), "token"),
|
209
|
+
message=get_root_node(message_limit_tree.get(), "message"),
|
210
|
+
working=get_root_node(working_limit_tree.get(), "working"),
|
211
|
+
time=get_root_node(time_limit_tree.get(), "time"),
|
212
|
+
)
|
213
|
+
|
214
|
+
|
155
215
|
def token_limit(limit: int | None) -> _TokenLimit:
|
156
216
|
"""Limits the total number of tokens which can be used.
|
157
217
|
|
@@ -319,10 +379,9 @@ class _Tree(Generic[TNode]):
|
|
319
379
|
|
320
380
|
|
321
381
|
token_limit_tree: _Tree[_TokenLimit] = _Tree("token_limit_tree")
|
322
|
-
# Store the message limit leaf node so that we know which limit to check in
|
323
|
-
# check_message_limit().
|
324
382
|
message_limit_tree: _Tree[_MessageLimit] = _Tree("message_limit_tree")
|
325
383
|
working_limit_tree: _Tree[_WorkingLimit] = _Tree("working_limit_tree")
|
384
|
+
time_limit_tree: _Tree[_TimeLimit] = _Tree("time_limit_tree")
|
326
385
|
|
327
386
|
|
328
387
|
class _Node:
|
@@ -497,7 +556,7 @@ class _MessageLimit(Limit, _Node):
|
|
497
556
|
)
|
498
557
|
|
499
558
|
|
500
|
-
class _TimeLimit(Limit):
|
559
|
+
class _TimeLimit(Limit, _Node):
|
501
560
|
def __init__(self, limit: float | None) -> None:
|
502
561
|
super().__init__()
|
503
562
|
_validate_time_limit("Time", limit)
|
@@ -507,8 +566,7 @@ class _TimeLimit(Limit):
|
|
507
566
|
|
508
567
|
def __enter__(self) -> Limit:
|
509
568
|
super()._check_reuse()
|
510
|
-
|
511
|
-
# of the state.
|
569
|
+
time_limit_tree.push(self)
|
512
570
|
self._cancel_scope = anyio.move_on_after(self._limit)
|
513
571
|
self._cancel_scope.__enter__()
|
514
572
|
self._start_time = anyio.current_time()
|
@@ -524,6 +582,7 @@ class _TimeLimit(Limit):
|
|
524
582
|
|
525
583
|
self._cancel_scope.__exit__(exc_type, exc_val, exc_tb)
|
526
584
|
self._end_time = anyio.current_time()
|
585
|
+
self._pop_and_check_identity(time_limit_tree)
|
527
586
|
if self._cancel_scope.cancel_called and self._limit is not None:
|
528
587
|
message = f"Time limit exceeded. limit: {self._limit} seconds"
|
529
588
|
assert self._start_time is not None
|
@@ -541,6 +600,10 @@ class _TimeLimit(Limit):
|
|
541
600
|
source=self,
|
542
601
|
) from exc_val
|
543
602
|
|
603
|
+
@property
|
604
|
+
def limit(self) -> float | None:
|
605
|
+
return self._limit
|
606
|
+
|
544
607
|
@property
|
545
608
|
def usage(self) -> float:
|
546
609
|
if self._start_time is None:
|
@@ -575,6 +638,10 @@ class _WorkingLimit(Limit, _Node):
|
|
575
638
|
self._end_time = anyio.current_time()
|
576
639
|
self._pop_and_check_identity(working_limit_tree)
|
577
640
|
|
641
|
+
@property
|
642
|
+
def limit(self) -> float | None:
|
643
|
+
return self._limit
|
644
|
+
|
578
645
|
@property
|
579
646
|
def usage(self) -> float:
|
580
647
|
if self._start_time is None:
|
@@ -13,6 +13,7 @@ from .environment import (
|
|
13
13
|
from .limits import OutputLimitExceededError, SandboxEnvironmentLimits
|
14
14
|
from .local import LocalSandboxEnvironment # noqa: F401
|
15
15
|
from .registry import sandboxenv
|
16
|
+
from .service import sandbox_service
|
16
17
|
|
17
18
|
__all__ = [
|
18
19
|
"OutputLimitExceededError",
|
@@ -27,4 +28,5 @@ __all__ = [
|
|
27
28
|
"sandbox",
|
28
29
|
"sandbox_with",
|
29
30
|
"sandbox_default",
|
31
|
+
"sandbox_service",
|
30
32
|
]
|
@@ -11,7 +11,7 @@ from pydantic import BaseModel
|
|
11
11
|
from inspect_ai._util.error import PrerequisiteError
|
12
12
|
from inspect_ai._util.trace import trace_message
|
13
13
|
from inspect_ai.util._concurrency import concurrency
|
14
|
-
from inspect_ai.util._display import display_type
|
14
|
+
from inspect_ai.util._display import display_type, display_type_plain
|
15
15
|
from inspect_ai.util._subprocess import ExecResult, subprocess
|
16
16
|
|
17
17
|
from .prereqs import (
|
@@ -285,7 +285,7 @@ async def compose_command(
|
|
285
285
|
env = project.env if (project.env and forward_env) else {}
|
286
286
|
|
287
287
|
# ansi (apply global override)
|
288
|
-
if
|
288
|
+
if display_type_plain():
|
289
289
|
ansi = "never"
|
290
290
|
if ansi:
|
291
291
|
compose_command = compose_command + ["--ansi", ansi]
|
@@ -44,14 +44,35 @@ async def sandbox_service(
|
|
44
44
|
) -> None:
|
45
45
|
"""Run a service that is callable from within a sandbox.
|
46
46
|
|
47
|
+
The service makes available a set of methods to a sandbox
|
48
|
+
for calling back into the main Inspect process.
|
49
|
+
|
50
|
+
To use the service from within a sandbox, either add it to the sys path
|
51
|
+
or use importlib. For example, if the service is named 'foo':
|
52
|
+
|
53
|
+
```python
|
54
|
+
import sys
|
55
|
+
sys.path.append("/var/tmp/sandbox-services/foo")
|
56
|
+
import foo
|
57
|
+
```
|
58
|
+
|
59
|
+
Or:
|
60
|
+
|
61
|
+
```python
|
62
|
+
import importlib.util
|
63
|
+
spec = importlib.util.spec_from_file_location(
|
64
|
+
"foo", "/var/tmp/sandbox-services/foo/foo.py"
|
65
|
+
)
|
66
|
+
foo = importlib.util.module_from_spec(spec)
|
67
|
+
spec.loader.exec_module(foo)
|
68
|
+
```
|
69
|
+
|
47
70
|
Args:
|
48
|
-
name
|
49
|
-
methods
|
50
|
-
until
|
51
|
-
|
52
|
-
|
53
|
-
user (str | None): User to login as. Defaults to the sandbox environment's
|
54
|
-
default user.
|
71
|
+
name: Service name
|
72
|
+
methods: Service methods.
|
73
|
+
until: Function used to check whether the service should stop.
|
74
|
+
sandbox: Sandbox to publish service to.
|
75
|
+
user: User to login as. Defaults to the sandbox environment's default user.
|
55
76
|
"""
|
56
77
|
# setup and start service
|
57
78
|
service = SandboxService(name, sandbox, user)
|
inspect_ai/util/_span.py
CHANGED
@@ -1,8 +1,12 @@
|
|
1
1
|
import contextlib
|
2
|
+
import inspect
|
2
3
|
from contextvars import ContextVar
|
4
|
+
from logging import getLogger
|
3
5
|
from typing import AsyncIterator
|
4
6
|
from uuid import uuid4
|
5
7
|
|
8
|
+
logger = getLogger(__name__)
|
9
|
+
|
6
10
|
|
7
11
|
@contextlib.asynccontextmanager
|
8
12
|
async def span(name: str, *, type: str | None = None) -> AsyncIterator[None]:
|
@@ -22,6 +26,10 @@ async def span(name: str, *, type: str | None = None) -> AsyncIterator[None]:
|
|
22
26
|
# span id
|
23
27
|
id = uuid4().hex
|
24
28
|
|
29
|
+
# span caller context
|
30
|
+
frame = inspect.stack()[1]
|
31
|
+
caller = f"{frame.function}() [{frame.filename}:{frame.lineno}]"
|
32
|
+
|
25
33
|
# capture parent id
|
26
34
|
parent_id = _current_span_id.get()
|
27
35
|
|
@@ -48,7 +56,10 @@ async def span(name: str, *, type: str | None = None) -> AsyncIterator[None]:
|
|
48
56
|
# send end event
|
49
57
|
transcript()._event(SpanEndEvent(id=id))
|
50
58
|
|
51
|
-
|
59
|
+
try:
|
60
|
+
_current_span_id.reset(token)
|
61
|
+
except ValueError:
|
62
|
+
logger.warning(f"Exiting span created in another context: {caller}")
|
52
63
|
|
53
64
|
|
54
65
|
def current_span_id() -> str | None:
|
inspect_ai/util/_subprocess.py
CHANGED
@@ -2,16 +2,15 @@ import functools
|
|
2
2
|
import io
|
3
3
|
import os
|
4
4
|
import shlex
|
5
|
-
from contextlib import aclosing
|
6
5
|
from contextvars import ContextVar
|
7
6
|
from dataclasses import dataclass
|
8
7
|
from logging import getLogger
|
9
8
|
from pathlib import Path
|
10
9
|
from subprocess import DEVNULL, PIPE
|
11
|
-
from typing import
|
10
|
+
from typing import Generic, Literal, TypeVar, Union, overload
|
12
11
|
|
13
12
|
import anyio
|
14
|
-
from anyio import open_process
|
13
|
+
from anyio import ClosedResourceError, create_task_group, open_process
|
15
14
|
from anyio.abc import ByteReceiveStream, Process
|
16
15
|
|
17
16
|
from inspect_ai._util._async import tg_collect
|
@@ -114,9 +113,7 @@ async def subprocess(
|
|
114
113
|
else None
|
115
114
|
)
|
116
115
|
|
117
|
-
async def run_command() -> AsyncGenerator[
|
118
|
-
Union[Process, ExecResult[str], ExecResult[bytes]], None
|
119
|
-
]:
|
116
|
+
async def run_command() -> Union[ExecResult[str], ExecResult[bytes]]:
|
120
117
|
process = await open_process(
|
121
118
|
args,
|
122
119
|
stdin=PIPE if input else DEVNULL,
|
@@ -126,9 +123,6 @@ async def subprocess(
|
|
126
123
|
env={**os.environ, **env},
|
127
124
|
)
|
128
125
|
try:
|
129
|
-
# yield the process so the caller has a handle to it
|
130
|
-
yield process
|
131
|
-
|
132
126
|
# write to stdin (convert input to bytes)
|
133
127
|
if process.stdin and input:
|
134
128
|
await process.stdin.send(input)
|
@@ -161,19 +155,23 @@ async def subprocess(
|
|
161
155
|
returncode = await process.wait()
|
162
156
|
success = returncode == 0
|
163
157
|
if text:
|
164
|
-
|
158
|
+
return ExecResult[str](
|
165
159
|
success=success,
|
166
160
|
returncode=returncode,
|
167
161
|
stdout=stdout.decode() if capture_output else "",
|
168
162
|
stderr=stderr.decode() if capture_output else "",
|
169
163
|
)
|
170
164
|
else:
|
171
|
-
|
165
|
+
return ExecResult[bytes](
|
172
166
|
success=success,
|
173
167
|
returncode=returncode,
|
174
168
|
stdout=stdout if capture_output else bytes(),
|
175
169
|
stderr=stderr if capture_output else bytes(),
|
176
170
|
)
|
171
|
+
# Handle cancellation before aclose() is called to avoid deadlock.
|
172
|
+
except anyio.get_cancelled_exc_class():
|
173
|
+
await gracefully_terminate_cancelled_subprocess(process)
|
174
|
+
raise
|
177
175
|
finally:
|
178
176
|
try:
|
179
177
|
await process.aclose()
|
@@ -186,33 +184,13 @@ async def subprocess(
|
|
186
184
|
|
187
185
|
# wrapper for run command that implements timeout
|
188
186
|
async def run_command_timeout() -> Union[ExecResult[str], ExecResult[bytes]]:
|
189
|
-
#
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
with anyio.fail_after(timeout):
|
197
|
-
result = await anext(rc)
|
198
|
-
return cast(Union[ExecResult[str], ExecResult[bytes]], result)
|
199
|
-
except TimeoutError:
|
200
|
-
# terminate timed out process -- try for graceful termination
|
201
|
-
# then be more forceful if requied
|
202
|
-
with anyio.CancelScope(shield=True):
|
203
|
-
try:
|
204
|
-
proc.terminate()
|
205
|
-
await anyio.sleep(2)
|
206
|
-
if proc.returncode is None:
|
207
|
-
proc.kill()
|
208
|
-
except Exception:
|
209
|
-
pass
|
210
|
-
raise
|
211
|
-
|
212
|
-
# await result without timeout
|
213
|
-
else:
|
214
|
-
result = await anext(rc)
|
215
|
-
return cast(Union[ExecResult[str], ExecResult[bytes]], result)
|
187
|
+
# wrap in timeout handler if requested
|
188
|
+
if timeout is not None:
|
189
|
+
with anyio.fail_after(timeout):
|
190
|
+
# run_command() handles terminating the process if it is cancelled.
|
191
|
+
return await run_command()
|
192
|
+
else:
|
193
|
+
return await run_command()
|
216
194
|
|
217
195
|
# run command
|
218
196
|
async with concurrency("subprocesses", max_subprocesses_context_var.get()):
|
@@ -233,6 +211,41 @@ def default_max_subprocesses() -> int:
|
|
233
211
|
return cpus if cpus else 1
|
234
212
|
|
235
213
|
|
214
|
+
async def gracefully_terminate_cancelled_subprocess(process: Process) -> None:
|
215
|
+
with anyio.CancelScope(shield=True):
|
216
|
+
try:
|
217
|
+
# Terminate timed out process -- try for graceful termination then kill if
|
218
|
+
# required.
|
219
|
+
process.terminate()
|
220
|
+
await anyio.sleep(2)
|
221
|
+
if process.returncode is None:
|
222
|
+
process.kill()
|
223
|
+
# With anyio's asyncio backend, process.aclose() calls process.wait() which
|
224
|
+
# can deadlock if the process generates so much output that it blocks
|
225
|
+
# waiting for the OS pipe buffer to accept more data. See
|
226
|
+
# https://docs.python.org/3/library/asyncio-subprocess.html#asyncio.subprocess.Process.wait
|
227
|
+
# Therefore, we need to ensure that the process's stdout and stderr streams
|
228
|
+
# are drained before we call process.wait() in aclose().
|
229
|
+
async with create_task_group() as tg:
|
230
|
+
tg.start_soon(drain_stream, process.stdout)
|
231
|
+
tg.start_soon(drain_stream, process.stderr)
|
232
|
+
# Wait for the process to exit. Will be called again by aclose().
|
233
|
+
await process.wait()
|
234
|
+
# The process may have already exited, in which case we can ignore the error.
|
235
|
+
except ProcessLookupError:
|
236
|
+
pass
|
237
|
+
|
238
|
+
|
239
|
+
async def drain_stream(stream: ByteReceiveStream | None) -> None:
|
240
|
+
if stream is None:
|
241
|
+
return
|
242
|
+
try:
|
243
|
+
async for _ in stream:
|
244
|
+
pass
|
245
|
+
except ClosedResourceError:
|
246
|
+
pass
|
247
|
+
|
248
|
+
|
236
249
|
max_subprocesses_context_var = ContextVar[int](
|
237
250
|
"max_subprocesses", default=default_max_subprocesses()
|
238
251
|
)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: inspect_ai
|
3
|
-
Version: 0.3.103
|
3
|
+
Version: 0.3.105
|
4
4
|
Summary: Framework for large language model evaluations
|
5
5
|
Author: UK AI Security Institute
|
6
6
|
License: MIT License
|
@@ -63,7 +63,7 @@ Requires-Dist: groq; extra == "dev"
|
|
63
63
|
Requires-Dist: ipython; extra == "dev"
|
64
64
|
Requires-Dist: jsonpath-ng; extra == "dev"
|
65
65
|
Requires-Dist: markdown; extra == "dev"
|
66
|
-
Requires-Dist: mcp; extra == "dev"
|
66
|
+
Requires-Dist: mcp>=1.9.4; extra == "dev"
|
67
67
|
Requires-Dist: mistralai; extra == "dev"
|
68
68
|
Requires-Dist: moto[server]; extra == "dev"
|
69
69
|
Requires-Dist: mypy>=1.16.0; extra == "dev"
|