inspect-ai 0.3.72__py3-none-any.whl → 0.3.73__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/eval.py +14 -3
- inspect_ai/_cli/sandbox.py +3 -3
- inspect_ai/_cli/score.py +6 -4
- inspect_ai/_cli/trace.py +53 -6
- inspect_ai/_display/core/config.py +1 -1
- inspect_ai/_display/core/display.py +2 -1
- inspect_ai/_display/core/footer.py +6 -6
- inspect_ai/_display/plain/display.py +11 -6
- inspect_ai/_display/rich/display.py +23 -13
- inspect_ai/_display/textual/app.py +10 -9
- inspect_ai/_display/textual/display.py +2 -2
- inspect_ai/_display/textual/widgets/footer.py +4 -0
- inspect_ai/_display/textual/widgets/samples.py +14 -5
- inspect_ai/_eval/context.py +1 -2
- inspect_ai/_eval/eval.py +54 -41
- inspect_ai/_eval/loader.py +9 -2
- inspect_ai/_eval/run.py +148 -81
- inspect_ai/_eval/score.py +13 -8
- inspect_ai/_eval/task/images.py +31 -21
- inspect_ai/_eval/task/run.py +62 -59
- inspect_ai/_eval/task/rundir.py +16 -9
- inspect_ai/_eval/task/sandbox.py +7 -8
- inspect_ai/_eval/task/util.py +7 -0
- inspect_ai/_util/_async.py +118 -10
- inspect_ai/_util/constants.py +0 -2
- inspect_ai/_util/file.py +15 -29
- inspect_ai/_util/future.py +37 -0
- inspect_ai/_util/http.py +3 -99
- inspect_ai/_util/httpx.py +60 -0
- inspect_ai/_util/interrupt.py +2 -2
- inspect_ai/_util/json.py +5 -52
- inspect_ai/_util/logger.py +30 -86
- inspect_ai/_util/retry.py +10 -61
- inspect_ai/_util/trace.py +2 -2
- inspect_ai/_view/server.py +86 -3
- inspect_ai/_view/www/dist/assets/index.js +25837 -13269
- inspect_ai/_view/www/log-schema.json +253 -186
- inspect_ai/_view/www/package.json +2 -2
- inspect_ai/_view/www/src/plan/PlanDetailView.tsx +8 -3
- inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +2 -3
- inspect_ai/_view/www/src/types/log.d.ts +122 -94
- inspect_ai/approval/_human/manager.py +6 -10
- inspect_ai/approval/_human/panel.py +2 -2
- inspect_ai/dataset/_sources/util.py +7 -6
- inspect_ai/log/__init__.py +4 -0
- inspect_ai/log/_file.py +35 -61
- inspect_ai/log/_log.py +18 -1
- inspect_ai/log/_recorders/eval.py +14 -23
- inspect_ai/log/_recorders/json.py +3 -18
- inspect_ai/log/_samples.py +27 -2
- inspect_ai/log/_transcript.py +8 -8
- inspect_ai/model/__init__.py +2 -1
- inspect_ai/model/_call_tools.py +60 -40
- inspect_ai/model/_chat_message.py +3 -2
- inspect_ai/model/_generate_config.py +25 -0
- inspect_ai/model/_model.py +74 -36
- inspect_ai/model/_openai.py +9 -1
- inspect_ai/model/_providers/anthropic.py +24 -26
- inspect_ai/model/_providers/azureai.py +11 -9
- inspect_ai/model/_providers/bedrock.py +33 -24
- inspect_ai/model/_providers/cloudflare.py +8 -9
- inspect_ai/model/_providers/goodfire.py +7 -3
- inspect_ai/model/_providers/google.py +47 -13
- inspect_ai/model/_providers/groq.py +15 -15
- inspect_ai/model/_providers/hf.py +24 -17
- inspect_ai/model/_providers/mistral.py +36 -20
- inspect_ai/model/_providers/openai.py +30 -25
- inspect_ai/model/_providers/openai_o1.py +1 -1
- inspect_ai/model/_providers/providers.py +1 -1
- inspect_ai/model/_providers/together.py +3 -4
- inspect_ai/model/_providers/util/__init__.py +2 -2
- inspect_ai/model/_providers/util/chatapi.py +6 -19
- inspect_ai/model/_providers/util/hooks.py +165 -0
- inspect_ai/model/_providers/vertex.py +20 -3
- inspect_ai/model/_providers/vllm.py +16 -19
- inspect_ai/scorer/_multi.py +5 -2
- inspect_ai/solver/_bridge/patch.py +31 -1
- inspect_ai/solver/_fork.py +5 -3
- inspect_ai/solver/_human_agent/agent.py +3 -2
- inspect_ai/tool/__init__.py +8 -2
- inspect_ai/tool/_tool_info.py +4 -90
- inspect_ai/tool/_tool_params.py +4 -34
- inspect_ai/tool/_tools/_web_search.py +30 -24
- inspect_ai/util/__init__.py +4 -0
- inspect_ai/util/_concurrency.py +5 -6
- inspect_ai/util/_display.py +6 -0
- inspect_ai/util/_json.py +170 -0
- inspect_ai/util/_sandbox/docker/cleanup.py +13 -9
- inspect_ai/util/_sandbox/docker/docker.py +5 -0
- inspect_ai/util/_sandbox/environment.py +56 -9
- inspect_ai/util/_sandbox/service.py +12 -5
- inspect_ai/util/_subprocess.py +94 -113
- inspect_ai/util/_subtask.py +2 -4
- {inspect_ai-0.3.72.dist-info → inspect_ai-0.3.73.dist-info}/METADATA +6 -2
- {inspect_ai-0.3.72.dist-info → inspect_ai-0.3.73.dist-info}/RECORD +99 -99
- {inspect_ai-0.3.72.dist-info → inspect_ai-0.3.73.dist-info}/WHEEL +1 -1
- inspect_ai/_util/timeouts.py +0 -160
- inspect_ai/_view/www/node_modules/flatted/python/flatted.py +0 -149
- inspect_ai/_view/www/node_modules/flatted/python/test.py +0 -63
- inspect_ai/model/_providers/util/tracker.py +0 -92
- {inspect_ai-0.3.72.dist-info → inspect_ai-0.3.73.dist-info}/LICENSE +0 -0
- {inspect_ai-0.3.72.dist-info → inspect_ai-0.3.73.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.72.dist-info → inspect_ai-0.3.73.dist-info}/top_level.txt +0 -0
inspect_ai/util/_subprocess.py
CHANGED
@@ -1,14 +1,20 @@
|
|
1
|
-
import asyncio
|
1
|
+
import functools
|
2
|
+
import io
|
2
3
|
import os
|
3
4
|
import shlex
|
4
|
-
import
|
5
|
-
from asyncio.subprocess import Process
|
5
|
+
from contextlib import aclosing
|
6
6
|
from contextvars import ContextVar
|
7
7
|
from dataclasses import dataclass
|
8
8
|
from logging import getLogger
|
9
9
|
from pathlib import Path
|
10
|
+
from subprocess import DEVNULL, PIPE
|
10
11
|
from typing import AsyncGenerator, Generic, Literal, TypeVar, Union, cast, overload
|
11
12
|
|
13
|
+
import anyio
|
14
|
+
from anyio import open_process
|
15
|
+
from anyio.abc import ByteReceiveStream, Process
|
16
|
+
|
17
|
+
from inspect_ai._util._async import tg_collect
|
12
18
|
from inspect_ai._util.trace import trace_action
|
13
19
|
|
14
20
|
from ._concurrency import concurrency
|
@@ -100,128 +106,103 @@ async def subprocess(
|
|
100
106
|
TimeoutError: If the specified `timeout` expires.
|
101
107
|
"""
|
102
108
|
# resolve input
|
103
|
-
input =
|
109
|
+
input = (
|
110
|
+
input.encode()
|
111
|
+
if isinstance(input, str)
|
112
|
+
else bytes(input)
|
113
|
+
if input is not None
|
114
|
+
else None
|
115
|
+
)
|
104
116
|
|
105
|
-
# function to run command (we may or may not run it w/ concurrency)
|
106
117
|
async def run_command() -> AsyncGenerator[
|
107
118
|
Union[Process, ExecResult[str], ExecResult[bytes]], None
|
108
119
|
]:
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
stdin
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
120
|
+
async with await open_process(
|
121
|
+
args,
|
122
|
+
stdin=PIPE if input else DEVNULL,
|
123
|
+
stdout=PIPE if capture_output else None,
|
124
|
+
stderr=PIPE if capture_output else None,
|
125
|
+
cwd=cwd,
|
126
|
+
env={**os.environ, **env},
|
127
|
+
) as process:
|
128
|
+
# yield the process so the caller has a handle to it
|
129
|
+
yield process
|
130
|
+
|
131
|
+
# write to stdin (convert input to bytes)
|
132
|
+
if process.stdin and input:
|
133
|
+
await process.stdin.send(input)
|
134
|
+
await process.stdin.aclose()
|
135
|
+
|
136
|
+
# read streams incrementally so we can check output limits
|
137
|
+
async def read_stream(stream: ByteReceiveStream | None) -> bytes:
|
138
|
+
# return early for no stream
|
139
|
+
if stream is None:
|
140
|
+
return bytes()
|
141
|
+
|
142
|
+
written = 0
|
143
|
+
buffer = io.BytesIO()
|
144
|
+
async for chunk in stream:
|
145
|
+
buffer.write(chunk)
|
146
|
+
written += len(chunk)
|
147
|
+
if output_limit is not None and written > output_limit:
|
148
|
+
process.kill()
|
149
|
+
break
|
150
|
+
|
151
|
+
return buffer.getvalue()
|
152
|
+
|
153
|
+
stdout, stderr = await tg_collect(
|
154
|
+
[
|
155
|
+
functools.partial(read_stream, process.stdout),
|
156
|
+
functools.partial(read_stream, process.stderr),
|
157
|
+
]
|
127
158
|
)
|
128
159
|
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
# read 8k at a time
|
147
|
-
output = bytearray()
|
148
|
-
while True:
|
149
|
-
# read chunk and terminate if we are done
|
150
|
-
chunk = await stream.read(8192)
|
151
|
-
if not chunk:
|
152
|
-
break
|
153
|
-
|
154
|
-
# append to output
|
155
|
-
output.extend(chunk)
|
156
|
-
|
157
|
-
# stop if we have a limit and we have exceeded it
|
158
|
-
if output_limit is not None and len(output) > output_limit:
|
159
|
-
proc.kill()
|
160
|
-
break
|
161
|
-
|
162
|
-
# return stream output
|
163
|
-
return bytes(output)
|
164
|
-
|
165
|
-
# wait for it to execute and yield result
|
166
|
-
stdout, stderr = await asyncio.gather(
|
167
|
-
read_stream(proc.stdout), read_stream(proc.stderr)
|
168
|
-
)
|
169
|
-
returncode = await proc.wait()
|
170
|
-
success = returncode == 0
|
171
|
-
if text:
|
172
|
-
yield ExecResult[str](
|
173
|
-
success=success,
|
174
|
-
returncode=returncode,
|
175
|
-
stdout=stdout.decode() if capture_output else "",
|
176
|
-
stderr=stderr.decode() if capture_output else "",
|
177
|
-
)
|
178
|
-
else:
|
179
|
-
yield ExecResult[bytes](
|
180
|
-
success=success,
|
181
|
-
returncode=returncode,
|
182
|
-
stdout=stdout if capture_output else bytes(),
|
183
|
-
stderr=stderr if capture_output else bytes(),
|
184
|
-
)
|
160
|
+
returncode = await process.wait()
|
161
|
+
success = returncode == 0
|
162
|
+
if text:
|
163
|
+
yield ExecResult[str](
|
164
|
+
success=success,
|
165
|
+
returncode=returncode,
|
166
|
+
stdout=stdout.decode() if capture_output else "",
|
167
|
+
stderr=stderr.decode() if capture_output else "",
|
168
|
+
)
|
169
|
+
else:
|
170
|
+
yield ExecResult[bytes](
|
171
|
+
success=success,
|
172
|
+
returncode=returncode,
|
173
|
+
stdout=stdout if capture_output else bytes(),
|
174
|
+
stderr=stderr if capture_output else bytes(),
|
175
|
+
)
|
185
176
|
|
186
177
|
# wrapper for run command that implements timeout
|
187
178
|
async def run_command_timeout() -> Union[ExecResult[str], ExecResult[bytes]]:
|
188
179
|
# run the command and capture the process handle
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
async with asyncio.timeout(timeout):
|
180
|
+
async with aclosing(run_command()) as rc:
|
181
|
+
proc = cast(Process, await anext(rc))
|
182
|
+
|
183
|
+
# await result wrapped in timeout handler if requested
|
184
|
+
if timeout:
|
185
|
+
try:
|
186
|
+
with anyio.fail_after(timeout):
|
197
187
|
result = await anext(rc)
|
198
188
|
return cast(Union[ExecResult[str], ExecResult[bytes]], result)
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
)
|
217
|
-
|
218
|
-
# raise standard Python TimeoutError
|
219
|
-
raise TimeoutError
|
220
|
-
|
221
|
-
# await result without timeout
|
222
|
-
else:
|
223
|
-
result = await anext(rc)
|
224
|
-
return cast(Union[ExecResult[str], ExecResult[bytes]], result)
|
189
|
+
except TimeoutError:
|
190
|
+
# terminate timed out process -- try for graceful termination
|
191
|
+
# then be more forceful if requied
|
192
|
+
with anyio.CancelScope(shield=True):
|
193
|
+
try:
|
194
|
+
proc.terminate()
|
195
|
+
await anyio.sleep(2)
|
196
|
+
if proc.returncode is None:
|
197
|
+
proc.kill()
|
198
|
+
except Exception:
|
199
|
+
pass
|
200
|
+
raise
|
201
|
+
|
202
|
+
# await result without timeout
|
203
|
+
else:
|
204
|
+
result = await anext(rc)
|
205
|
+
return cast(Union[ExecResult[str], ExecResult[bytes]], result)
|
225
206
|
|
226
207
|
# run command
|
227
208
|
async with concurrency("subprocesses", max_subprocesses_context_var.get()):
|
inspect_ai/util/_subtask.py
CHANGED
@@ -1,4 +1,3 @@
|
|
1
|
-
import asyncio
|
2
1
|
import inspect
|
3
2
|
from datetime import datetime
|
4
3
|
from functools import wraps
|
@@ -13,7 +12,7 @@ from typing import (
|
|
13
12
|
runtime_checkable,
|
14
13
|
)
|
15
14
|
|
16
|
-
from inspect_ai._util._async import is_callable_coroutine
|
15
|
+
from inspect_ai._util._async import is_callable_coroutine, tg_collect
|
17
16
|
from inspect_ai._util.content import Content
|
18
17
|
from inspect_ai._util.trace import trace_action
|
19
18
|
from inspect_ai._util.working import sample_waiting_time
|
@@ -139,8 +138,7 @@ def subtask(
|
|
139
138
|
transcript()._event(event)
|
140
139
|
|
141
140
|
# create and run the task as a coroutine
|
142
|
-
|
143
|
-
result, events = await asyncio_task
|
141
|
+
result, events = (await tg_collect([run]))[0]
|
144
142
|
|
145
143
|
# time accounting
|
146
144
|
completed = datetime.now()
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: inspect_ai
|
3
|
-
Version: 0.3.72
|
3
|
+
Version: 0.3.73
|
4
4
|
Summary: Framework for large language model evaluations
|
5
5
|
Author: UK AI Security Institute
|
6
6
|
License: MIT License
|
@@ -21,11 +21,12 @@ Requires-Python: >=3.10
|
|
21
21
|
Description-Content-Type: text/markdown
|
22
22
|
License-File: LICENSE
|
23
23
|
Requires-Dist: aiohttp>=3.9.0
|
24
|
-
Requires-Dist: anyio>=4.
|
24
|
+
Requires-Dist: anyio>=4.8.0
|
25
25
|
Requires-Dist: beautifulsoup4
|
26
26
|
Requires-Dist: click>=8.1.3
|
27
27
|
Requires-Dist: debugpy
|
28
28
|
Requires-Dist: docstring-parser>=0.16
|
29
|
+
Requires-Dist: exceptiongroup>=1.0.2; python_version < "3.11"
|
29
30
|
Requires-Dist: fsspec<=2024.12.0,>=2023.1.0
|
30
31
|
Requires-Dist: httpx
|
31
32
|
Requires-Dist: ijson>=3.2.0
|
@@ -44,6 +45,7 @@ Requires-Dist: rich>=13.3.3
|
|
44
45
|
Requires-Dist: s3fs>=2023
|
45
46
|
Requires-Dist: semver>=3.0.0
|
46
47
|
Requires-Dist: shortuuid
|
48
|
+
Requires-Dist: sniffio
|
47
49
|
Requires-Dist: tenacity
|
48
50
|
Requires-Dist: textual>=0.86.2
|
49
51
|
Requires-Dist: typing_extensions>=4.9.0
|
@@ -73,6 +75,7 @@ Requires-Dist: pytest-dotenv; extra == "dev"
|
|
73
75
|
Requires-Dist: pytest-xdist; extra == "dev"
|
74
76
|
Requires-Dist: ruff==0.9.6; extra == "dev"
|
75
77
|
Requires-Dist: textual-dev>=0.86.2; extra == "dev"
|
78
|
+
Requires-Dist: trio; extra == "dev"
|
76
79
|
Requires-Dist: types-Markdown; extra == "dev"
|
77
80
|
Requires-Dist: types-PyYAML; extra == "dev"
|
78
81
|
Requires-Dist: types-beautifulsoup4; extra == "dev"
|
@@ -89,6 +92,7 @@ Requires-Dist: quarto-cli==1.5.57; extra == "doc"
|
|
89
92
|
Requires-Dist: jupyter; extra == "doc"
|
90
93
|
Requires-Dist: panflute; extra == "doc"
|
91
94
|
Requires-Dist: markdown; extra == "doc"
|
95
|
+
Requires-Dist: griffe; extra == "doc"
|
92
96
|
Provides-Extra: dist
|
93
97
|
Requires-Dist: twine; extra == "dist"
|
94
98
|
Requires-Dist: build; extra == "dist"
|