inspect-ai 0.3.72__py3-none-any.whl → 0.3.73__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103)
  1. inspect_ai/_cli/eval.py +14 -3
  2. inspect_ai/_cli/sandbox.py +3 -3
  3. inspect_ai/_cli/score.py +6 -4
  4. inspect_ai/_cli/trace.py +53 -6
  5. inspect_ai/_display/core/config.py +1 -1
  6. inspect_ai/_display/core/display.py +2 -1
  7. inspect_ai/_display/core/footer.py +6 -6
  8. inspect_ai/_display/plain/display.py +11 -6
  9. inspect_ai/_display/rich/display.py +23 -13
  10. inspect_ai/_display/textual/app.py +10 -9
  11. inspect_ai/_display/textual/display.py +2 -2
  12. inspect_ai/_display/textual/widgets/footer.py +4 -0
  13. inspect_ai/_display/textual/widgets/samples.py +14 -5
  14. inspect_ai/_eval/context.py +1 -2
  15. inspect_ai/_eval/eval.py +54 -41
  16. inspect_ai/_eval/loader.py +9 -2
  17. inspect_ai/_eval/run.py +148 -81
  18. inspect_ai/_eval/score.py +13 -8
  19. inspect_ai/_eval/task/images.py +31 -21
  20. inspect_ai/_eval/task/run.py +62 -59
  21. inspect_ai/_eval/task/rundir.py +16 -9
  22. inspect_ai/_eval/task/sandbox.py +7 -8
  23. inspect_ai/_eval/task/util.py +7 -0
  24. inspect_ai/_util/_async.py +118 -10
  25. inspect_ai/_util/constants.py +0 -2
  26. inspect_ai/_util/file.py +15 -29
  27. inspect_ai/_util/future.py +37 -0
  28. inspect_ai/_util/http.py +3 -99
  29. inspect_ai/_util/httpx.py +60 -0
  30. inspect_ai/_util/interrupt.py +2 -2
  31. inspect_ai/_util/json.py +5 -52
  32. inspect_ai/_util/logger.py +30 -86
  33. inspect_ai/_util/retry.py +10 -61
  34. inspect_ai/_util/trace.py +2 -2
  35. inspect_ai/_view/server.py +86 -3
  36. inspect_ai/_view/www/dist/assets/index.js +25837 -13269
  37. inspect_ai/_view/www/log-schema.json +253 -186
  38. inspect_ai/_view/www/package.json +2 -2
  39. inspect_ai/_view/www/src/plan/PlanDetailView.tsx +8 -3
  40. inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +2 -3
  41. inspect_ai/_view/www/src/types/log.d.ts +122 -94
  42. inspect_ai/approval/_human/manager.py +6 -10
  43. inspect_ai/approval/_human/panel.py +2 -2
  44. inspect_ai/dataset/_sources/util.py +7 -6
  45. inspect_ai/log/__init__.py +4 -0
  46. inspect_ai/log/_file.py +35 -61
  47. inspect_ai/log/_log.py +18 -1
  48. inspect_ai/log/_recorders/eval.py +14 -23
  49. inspect_ai/log/_recorders/json.py +3 -18
  50. inspect_ai/log/_samples.py +27 -2
  51. inspect_ai/log/_transcript.py +8 -8
  52. inspect_ai/model/__init__.py +2 -1
  53. inspect_ai/model/_call_tools.py +60 -40
  54. inspect_ai/model/_chat_message.py +3 -2
  55. inspect_ai/model/_generate_config.py +25 -0
  56. inspect_ai/model/_model.py +74 -36
  57. inspect_ai/model/_openai.py +9 -1
  58. inspect_ai/model/_providers/anthropic.py +24 -26
  59. inspect_ai/model/_providers/azureai.py +11 -9
  60. inspect_ai/model/_providers/bedrock.py +33 -24
  61. inspect_ai/model/_providers/cloudflare.py +8 -9
  62. inspect_ai/model/_providers/goodfire.py +7 -3
  63. inspect_ai/model/_providers/google.py +47 -13
  64. inspect_ai/model/_providers/groq.py +15 -15
  65. inspect_ai/model/_providers/hf.py +24 -17
  66. inspect_ai/model/_providers/mistral.py +36 -20
  67. inspect_ai/model/_providers/openai.py +30 -25
  68. inspect_ai/model/_providers/openai_o1.py +1 -1
  69. inspect_ai/model/_providers/providers.py +1 -1
  70. inspect_ai/model/_providers/together.py +3 -4
  71. inspect_ai/model/_providers/util/__init__.py +2 -2
  72. inspect_ai/model/_providers/util/chatapi.py +6 -19
  73. inspect_ai/model/_providers/util/hooks.py +165 -0
  74. inspect_ai/model/_providers/vertex.py +20 -3
  75. inspect_ai/model/_providers/vllm.py +16 -19
  76. inspect_ai/scorer/_multi.py +5 -2
  77. inspect_ai/solver/_bridge/patch.py +31 -1
  78. inspect_ai/solver/_fork.py +5 -3
  79. inspect_ai/solver/_human_agent/agent.py +3 -2
  80. inspect_ai/tool/__init__.py +8 -2
  81. inspect_ai/tool/_tool_info.py +4 -90
  82. inspect_ai/tool/_tool_params.py +4 -34
  83. inspect_ai/tool/_tools/_web_search.py +30 -24
  84. inspect_ai/util/__init__.py +4 -0
  85. inspect_ai/util/_concurrency.py +5 -6
  86. inspect_ai/util/_display.py +6 -0
  87. inspect_ai/util/_json.py +170 -0
  88. inspect_ai/util/_sandbox/docker/cleanup.py +13 -9
  89. inspect_ai/util/_sandbox/docker/docker.py +5 -0
  90. inspect_ai/util/_sandbox/environment.py +56 -9
  91. inspect_ai/util/_sandbox/service.py +12 -5
  92. inspect_ai/util/_subprocess.py +94 -113
  93. inspect_ai/util/_subtask.py +2 -4
  94. {inspect_ai-0.3.72.dist-info → inspect_ai-0.3.73.dist-info}/METADATA +6 -2
  95. {inspect_ai-0.3.72.dist-info → inspect_ai-0.3.73.dist-info}/RECORD +99 -99
  96. {inspect_ai-0.3.72.dist-info → inspect_ai-0.3.73.dist-info}/WHEEL +1 -1
  97. inspect_ai/_util/timeouts.py +0 -160
  98. inspect_ai/_view/www/node_modules/flatted/python/flatted.py +0 -149
  99. inspect_ai/_view/www/node_modules/flatted/python/test.py +0 -63
  100. inspect_ai/model/_providers/util/tracker.py +0 -92
  101. {inspect_ai-0.3.72.dist-info → inspect_ai-0.3.73.dist-info}/LICENSE +0 -0
  102. {inspect_ai-0.3.72.dist-info → inspect_ai-0.3.73.dist-info}/entry_points.txt +0 -0
  103. {inspect_ai-0.3.72.dist-info → inspect_ai-0.3.73.dist-info}/top_level.txt +0 -0
@@ -1,14 +1,20 @@
1
- import asyncio
1
+ import functools
2
+ import io
2
3
  import os
3
4
  import shlex
4
- import sys
5
- from asyncio.subprocess import Process
5
+ from contextlib import aclosing
6
6
  from contextvars import ContextVar
7
7
  from dataclasses import dataclass
8
8
  from logging import getLogger
9
9
  from pathlib import Path
10
+ from subprocess import DEVNULL, PIPE
10
11
  from typing import AsyncGenerator, Generic, Literal, TypeVar, Union, cast, overload
11
12
 
13
+ import anyio
14
+ from anyio import open_process
15
+ from anyio.abc import ByteReceiveStream, Process
16
+
17
+ from inspect_ai._util._async import tg_collect
12
18
  from inspect_ai._util.trace import trace_action
13
19
 
14
20
  from ._concurrency import concurrency
@@ -100,128 +106,103 @@ async def subprocess(
100
106
  TimeoutError: If the specified `timeout` expires.
101
107
  """
102
108
  # resolve input
103
- input = input.encode() if isinstance(input, str) else input
109
+ input = (
110
+ input.encode()
111
+ if isinstance(input, str)
112
+ else bytes(input)
113
+ if input is not None
114
+ else None
115
+ )
104
116
 
105
- # function to run command (we may or may not run it w/ concurrency)
106
117
  async def run_command() -> AsyncGenerator[
107
118
  Union[Process, ExecResult[str], ExecResult[bytes]], None
108
119
  ]:
109
- if isinstance(args, str):
110
- proc = await asyncio.create_subprocess_shell(
111
- args,
112
- stdin=asyncio.subprocess.PIPE,
113
- stdout=asyncio.subprocess.PIPE if capture_output else None,
114
- stderr=asyncio.subprocess.PIPE if capture_output else None,
115
- cwd=cwd,
116
- env={**os.environ, **env},
117
- )
118
- else:
119
- proc = await asyncio.create_subprocess_exec(
120
- args[0],
121
- *args[1:],
122
- stdin=asyncio.subprocess.PIPE,
123
- stdout=asyncio.subprocess.PIPE if capture_output else None,
124
- stderr=asyncio.subprocess.PIPE if capture_output else None,
125
- cwd=cwd,
126
- env={**os.environ, **env},
120
+ async with await open_process(
121
+ args,
122
+ stdin=PIPE if input else DEVNULL,
123
+ stdout=PIPE if capture_output else None,
124
+ stderr=PIPE if capture_output else None,
125
+ cwd=cwd,
126
+ env={**os.environ, **env},
127
+ ) as process:
128
+ # yield the process so the caller has a handle to it
129
+ yield process
130
+
131
+ # write to stdin (convert input to bytes)
132
+ if process.stdin and input:
133
+ await process.stdin.send(input)
134
+ await process.stdin.aclose()
135
+
136
+ # read streams incrementally so we can check output limits
137
+ async def read_stream(stream: ByteReceiveStream | None) -> bytes:
138
+ # return early for no stream
139
+ if stream is None:
140
+ return bytes()
141
+
142
+ written = 0
143
+ buffer = io.BytesIO()
144
+ async for chunk in stream:
145
+ buffer.write(chunk)
146
+ written += len(chunk)
147
+ if output_limit is not None and written > output_limit:
148
+ process.kill()
149
+ break
150
+
151
+ return buffer.getvalue()
152
+
153
+ stdout, stderr = await tg_collect(
154
+ [
155
+ functools.partial(read_stream, process.stdout),
156
+ functools.partial(read_stream, process.stderr),
157
+ ]
127
158
  )
128
159
 
129
- # yield the proc
130
- yield proc
131
-
132
- # write stdin if specified
133
- if proc.stdin is not None:
134
- if input is not None:
135
- proc.stdin.write(input)
136
- await proc.stdin.drain()
137
- proc.stdin.close()
138
- await proc.stdin.wait_closed()
139
-
140
- # read streams incrementally so we can check output limits
141
- async def read_stream(stream: asyncio.StreamReader | None) -> bytes:
142
- # return early for no stream
143
- if stream is None:
144
- return bytes()
145
-
146
- # read 8k at a time
147
- output = bytearray()
148
- while True:
149
- # read chunk and terminate if we are done
150
- chunk = await stream.read(8192)
151
- if not chunk:
152
- break
153
-
154
- # append to output
155
- output.extend(chunk)
156
-
157
- # stop if we have a limit and we have exceeded it
158
- if output_limit is not None and len(output) > output_limit:
159
- proc.kill()
160
- break
161
-
162
- # return stream output
163
- return bytes(output)
164
-
165
- # wait for it to execute and yield result
166
- stdout, stderr = await asyncio.gather(
167
- read_stream(proc.stdout), read_stream(proc.stderr)
168
- )
169
- returncode = await proc.wait()
170
- success = returncode == 0
171
- if text:
172
- yield ExecResult[str](
173
- success=success,
174
- returncode=returncode,
175
- stdout=stdout.decode() if capture_output else "",
176
- stderr=stderr.decode() if capture_output else "",
177
- )
178
- else:
179
- yield ExecResult[bytes](
180
- success=success,
181
- returncode=returncode,
182
- stdout=stdout if capture_output else bytes(),
183
- stderr=stderr if capture_output else bytes(),
184
- )
160
+ returncode = await process.wait()
161
+ success = returncode == 0
162
+ if text:
163
+ yield ExecResult[str](
164
+ success=success,
165
+ returncode=returncode,
166
+ stdout=stdout.decode() if capture_output else "",
167
+ stderr=stderr.decode() if capture_output else "",
168
+ )
169
+ else:
170
+ yield ExecResult[bytes](
171
+ success=success,
172
+ returncode=returncode,
173
+ stdout=stdout if capture_output else bytes(),
174
+ stderr=stderr if capture_output else bytes(),
175
+ )
185
176
 
186
177
  # wrapper for run command that implements timeout
187
178
  async def run_command_timeout() -> Union[ExecResult[str], ExecResult[bytes]]:
188
179
  # run the command and capture the process handle
189
- rc = run_command()
190
- proc = cast(Process, await anext(rc))
191
-
192
- # await result wrapped in timeout handler if requested
193
- if timeout:
194
- try:
195
- if sys.version_info >= (3, 11):
196
- async with asyncio.timeout(timeout):
180
+ async with aclosing(run_command()) as rc:
181
+ proc = cast(Process, await anext(rc))
182
+
183
+ # await result wrapped in timeout handler if requested
184
+ if timeout:
185
+ try:
186
+ with anyio.fail_after(timeout):
197
187
  result = await anext(rc)
198
188
  return cast(Union[ExecResult[str], ExecResult[bytes]], result)
199
- else:
200
- result = await asyncio.wait_for(anext(rc), timeout=timeout)
201
- return cast(Union[ExecResult[str], ExecResult[bytes]], result)
202
- # wait_for raises asyncio.TimeoutError under Python 3.10, but TimeoutError
203
- # under Python > 3.11! asynio.timeout (introduced in Python 3.11) always
204
- # raises the standard TimeoutError
205
- except (TimeoutError, asyncio.exceptions.TimeoutError):
206
- # terminate timed out process -- try for graceful termination
207
- # then be more forceful if requied
208
- try:
209
- proc.terminate()
210
- await asyncio.sleep(2)
211
- if proc.returncode is None:
212
- proc.kill()
213
- except Exception as ex:
214
- logger.warning(
215
- f"Unexpected error terminating timed out process '{args}': {ex}"
216
- )
217
-
218
- # raise standard Python TimeoutError
219
- raise TimeoutError
220
-
221
- # await result without timeout
222
- else:
223
- result = await anext(rc)
224
- return cast(Union[ExecResult[str], ExecResult[bytes]], result)
189
+ except TimeoutError:
190
+ # terminate timed out process -- try for graceful termination
191
+ # then be more forceful if requied
192
+ with anyio.CancelScope(shield=True):
193
+ try:
194
+ proc.terminate()
195
+ await anyio.sleep(2)
196
+ if proc.returncode is None:
197
+ proc.kill()
198
+ except Exception:
199
+ pass
200
+ raise
201
+
202
+ # await result without timeout
203
+ else:
204
+ result = await anext(rc)
205
+ return cast(Union[ExecResult[str], ExecResult[bytes]], result)
225
206
 
226
207
  # run command
227
208
  async with concurrency("subprocesses", max_subprocesses_context_var.get()):
@@ -1,4 +1,3 @@
1
- import asyncio
2
1
  import inspect
3
2
  from datetime import datetime
4
3
  from functools import wraps
@@ -13,7 +12,7 @@ from typing import (
13
12
  runtime_checkable,
14
13
  )
15
14
 
16
- from inspect_ai._util._async import is_callable_coroutine
15
+ from inspect_ai._util._async import is_callable_coroutine, tg_collect
17
16
  from inspect_ai._util.content import Content
18
17
  from inspect_ai._util.trace import trace_action
19
18
  from inspect_ai._util.working import sample_waiting_time
@@ -139,8 +138,7 @@ def subtask(
139
138
  transcript()._event(event)
140
139
 
141
140
  # create and run the task as a coroutine
142
- asyncio_task = asyncio.create_task(run())
143
- result, events = await asyncio_task
141
+ result, events = (await tg_collect([run]))[0]
144
142
 
145
143
  # time accounting
146
144
  completed = datetime.now()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: inspect_ai
3
- Version: 0.3.72
3
+ Version: 0.3.73
4
4
  Summary: Framework for large language model evaluations
5
5
  Author: UK AI Security Institute
6
6
  License: MIT License
@@ -21,11 +21,12 @@ Requires-Python: >=3.10
21
21
  Description-Content-Type: text/markdown
22
22
  License-File: LICENSE
23
23
  Requires-Dist: aiohttp>=3.9.0
24
- Requires-Dist: anyio>=4.4.0
24
+ Requires-Dist: anyio>=4.8.0
25
25
  Requires-Dist: beautifulsoup4
26
26
  Requires-Dist: click>=8.1.3
27
27
  Requires-Dist: debugpy
28
28
  Requires-Dist: docstring-parser>=0.16
29
+ Requires-Dist: exceptiongroup>=1.0.2; python_version < "3.11"
29
30
  Requires-Dist: fsspec<=2024.12.0,>=2023.1.0
30
31
  Requires-Dist: httpx
31
32
  Requires-Dist: ijson>=3.2.0
@@ -44,6 +45,7 @@ Requires-Dist: rich>=13.3.3
44
45
  Requires-Dist: s3fs>=2023
45
46
  Requires-Dist: semver>=3.0.0
46
47
  Requires-Dist: shortuuid
48
+ Requires-Dist: sniffio
47
49
  Requires-Dist: tenacity
48
50
  Requires-Dist: textual>=0.86.2
49
51
  Requires-Dist: typing_extensions>=4.9.0
@@ -73,6 +75,7 @@ Requires-Dist: pytest-dotenv; extra == "dev"
73
75
  Requires-Dist: pytest-xdist; extra == "dev"
74
76
  Requires-Dist: ruff==0.9.6; extra == "dev"
75
77
  Requires-Dist: textual-dev>=0.86.2; extra == "dev"
78
+ Requires-Dist: trio; extra == "dev"
76
79
  Requires-Dist: types-Markdown; extra == "dev"
77
80
  Requires-Dist: types-PyYAML; extra == "dev"
78
81
  Requires-Dist: types-beautifulsoup4; extra == "dev"
@@ -89,6 +92,7 @@ Requires-Dist: quarto-cli==1.5.57; extra == "doc"
89
92
  Requires-Dist: jupyter; extra == "doc"
90
93
  Requires-Dist: panflute; extra == "doc"
91
94
  Requires-Dist: markdown; extra == "doc"
95
+ Requires-Dist: griffe; extra == "doc"
92
96
  Provides-Extra: dist
93
97
  Requires-Dist: twine; extra == "dist"
94
98
  Requires-Dist: build; extra == "dist"