inspect-ai 0.3.53__py3-none-any.whl → 0.3.55__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/eval.py +26 -1
- inspect_ai/_cli/main.py +2 -0
- inspect_ai/_cli/trace.py +244 -0
- inspect_ai/_display/textual/app.py +5 -1
- inspect_ai/_display/textual/widgets/tasks.py +13 -3
- inspect_ai/_eval/eval.py +17 -0
- inspect_ai/_eval/task/images.py +4 -14
- inspect_ai/_eval/task/log.py +2 -1
- inspect_ai/_eval/task/run.py +26 -10
- inspect_ai/_util/constants.py +3 -3
- inspect_ai/_util/display.py +1 -0
- inspect_ai/_util/logger.py +34 -8
- inspect_ai/_util/trace.py +275 -0
- inspect_ai/log/_log.py +3 -0
- inspect_ai/log/_message.py +2 -2
- inspect_ai/log/_recorders/eval.py +6 -17
- inspect_ai/log/_recorders/json.py +19 -17
- inspect_ai/model/_cache.py +22 -16
- inspect_ai/model/_call_tools.py +9 -1
- inspect_ai/model/_generate_config.py +2 -2
- inspect_ai/model/_model.py +11 -12
- inspect_ai/model/_providers/bedrock.py +1 -1
- inspect_ai/model/_providers/openai.py +11 -1
- inspect_ai/tool/_tools/_web_browser/_web_browser.py +1 -1
- inspect_ai/util/_sandbox/context.py +6 -1
- inspect_ai/util/_sandbox/docker/compose.py +58 -19
- inspect_ai/util/_sandbox/docker/docker.py +11 -11
- inspect_ai/util/_sandbox/docker/util.py +0 -6
- inspect_ai/util/_sandbox/service.py +17 -7
- inspect_ai/util/_subprocess.py +6 -1
- inspect_ai/util/_subtask.py +8 -2
- {inspect_ai-0.3.53.dist-info → inspect_ai-0.3.55.dist-info}/METADATA +7 -7
- {inspect_ai-0.3.53.dist-info → inspect_ai-0.3.55.dist-info}/RECORD +37 -35
- {inspect_ai-0.3.53.dist-info → inspect_ai-0.3.55.dist-info}/LICENSE +0 -0
- {inspect_ai-0.3.53.dist-info → inspect_ai-0.3.55.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.53.dist-info → inspect_ai-0.3.55.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.53.dist-info → inspect_ai-0.3.55.dist-info}/top_level.txt +0 -0
@@ -58,7 +58,7 @@ class GenerateConfigArgs(TypedDict, total=False):
     """How many chat completion choices to generate for each input message. OpenAI, Grok, Google, and TogetherAI only."""
 
     logprobs: bool | None
-    """Return log probabilities of the output tokens. OpenAI, Google, Grok, TogetherAI, and
+    """Return log probabilities of the output tokens. OpenAI, Google, Grok, TogetherAI, Huggingface, llama-cpp-python, and vLLM only."""
 
     top_logprobs: int | None
    """Number of most likely tokens (0-20) to return at each token position, each with an associated log probability. OpenAI, Google, Grok, and Huggingface only."""
@@ -128,7 +128,7 @@ class GenerateConfig(BaseModel):
     """How many chat completion choices to generate for each input message. OpenAI, Grok, Google, TogetherAI, and vLLM only."""
 
     logprobs: bool | None = Field(default=None)
-    """Return log probabilities of the output tokens. OpenAI, Google, Grok, TogetherAI, Huggingface, and vLLM only."""
+    """Return log probabilities of the output tokens. OpenAI, Google, Grok, TogetherAI, Huggingface, llama-cpp-python, and vLLM only."""
 
     top_logprobs: int | None = Field(default=None)
     """Number of most likely tokens (0-20) to return at each token position, each with an associated log probability. OpenAI, Google, Grok, Huggingface, and vLLM only."""
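Both docstring updates advertise `logprobs` support for llama-cpp-python alongside the existing providers. A minimal sketch of requesting logprobs through `GenerateConfig` (the model name and `get_model` usage here are illustrative, not part of this diff):

```python
from inspect_ai.model import GenerateConfig, get_model

async def sample_with_logprobs() -> None:
    # ask for per-token logprobs plus the top 5 alternatives at each position
    config = GenerateConfig(logprobs=True, top_logprobs=5)
    model = get_model("openai/gpt-4o-mini")  # illustrative model name
    output = await model.generate("Say hello.", config=config)
    print(output.completion)
```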
inspect_ai/model/_model.py CHANGED
@@ -9,7 +9,6 @@ from contextvars import ContextVar
 from copy import deepcopy
 from typing import Any, Callable, Literal, Type, cast
 
-from shortuuid import uuid
 from tenacity import (
     retry,
     retry_if_exception,
@@ -30,6 +29,7 @@ from inspect_ai._util.registry import (
     registry_unqualified_name,
 )
 from inspect_ai._util.retry import log_rate_limit_retry
+from inspect_ai._util.trace import trace_action
 from inspect_ai.tool import Tool, ToolChoice, ToolFunction, ToolInfo
 from inspect_ai.tool._tool_def import ToolDef, tool_defs
 from inspect_ai.util import concurrency
@@ -363,17 +363,16 @@ class Model:
                 cache="write" if cache else None,
             )
 
-
-
-
-
-
-
-
-
-
-
-            logger.debug(f"model generate {generate_id} (completed)")
+            with trace_action(logger, "Model", f"generate ({str(self)})"):
+                time_start = time.perf_counter()
+                result = await self.api.generate(
+                    input=input,
+                    tools=tools,
+                    tool_choice=tool_choice,
+                    config=config,
+                )
+                time_elapsed = time.perf_counter() - time_start
+
 
             if isinstance(result, tuple):
                 output, call = result
             else:
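The generate call is now wrapped in `trace_action`, the timed-action helper added in `inspect_ai/_util/trace.py` and surfaced by the new `inspect trace` CLI (`inspect_ai/_cli/trace.py`). A hedged sketch of the pattern with an illustrative action of our own:

```python
from logging import getLogger

from inspect_ai._util.trace import trace_action

logger = getLogger(__name__)

async def fetch_report(url: str) -> str:
    # entry, exit, and any exception in this block are recorded as a traced action
    with trace_action(logger, "HTTP", f"GET {url}"):
        ...  # illustrative: perform the request here
        return "ok"
```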
inspect_ai/model/_providers/bedrock.py CHANGED
@@ -312,7 +312,7 @@ class BedrockAPI(ModelAPI):
         from botocore.exceptions import ClientError
 
         # The bedrock client
-        async with self.session.client(
+        async with self.session.client(  # type: ignore[call-overload]
             service_name="bedrock-runtime",
             endpoint_url=self.base_url,
             config=Config(
inspect_ai/model/_providers/openai.py CHANGED
@@ -1,5 +1,6 @@
 import json
 import os
+from logging import getLogger
 from typing import Any
 
 from openai import (
@@ -36,6 +37,7 @@ from inspect_ai._util.constants import DEFAULT_MAX_RETRIES
 from inspect_ai._util.content import Content
 from inspect_ai._util.error import PrerequisiteError
 from inspect_ai._util.images import image_as_data_uri
+from inspect_ai._util.logger import warn_once
 from inspect_ai._util.url import is_data_uri, is_http_url
 from inspect_ai.tool import ToolCall, ToolChoice, ToolFunction, ToolInfo
 
@@ -58,6 +60,8 @@ from .util import (
     parse_tool_call,
 )
 
+logger = getLogger(__name__)
+
 OPENAI_API_KEY = "OPENAI_API_KEY"
 AZURE_OPENAI_API_KEY = "AZURE_OPENAI_API_KEY"
 AZUREAI_OPENAI_API_KEY = "AZUREAI_OPENAI_API_KEY"
@@ -270,7 +274,13 @@ class OpenAIAPI(ModelAPI):
         if config.seed is not None:
             params["seed"] = config.seed
         if config.temperature is not None:
-
+            if self.is_o1():
+                warn_once(
+                    logger,
+                    "o1 models do not support the 'temperature' parameter (temperature is always 1).",
+                )
+            else:
+                params["temperature"] = config.temperature
         # TogetherAPI requires temperature w/ num_choices
         elif config.num_choices is not None:
             params["temperature"] = 1
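With this change, passing `temperature` to an o1 model logs a one-time warning instead of forwarding the unsupported parameter. A hedged usage sketch (the model name is illustrative):

```python
from inspect_ai.model import GenerateConfig, get_model

async def o1_generate() -> None:
    # temperature is ignored for o1 models (warned once) rather than sent to the API
    model = get_model("openai/o1")  # illustrative model name
    output = await model.generate(
        "Summarize the change.",
        config=GenerateConfig(temperature=0.2),
    )
    print(output.completion)
```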
inspect_ai/tool/_tools/_web_browser/_web_browser.py CHANGED
@@ -362,7 +362,7 @@ async def web_browser_cmd(cmd: str, *args: str) -> str:
     else:
         arg_list = ["python3", WEB_CLIENT_REQUEST, cmd] + list(args)
 
-    result = await sandbox_env.exec(arg_list)
+    result = await sandbox_env.exec(arg_list, timeout=180)
     if not result.success:
         raise RuntimeError(
             f"Error executing web browser command {cmd}({', '.join(args)}): {result.stderr}"
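The web browser tool now bounds each sandboxed client request at 180 seconds. For context, `SandboxEnvironment.exec` raises `TimeoutError` when a timeout expires; a hedged sketch of handling that from tool code (the command is illustrative):

```python
from inspect_ai.util import sandbox

async def run_probe() -> str:
    try:
        # illustrative command; exec raises TimeoutError if the limit is exceeded
        result = await sandbox().exec(["python3", "probe.py"], timeout=180)
    except TimeoutError:
        raise RuntimeError("Timed out running probe.py in the sandbox")
    if not result.success:
        raise RuntimeError(f"probe.py failed: {result.stderr}")
    return result.stdout
```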
inspect_ai/util/_sandbox/context.py CHANGED
@@ -191,7 +191,12 @@ async def setup_sandbox_environment(
 
     # chmod, execute, and remove
     async def exec(cmd: list[str]) -> None:
-
+        try:
+            result = await env.exec(cmd, timeout=30)
+        except TimeoutError:
+            raise RuntimeError(
+                f"Timed out executing command {' '.join(cmd)} in sandbox"
+            )
 
         if not result.success:
             raise RuntimeError(
inspect_ai/util/_sandbox/docker/compose.py CHANGED
@@ -16,7 +16,7 @@ from .prereqs import (
     DOCKER_COMPOSE_REQUIRED_VERSION_PULL_POLICY,
     validate_docker_compose,
 )
-from .util import ComposeProject, is_inspect_project
+from .util import ComposeProject, is_inspect_project
 
 logger = getLogger(__name__)
 
@@ -31,7 +31,9 @@ async def compose_up(project: ComposeProject) -> None:
         project=project,
     )
     if not result.success:
-        msg =
+        msg = (
+            f"Failed to start docker services for {project.config}: " f"{result.stderr}"
+        )
         raise RuntimeError(msg)
 
 
@@ -94,7 +96,10 @@ async def compose_check_running(services: list[str], project: ComposeProject) ->
         for running_service in running_services:
             unhealthy_services.remove(running_service["Service"])
 
-        msg =
+        msg = (
+            "One or more docker containers failed to start from "
+            f"{project.config}: {','.join(unhealthy_services)}"
+        )
         raise RuntimeError(msg)
     else:
         raise RuntimeError("No services started")
@@ -152,8 +157,9 @@ async def compose_pull(
 
 async def compose_exec(
     command: list[str],
+    *,
     project: ComposeProject,
-    timeout: int | None
+    timeout: int | None,
     input: str | bytes | None = None,
     output_limit: int | None = None,
 ) -> ExecResult[str]:
@@ -206,7 +212,6 @@ async def compose_cleanup_images(
     cwd: str | None = None,
     timeout: int | None = None,
 ) -> None:
-    sandbox_log("Removing images")
     # List the images that would be created for this compose
     images_result = await compose_command(
         ["config", "--images"], project=project, cwd=cwd
@@ -241,10 +246,14 @@ async def compose_cleanup_images(
             logger.warning(msg)
 
 
+DEFAULT_COMPOSE_TIMEOUT = 60
+
+
 async def compose_command(
     command: list[str],
+    *,
     project: ComposeProject,
-    timeout: int | None =
+    timeout: int | None = DEFAULT_COMPOSE_TIMEOUT,
     input: str | bytes | None = None,
     cwd: str | Path | None = None,
     forward_env: bool = True,
@@ -278,16 +287,46 @@ async def compose_command(
     # build final command
     compose_command = compose_command + command
 
-    #
-
-
-
-
-
-
-
-
-
-
-
+    # function to run command
+    async def run_command(command_timeout: int | None) -> ExecResult[str]:
+        result = await subprocess(
+            compose_command,
+            input=input,
+            cwd=cwd,
+            env=env,
+            timeout=command_timeout,
+            capture_output=capture_output,
+            output_limit=output_limit,
+        )
+        return result
+
+    # we have observed underlying unreliability in docker compose in some linux
+    # environments on EC2 -- this exhibits in very simple commands (e.g. compose config)
+    # simply never returning. this tends to happen when we know there is a large
+    # number of commands in flight (task/sample init) so could be some sort of
+    # timing issue / race condition in the docker daemon. we've also observed that
+    # these same commands succeed if you just retry them. therefore, we add some
+    # extra resiliance by retrying commands with a timeout once. we were observing
+    # commands hanging at a rate of ~ 1/1000, so we retry up to twice (tweaking the
+    # retry time down) to make the odds of hanging vanishingly small
+
+    if timeout is not None:
+        MAX_RETRIES = 2
+        retries = 0
+        while True:
+            try:
+                command_timeout = (
+                    timeout if retries == 0 else (min(timeout, 60) // retries)
+                )
+                return await run_command(command_timeout)
+            except TimeoutError:
+                retries += 1
+                if retries <= MAX_RETRIES:
+                    logger.info(
+                        f"Retrying docker compose command: {shlex.join(compose_command)}"
+                    )
+                else:
+                    raise
+
+    else:
+        return await run_command(timeout)
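The retry strategy above can be expressed as a small generic helper; a hypothetical sketch (not part of inspect_ai) that retries any awaitable factory on `TimeoutError` with a tightened timeout:

```python
import asyncio
from typing import Awaitable, Callable, TypeVar

T = TypeVar("T")

async def retry_on_timeout(
    make_call: Callable[[float], Awaitable[T]],
    timeout: float,
    max_retries: int = 2,
) -> T:
    """Run make_call(timeout), retrying with a reduced timeout on TimeoutError."""
    retries = 0
    while True:
        try:
            # first attempt gets the full timeout; retries get a tighter one
            attempt_timeout = timeout if retries == 0 else min(timeout, 60) / retries
            return await make_call(attempt_timeout)
        except (TimeoutError, asyncio.TimeoutError):
            retries += 1
            if retries > max_retries:
                raise
```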
inspect_ai/util/_sandbox/docker/docker.py CHANGED
@@ -42,7 +42,7 @@ from .compose import (
 from .config import CONFIG_FILES, DOCKERFILE
 from .internal import build_internal_image, is_internal_image
 from .prereqs import validate_prereqs
-from .util import ComposeProject,
+from .util import ComposeProject, task_project_name
 
 logger = getLogger(__name__)
 
@@ -113,8 +113,6 @@ class DockerSandboxEnvironment(SandboxEnvironment):
         config: SandboxEnvironmentConfigType | None,
         metadata: dict[str, str],
     ) -> dict[str, SandboxEnvironment]:
-        sandbox_log("setup")
-
         # create environment variables for sample metadata
         env: dict[str, str] = {}
         if isinstance(config, str) and Path(config).exists():
@@ -264,7 +262,9 @@ class DockerSandboxEnvironment(SandboxEnvironment):
 
     @override
     async def write_file(self, file: str, contents: str | bytes) -> None:
-
+        # exec function w/ timeout
+        async def exec(cmd: list[str]) -> ExecResult[str]:
+            return await self.exec(cmd, timeout=60)
 
         # resolve relative file paths
         file = self.container_file(file)
@@ -311,8 +311,8 @@ class DockerSandboxEnvironment(SandboxEnvironment):
             local_tmpfile.close()  # this will also delete the file
 
         if not hasattr(self, "_docker_user"):
-            uid = (await
-            gid = (await
+            uid = (await exec(["id", "-u"])).stdout.strip()
+            gid = (await exec(["id", "-g"])).stdout.strip()
             self._docker_user = (uid, gid)
 
         await compose_command(
@@ -331,7 +331,7 @@ class DockerSandboxEnvironment(SandboxEnvironment):
         parent = PurePosixPath(file).parent
 
         # We do these steps in a shell script for efficiency to avoid round-trips to docker.
-        res_cp = await
+        res_cp = await exec(
             [
                 "sh",
                 "-e",
@@ -346,7 +346,7 @@ class DockerSandboxEnvironment(SandboxEnvironment):
 
         if res_cp.returncode != 0:
             if "Permission denied" in res_cp.stderr:
-                ls_result = await
+                ls_result = await exec(["ls", "-la", "."])
                 error_string = f"Permission was denied. Error details: {res_cp.stderr}; ls -la: {ls_result.stdout}; {self._docker_user=}"
                 raise PermissionError(error_string)
             elif (
@@ -367,8 +367,6 @@ class DockerSandboxEnvironment(SandboxEnvironment):
 
     @override
     async def read_file(self, file: str, text: bool = True) -> Union[str, bytes]:
-        sandbox_log(f"read_file: {file}")
-
         # Write the contents to a temp file
         with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as temp_dir:
             # resolve relative file paths
@@ -449,7 +447,9 @@ class DockerSandboxEnvironment(SandboxEnvironment):
 async def container_working_dir(
     service: str, project: ComposeProject, default: str = "/"
 ) -> str:
-    result = await compose_exec(
+    result = await compose_exec(
+        [service, "sh", "-c", "pwd"], timeout=60, project=project
+    )
     if result.success:
         return result.stdout.strip()
     else:
inspect_ai/util/_sandbox/docker/util.py CHANGED
@@ -5,8 +5,6 @@ from pathlib import Path
 
 from shortuuid import uuid
 
-from inspect_ai._util.constants import SANDBOX
-
 from ..environment import SandboxEnvironmentConfigType
 from .config import (
     COMPOSE_DOCKERFILE_YAML,
@@ -94,7 +92,3 @@ inspect_project_pattern = r"^inspect-[a-z\d\-_]*-i[a-z\d]{22}$"
 
 def is_inspect_project(name: str) -> bool:
     return re.match(inspect_project_pattern, name) is not None
-
-
-def sandbox_log(msg: str) -> None:
-    logger.log(SANDBOX, f"DOCKER: {msg}")
inspect_ai/util/_sandbox/service.py CHANGED
@@ -10,6 +10,8 @@ from typing import (
 
 from pydantic import JsonValue
 
+from inspect_ai.util._subprocess import ExecResult
+
 from .environment import SandboxEnvironment
 
 REQUESTS_DIR = "requests"
@@ -129,9 +131,9 @@ class SandboxService:
         """Handle all pending service requests."""
         # list pending requests
         list_requests = f"ls -1 {self._requests_dir}/*.json"
-        result = await self.
+        result = await self._exec(["bash", "-c", list_requests])
 
-        # process
+        # process requests
         if result.success:
             request_files = result.stdout.strip().splitlines()
             if request_files:
@@ -142,7 +144,7 @@ class SandboxService:
     async def _handle_request(self, request_file: str) -> None:
         # read request
         read_request = f"cat {request_file}"
-        result = await self.
+        result = await self._exec(["bash", "-c", read_request])
         if not result.success:
             raise RuntimeError(
                 f"Error reading request for service {self._name}: '{read_request}' ({result.stderr})"
@@ -181,7 +183,7 @@ class SandboxService:
         await self._write_text_file(response_path, json.dumps(response_data))
 
         # remove request file
-        exec_rm = await self.
+        exec_rm = await self._exec(["rm", "-f", request_file])
         if not exec_rm.success:
             raise RuntimeError(
                 f"Error removing request file '{request_file}': {exec_rm.stderr}"
@@ -215,8 +217,8 @@ class SandboxService:
 
     async def _create_rpc_dir(self, name: str) -> str:
         rpc_dir = PurePosixPath(self._service_dir, name).as_posix()
-        result = await self.
-        result = await self.
+        result = await self._exec(["rm", "-rf", rpc_dir])
+        result = await self._exec(["mkdir", "-p", rpc_dir])
         if not result.success:
             raise RuntimeError(
                 f"Error creating rpc directory '{name}' for sandbox '{self._name}': {result.stderr}"
@@ -224,11 +226,19 @@ class SandboxService:
         return rpc_dir
 
     async def _write_text_file(self, file: str, contents: str) -> None:
-        result = await self.
+        result = await self._exec(["tee", "--", file], input=contents)
         if not result.success:
             msg = f"Failed to write file '{file}' into container: {result.stderr}"
             raise RuntimeError(msg)
 
+    async def _exec(self, cmd: list[str], input: str | None = None) -> ExecResult[str]:
+        try:
+            return await self._sandbox.exec(cmd, input=input, timeout=30)
+        except TimeoutError:
+            raise RuntimeError(
+                f"Timed out executing command {' '.join(cmd)} in sandbox"
+            )
+
     def _generate_client(self) -> str:
         return dedent(f"""
             from typing import Any
inspect_ai/util/_subprocess.py CHANGED
@@ -1,5 +1,6 @@
 import asyncio
 import os
+import shlex
 import sys
 from asyncio.subprocess import Process
 from contextvars import ContextVar
@@ -8,6 +9,8 @@ from logging import getLogger
 from pathlib import Path
 from typing import AsyncGenerator, Generic, Literal, TypeVar, Union, cast, overload
 
+from inspect_ai._util.trace import trace_action
+
 from ._concurrency import concurrency
 
 logger = getLogger(__name__)
@@ -217,7 +220,9 @@ async def subprocess(
 
     # run command
     async with concurrency("subprocesses", max_subprocesses_context_var.get()):
-
+        message = args if isinstance(args, str) else shlex.join(args)
+        with trace_action(logger, "Subprocess", message):
+            return await run_command_timeout()
 
 
 def init_max_subprocesses(max_subprocesses: int | None = None) -> None:
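Every `subprocess` invocation is now traced with a shlex-joined command string. A minimal usage sketch of the utility itself (the command shown is illustrative):

```python
from inspect_ai.util import subprocess

async def disk_usage(path: str) -> str:
    # run a command with a timeout; a TimeoutError propagates if it expires
    result = await subprocess(["du", "-sh", path], timeout=30)
    if not result.success:
        raise RuntimeError(f"du failed: {result.stderr}")
    return result.stdout.strip()
```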
inspect_ai/util/_subtask.py CHANGED
@@ -1,6 +1,7 @@
 import asyncio
 import inspect
 from functools import wraps
+from logging import getLogger
 from typing import (
     Any,
     Callable,
@@ -13,6 +14,7 @@ from typing import (
 
 from inspect_ai._util._async import is_callable_coroutine
 from inspect_ai._util.content import Content
+from inspect_ai._util.trace import trace_action
 from inspect_ai.util._store import Store, dict_jsonable, init_subtask_store
 
 SubtaskResult = str | int | float | bool | list[Content]
@@ -20,6 +22,9 @@ SubtaskResult = str | int | float | bool | list[Content]
 RT = TypeVar("RT", SubtaskResult, Any)
 
 
+logger = getLogger(__name__)
+
+
 @runtime_checkable
 class Subtask(Protocol):
     """Subtask with distinct `Store` and `Transcript`.
@@ -118,8 +123,9 @@ def subtask(
         init_subtask(subtask_name, store if store else Store())
 
         # run the subtask
-        with
-
+        with trace_action(logger, "Subtask", subtask_name):
+            with track_store_changes():  # type: ignore
+                result = await func(*args, **kwargs)
 
         # return result and event
         return result, list(transcript().events)
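Subtask execution is likewise wrapped in `trace_action`, keyed by the subtask name. For context, a minimal sketch of declaring a subtask with the public decorator (the body is illustrative):

```python
from inspect_ai.util import subtask

@subtask
async def add(x: int, y: int) -> int:
    # runs with its own Store and Transcript, and now appears as a traced action
    return x + y

# from solver code (illustrative):
#     result = await add(1, 2)
```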
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: inspect_ai
|
3
|
-
Version: 0.3.
|
3
|
+
Version: 0.3.55
|
4
4
|
Summary: Framework for large language model evaluations
|
5
5
|
Author: UK AI Safety Institute
|
6
6
|
License: MIT License
|
@@ -67,7 +67,7 @@ Requires-Dist: pytest-asyncio; extra == "dev"
|
|
67
67
|
Requires-Dist: pytest-cov; extra == "dev"
|
68
68
|
Requires-Dist: pytest-dotenv; extra == "dev"
|
69
69
|
Requires-Dist: pytest-xdist; extra == "dev"
|
70
|
-
Requires-Dist: ruff==0.8.
|
70
|
+
Requires-Dist: ruff==0.8.4; extra == "dev"
|
71
71
|
Requires-Dist: textual-dev>=0.86.2; extra == "dev"
|
72
72
|
Requires-Dist: types-PyYAML; extra == "dev"
|
73
73
|
Requires-Dist: types-beautifulsoup4; extra == "dev"
|
@@ -96,22 +96,22 @@ To get started with Inspect, please see the documentation at <https://inspect.ai
|
|
96
96
|
|
97
97
|
***
|
98
98
|
|
99
|
-
|
100
|
-
|
101
99
|
To work on development of Inspect, clone the repository and install with the `-e` flag and `[dev]` optional dependencies:
|
102
100
|
|
103
101
|
```bash
|
104
|
-
|
105
|
-
|
106
|
-
|
102
|
+
git clone https://github.com/UKGovernmentBEIS/inspect_ai.git
|
103
|
+
cd inspect_ai
|
104
|
+
pip install -e ".[dev]"
|
107
105
|
```
|
108
106
|
|
109
107
|
Optionally install pre-commit hooks via
|
108
|
+
|
110
109
|
```bash
|
111
110
|
make hooks
|
112
111
|
```
|
113
112
|
|
114
113
|
Run linting, formatting, and tests via
|
114
|
+
|
115
115
|
```bash
|
116
116
|
make check
|
117
117
|
make test
|