inspect-ai 0.3.74__py3-none-any.whl → 0.3.76__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/__init__.py +3 -2
- inspect_ai/_cli/cache.py +1 -1
- inspect_ai/_cli/common.py +15 -0
- inspect_ai/_cli/eval.py +4 -5
- inspect_ai/_cli/log.py +1 -1
- inspect_ai/_cli/sandbox.py +1 -1
- inspect_ai/_cli/trace.py +1 -1
- inspect_ai/_cli/view.py +1 -1
- inspect_ai/_display/core/config.py +3 -1
- inspect_ai/_eval/eval.py +55 -61
- inspect_ai/_eval/evalset.py +64 -154
- inspect_ai/_eval/loader.py +27 -54
- inspect_ai/_eval/registry.py +4 -15
- inspect_ai/_eval/run.py +7 -4
- inspect_ai/_eval/task/__init__.py +8 -2
- inspect_ai/_eval/task/log.py +9 -1
- inspect_ai/_eval/task/resolved.py +35 -0
- inspect_ai/_eval/task/run.py +4 -0
- inspect_ai/_eval/task/task.py +50 -69
- inspect_ai/_eval/task/tasks.py +30 -0
- inspect_ai/_util/constants.py +3 -0
- inspect_ai/_util/dotenv.py +17 -0
- inspect_ai/_util/logger.py +3 -0
- inspect_ai/_util/registry.py +43 -2
- inspect_ai/_view/server.py +28 -10
- inspect_ai/_view/www/dist/assets/index.css +32 -19
- inspect_ai/_view/www/dist/assets/index.js +17682 -29989
- inspect_ai/_view/www/log-schema.json +79 -9
- inspect_ai/_view/www/package.json +2 -2
- inspect_ai/_view/www/src/appearance/styles.ts +6 -5
- inspect_ai/_view/www/src/components/AnsiDisplay.tsx +2 -2
- inspect_ai/_view/www/src/constants.ts +3 -0
- inspect_ai/_view/www/src/logfile/remoteZipFile.ts +141 -20
- inspect_ai/_view/www/src/plan/PlanDetailView.tsx +2 -1
- inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +1 -1
- inspect_ai/_view/www/src/samples/chat/tools/tool.ts +7 -5
- inspect_ai/_view/www/src/samples/descriptor/score/CategoricalScoreDescriptor.tsx +1 -1
- inspect_ai/_view/www/src/samples/descriptor/score/NumericScoreDescriptor.tsx +2 -2
- inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.module.css +1 -0
- inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.tsx +3 -1
- inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +1 -1
- inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +5 -2
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +2 -2
- inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +5 -1
- inspect_ai/_view/www/src/types/log.d.ts +11 -5
- inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +17 -12
- inspect_ai/_view/www/src/workspace/sidebar/SidebarLogEntry.tsx +2 -1
- inspect_ai/_view/www/yarn.lock +12 -5
- inspect_ai/log/_log.py +10 -1
- inspect_ai/log/_recorders/eval.py +27 -8
- inspect_ai/log/_recorders/json.py +10 -2
- inspect_ai/log/_transcript.py +13 -4
- inspect_ai/model/_call_tools.py +13 -4
- inspect_ai/model/_chat_message.py +15 -1
- inspect_ai/model/_model.py +30 -12
- inspect_ai/model/_model_output.py +6 -1
- inspect_ai/model/_openai.py +11 -6
- inspect_ai/model/_providers/anthropic.py +167 -77
- inspect_ai/model/_providers/google.py +6 -2
- inspect_ai/model/_providers/none.py +31 -0
- inspect_ai/model/_providers/openai.py +11 -8
- inspect_ai/model/_providers/providers.py +7 -0
- inspect_ai/model/_providers/vertex.py +5 -2
- inspect_ai/solver/_bridge/bridge.py +1 -1
- inspect_ai/solver/_chain.py +7 -6
- inspect_ai/tool/__init__.py +4 -0
- inspect_ai/tool/_tool_call.py +5 -2
- inspect_ai/tool/_tool_support_helpers.py +200 -0
- inspect_ai/tool/_tools/_bash_session.py +119 -0
- inspect_ai/tool/_tools/_computer/_computer.py +1 -1
- inspect_ai/tool/_tools/_text_editor.py +121 -0
- inspect_ai/tool/_tools/_web_browser/_back_compat.py +150 -0
- inspect_ai/tool/_tools/_web_browser/_web_browser.py +75 -130
- inspect_ai/tool/_tools/_web_search.py +2 -2
- inspect_ai/util/_json.py +28 -0
- inspect_ai/util/_sandbox/context.py +18 -8
- inspect_ai/util/_sandbox/docker/config.py +1 -1
- inspect_ai/util/_sandbox/docker/internal.py +3 -3
- inspect_ai/util/_sandbox/environment.py +17 -2
- {inspect_ai-0.3.74.dist-info → inspect_ai-0.3.76.dist-info}/METADATA +8 -5
- {inspect_ai-0.3.74.dist-info → inspect_ai-0.3.76.dist-info}/RECORD +85 -108
- {inspect_ai-0.3.74.dist-info → inspect_ai-0.3.76.dist-info}/WHEEL +1 -1
- inspect_ai/tool/_tools/_web_browser/_resources/.pylintrc +0 -8
- inspect_ai/tool/_tools/_web_browser/_resources/.vscode/launch.json +0 -24
- inspect_ai/tool/_tools/_web_browser/_resources/.vscode/settings.json +0 -25
- inspect_ai/tool/_tools/_web_browser/_resources/Dockerfile +0 -22
- inspect_ai/tool/_tools/_web_browser/_resources/README.md +0 -63
- inspect_ai/tool/_tools/_web_browser/_resources/accessibility_tree.py +0 -71
- inspect_ai/tool/_tools/_web_browser/_resources/accessibility_tree_node.py +0 -323
- inspect_ai/tool/_tools/_web_browser/_resources/cdp/__init__.py +0 -5
- inspect_ai/tool/_tools/_web_browser/_resources/cdp/a11y.py +0 -279
- inspect_ai/tool/_tools/_web_browser/_resources/cdp/dom.py +0 -9
- inspect_ai/tool/_tools/_web_browser/_resources/cdp/dom_snapshot.py +0 -293
- inspect_ai/tool/_tools/_web_browser/_resources/cdp/page.py +0 -94
- inspect_ai/tool/_tools/_web_browser/_resources/constants.py +0 -2
- inspect_ai/tool/_tools/_web_browser/_resources/images/usage_diagram.svg +0 -2
- inspect_ai/tool/_tools/_web_browser/_resources/mock_environment.py +0 -45
- inspect_ai/tool/_tools/_web_browser/_resources/playwright_browser.py +0 -50
- inspect_ai/tool/_tools/_web_browser/_resources/playwright_crawler.py +0 -48
- inspect_ai/tool/_tools/_web_browser/_resources/playwright_page_crawler.py +0 -280
- inspect_ai/tool/_tools/_web_browser/_resources/pyproject.toml +0 -65
- inspect_ai/tool/_tools/_web_browser/_resources/rectangle.py +0 -64
- inspect_ai/tool/_tools/_web_browser/_resources/rpc_client_helpers.py +0 -146
- inspect_ai/tool/_tools/_web_browser/_resources/scale_factor.py +0 -64
- inspect_ai/tool/_tools/_web_browser/_resources/test_accessibility_tree_node.py +0 -180
- inspect_ai/tool/_tools/_web_browser/_resources/test_playwright_crawler.py +0 -99
- inspect_ai/tool/_tools/_web_browser/_resources/test_rectangle.py +0 -15
- inspect_ai/tool/_tools/_web_browser/_resources/test_web_client.py +0 -44
- inspect_ai/tool/_tools/_web_browser/_resources/web_browser_rpc_types.py +0 -39
- inspect_ai/tool/_tools/_web_browser/_resources/web_client.py +0 -214
- inspect_ai/tool/_tools/_web_browser/_resources/web_client_new_session.py +0 -35
- inspect_ai/tool/_tools/_web_browser/_resources/web_server.py +0 -192
- {inspect_ai-0.3.74.dist-info → inspect_ai-0.3.76.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.74.dist-info → inspect_ai-0.3.76.dist-info/licenses}/LICENSE +0 -0
- {inspect_ai-0.3.74.dist-info → inspect_ai-0.3.76.dist-info}/top_level.txt +0 -0
@@ -1,23 +1,40 @@
|
|
1
1
|
import re
|
2
|
-
from textwrap import dedent
|
3
2
|
|
4
|
-
from pydantic import Field
|
3
|
+
from pydantic import BaseModel, Field
|
5
4
|
|
6
5
|
from inspect_ai._util.content import ContentText
|
7
6
|
from inspect_ai._util.error import PrerequisiteError
|
8
7
|
from inspect_ai.tool._tool import Tool, ToolError, ToolResult, tool
|
9
8
|
from inspect_ai.tool._tool_call import ToolCall, ToolCallContent, ToolCallView
|
10
9
|
from inspect_ai.tool._tool_info import parse_tool_info
|
10
|
+
from inspect_ai.tool._tool_support_helpers import (
|
11
|
+
exec_sandbox_rpc,
|
12
|
+
tool_container_sandbox,
|
13
|
+
)
|
11
14
|
from inspect_ai.tool._tool_with import tool_with
|
12
|
-
from inspect_ai.util._sandbox import SandboxEnvironment, sandbox_with
|
13
|
-
from inspect_ai.util._sandbox.docker.internal import INSPECT_WEB_BROWSER_IMAGE_DOCKERHUB
|
14
15
|
from inspect_ai.util._store_model import StoreModel, store_as
|
15
16
|
|
17
|
+
from ._back_compat import old_web_browser_cmd
|
18
|
+
|
19
|
+
|
20
|
+
# These two models are cloned from the container code. If/when we decide to create
|
21
|
+
# a package that is shared between the inspect and tool-container codebases, we'll
|
22
|
+
# just have to live with it.
|
23
|
+
class NewSessionResult(BaseModel):
|
24
|
+
session_name: str
|
25
|
+
|
26
|
+
|
27
|
+
class CrawlerResult(BaseModel):
|
28
|
+
web_url: str
|
29
|
+
main_content: str | None = None
|
30
|
+
web_at: str
|
31
|
+
error: str | None = None
|
32
|
+
|
16
33
|
|
17
34
|
def web_browser(interactive: bool = True) -> list[Tool]:
|
18
35
|
"""Tools used for web browser navigation.
|
19
36
|
|
20
|
-
See documentation at <https://inspect.
|
37
|
+
See documentation at <https://inspect.aisi.org.uk/tools-standard.html#sec-web-browser>.
|
21
38
|
|
22
39
|
Args:
|
23
40
|
interactive: Provide interactive tools (enable
|
@@ -85,7 +102,7 @@ def web_browser_go() -> Tool:
|
|
85
102
|
Returns:
|
86
103
|
Web accessibility tree of the visible elements of the web page. The element_id of each element is displayed in brackets at the beginning of the line.
|
87
104
|
"""
|
88
|
-
return await
|
105
|
+
return await _web_browser_cmd("web_go", locals())
|
89
106
|
|
90
107
|
return execute
|
91
108
|
|
@@ -165,7 +182,7 @@ def web_browser_click() -> Tool:
|
|
165
182
|
Returns:
|
166
183
|
Web accessibility tree of the visible elements of the web page. The element_id of each element is displayed in brackets at the beginning of the line.
|
167
184
|
"""
|
168
|
-
return await
|
185
|
+
return await _web_browser_cmd("web_click", locals())
|
169
186
|
|
170
187
|
return execute
|
171
188
|
|
@@ -203,7 +220,7 @@ def web_browser_type_submit() -> Tool:
|
|
203
220
|
Returns:
|
204
221
|
Web accessibility tree of the visible elements of the web page. The element_id of each element is displayed in brackets at the beginning of the line.
|
205
222
|
"""
|
206
|
-
return await
|
223
|
+
return await _web_browser_cmd("web_type_submit", locals())
|
207
224
|
|
208
225
|
return execute
|
209
226
|
|
@@ -241,7 +258,7 @@ def web_browser_type() -> Tool:
|
|
241
258
|
Returns:
|
242
259
|
Web accessibility tree of the visible elements of the web page. The element_id of each element is displayed in brackets at the beginning of the line.
|
243
260
|
"""
|
244
|
-
return await
|
261
|
+
return await _web_browser_cmd("web_type", locals())
|
245
262
|
|
246
263
|
return execute
|
247
264
|
|
@@ -271,7 +288,7 @@ def web_browser_scroll() -> Tool:
|
|
271
288
|
Returns:
|
272
289
|
Web accessibility tree of the visible elements of the web page. The element_id of each element is displayed in brackets at the beginning of the line.
|
273
290
|
"""
|
274
|
-
return await
|
291
|
+
return await _web_browser_cmd("web_scroll", locals())
|
275
292
|
|
276
293
|
return execute
|
277
294
|
|
@@ -292,7 +309,7 @@ def web_browser_back() -> Tool:
|
|
292
309
|
Returns:
|
293
310
|
Web accessibility tree of the visible elements of the web page. The element_id of each element is displayed in brackets at the beginning of the line.
|
294
311
|
"""
|
295
|
-
return await
|
312
|
+
return await _web_browser_cmd("web_back", locals())
|
296
313
|
|
297
314
|
return execute
|
298
315
|
|
@@ -313,7 +330,7 @@ def web_browser_forward() -> Tool:
|
|
313
330
|
Returns:
|
314
331
|
Web accessibility tree of the visible elements of the web page. The element_id of each element is displayed in brackets at the beginning of the line.
|
315
332
|
"""
|
316
|
-
return await
|
333
|
+
return await _web_browser_cmd("web_forward", locals())
|
317
334
|
|
318
335
|
return execute
|
319
336
|
|
@@ -334,133 +351,61 @@ def web_browser_refresh() -> Tool:
|
|
334
351
|
Returns:
|
335
352
|
Web accessibility tree of the visible elements of the web page. The element_id of each element is displayed in brackets at the beginning of the line.
|
336
353
|
"""
|
337
|
-
return await
|
354
|
+
return await _web_browser_cmd("web_refresh", locals())
|
338
355
|
|
339
356
|
return execute
|
340
357
|
|
341
358
|
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
359
|
+
async def _web_browser_cmd(tool_name: str, params: dict[str, object]) -> ToolResult:
|
360
|
+
try:
|
361
|
+
sandbox_env = await tool_container_sandbox("web browser")
|
362
|
+
except PrerequisiteError as e:
|
363
|
+
# The user may have the old, incompatible, sandbox. If so, use that and
|
364
|
+
# execute the old compatible code.
|
365
|
+
try:
|
366
|
+
return await old_web_browser_cmd(tool_name, *params)
|
367
|
+
except PrerequisiteError:
|
368
|
+
raise e
|
369
|
+
|
370
|
+
store = store_as(WebBrowserStore)
|
371
|
+
|
372
|
+
if not store.session_id:
|
373
|
+
store.session_id = (
|
374
|
+
await exec_sandbox_rpc(
|
375
|
+
sandbox_env,
|
376
|
+
"web_new_session",
|
377
|
+
{"headful": False},
|
378
|
+
NewSessionResult,
|
354
379
|
)
|
380
|
+
).session_name
|
355
381
|
|
356
|
-
|
357
|
-
raise RuntimeError(
|
358
|
-
f"Error creating new web browser session: {result.stderr}"
|
359
|
-
)
|
360
|
-
|
361
|
-
store.session_id = result.stdout.strip("\n")
|
382
|
+
params["session_name"] = store.session_id
|
362
383
|
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
arg_list = None
|
369
|
-
if session_flag:
|
370
|
-
arg_list = ["python3", WEB_CLIENT_REQUEST, session_flag, cmd] + list(args)
|
384
|
+
crawler_result = await exec_sandbox_rpc(
|
385
|
+
sandbox_env, tool_name, params, CrawlerResult
|
386
|
+
)
|
387
|
+
if crawler_result.error and crawler_result.error.strip() != "":
|
388
|
+
raise ToolError(crawler_result.error)
|
371
389
|
else:
|
372
|
-
|
390
|
+
main_content = crawler_result.main_content
|
391
|
+
web_at = crawler_result.web_at or "(no web accessibility tree available)"
|
392
|
+
# Remove base64 data from images.
|
393
|
+
web_at_lines = web_at.split("\n")
|
394
|
+
web_at_lines = [
|
395
|
+
line.partition("data:image/png;base64")[0] for line in web_at_lines
|
396
|
+
]
|
373
397
|
|
374
|
-
|
375
|
-
|
376
|
-
raise RuntimeError(
|
377
|
-
f"Error executing web browser command {cmd}({', '.join(args)}): {result.stderr}"
|
398
|
+
store_as(WebBrowserStore).main_content = (
|
399
|
+
main_content or "(no main text summary)"
|
378
400
|
)
|
379
|
-
|
380
|
-
response = parse_web_browser_output(result.stdout)
|
381
|
-
if "error" in response and response.get("error", "").strip() != "":
|
382
|
-
raise ToolError(str(response.get("error")) or "(unknown error)")
|
383
|
-
elif "web_at" in response:
|
384
|
-
main_content = str(response.get("main_content")) or None
|
385
|
-
web_at = (
|
386
|
-
str(response.get("web_at")) or "(no web accessibility tree available)"
|
387
|
-
)
|
388
|
-
# Remove base64 data from images.
|
389
|
-
web_at_lines = web_at.split("\n")
|
390
|
-
web_at_lines = [
|
391
|
-
line.partition("data:image/png;base64")[0] for line in web_at_lines
|
392
|
-
]
|
401
|
+
store_as(WebBrowserStore).web_at = web_at
|
393
402
|
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
ContentText(text=f"main content:\n{main_content}\n\n"),
|
403
|
-
ContentText(text=f"accessibility tree:\n{web_at}"),
|
404
|
-
]
|
405
|
-
if main_content
|
406
|
-
else web_at
|
407
|
-
)
|
408
|
-
else:
|
409
|
-
raise RuntimeError(
|
410
|
-
f"web_browser output must contain either 'error' or 'web_at' field: {result.stdout}"
|
411
|
-
)
|
412
|
-
|
413
|
-
|
414
|
-
async def web_browser_sandbox() -> SandboxEnvironment:
|
415
|
-
sb = await sandbox_with(WEB_CLIENT_REQUEST)
|
416
|
-
if sb:
|
417
|
-
return sb
|
418
|
-
else:
|
419
|
-
msg = dedent(f"""
|
420
|
-
The web browser service was not found in any of the sandboxes for this sample. Please add the web browser service to your configuration. For example, the following Docker compose file uses the {INSPECT_WEB_BROWSER_IMAGE_DOCKERHUB} image as its default sandbox:
|
421
|
-
|
422
|
-
services:
|
423
|
-
default:
|
424
|
-
image: "{INSPECT_WEB_BROWSER_IMAGE_DOCKERHUB}"
|
425
|
-
init: true
|
426
|
-
|
427
|
-
Alternatively, this Docker compose file creates a dedicated image for the web browser service:
|
428
|
-
|
429
|
-
services:
|
430
|
-
default:
|
431
|
-
image: "python:3.12-bookworm"
|
432
|
-
init: true
|
433
|
-
command: "tail -f /dev/null"
|
434
|
-
|
435
|
-
web_browser:
|
436
|
-
image: "{INSPECT_WEB_BROWSER_IMAGE_DOCKERHUB}"
|
437
|
-
init: true
|
438
|
-
""").strip()
|
439
|
-
raise PrerequisiteError(msg)
|
440
|
-
|
441
|
-
|
442
|
-
def parse_web_browser_output(output: str) -> dict[str, str]:
|
443
|
-
response: dict[str, str] = dict(
|
444
|
-
web_url="", main_content="", web_at="", info="", error=""
|
445
|
-
)
|
446
|
-
active_field: str | None = None
|
447
|
-
active_field_lines: list[str] = []
|
448
|
-
|
449
|
-
def collect_active_field() -> None:
|
450
|
-
if active_field is not None:
|
451
|
-
response[active_field] = "\n".join(active_field_lines)
|
452
|
-
active_field_lines.clear()
|
453
|
-
|
454
|
-
for line in output.splitlines():
|
455
|
-
field_match = re.match(
|
456
|
-
r"^(error|main_content|web_at|web_url|info)\s*:\s*(.+)$", line
|
403
|
+
web_at = "\n".join(web_at_lines)
|
404
|
+
return (
|
405
|
+
[
|
406
|
+
ContentText(text=f"main content:\n{main_content}\n\n"),
|
407
|
+
ContentText(text=f"accessibility tree:\n{web_at}"),
|
408
|
+
]
|
409
|
+
if main_content
|
410
|
+
else web_at
|
457
411
|
)
|
458
|
-
if field_match:
|
459
|
-
collect_active_field()
|
460
|
-
active_field = field_match.group(1)
|
461
|
-
active_field_lines.append(field_match.group(2))
|
462
|
-
else:
|
463
|
-
active_field_lines.append(line)
|
464
|
-
collect_active_field()
|
465
|
-
|
466
|
-
return response
|
@@ -52,7 +52,7 @@ def web_search(
|
|
52
52
|
A web search is conducted using the specified provider, the results are parsed for relevance
|
53
53
|
using the specified model, and the top 'num_results' relevant pages are returned.
|
54
54
|
|
55
|
-
See further documentation at <https://inspect.
|
55
|
+
See further documentation at <https://inspect.aisi.org.uk/tools-standard.html#sec-web-search>.
|
56
56
|
|
57
57
|
Args:
|
58
58
|
provider: Search provider (defaults to "google", currently
|
@@ -190,7 +190,7 @@ def google_search_provider(client: httpx.AsyncClient) -> SearchProvider:
|
|
190
190
|
google_cse_id = os.environ.get("GOOGLE_CSE_ID", None)
|
191
191
|
if not google_api_key or not google_cse_id:
|
192
192
|
raise PrerequisiteError(
|
193
|
-
"GOOGLE_CSE_ID and/or GOOGLE_CSE_API_KEY not set in the environment. Please ensure these variables are defined to use Google Custom Search with the web_search tool.\n\nLearn more about the Google web search provider at https://inspect.
|
193
|
+
"GOOGLE_CSE_ID and/or GOOGLE_CSE_API_KEY not set in the environment. Please ensure these variables are defined to use Google Custom Search with the web_search tool.\n\nLearn more about the Google web search provider at https://inspect.aisi.org.uk/tools.html#google-provider"
|
194
194
|
)
|
195
195
|
|
196
196
|
async def search(query: str, start_idx: int) -> list[SearchLink]:
|
inspect_ai/util/_json.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
import types
|
2
2
|
import typing
|
3
|
+
from copy import deepcopy
|
3
4
|
from dataclasses import is_dataclass
|
4
5
|
from typing import (
|
5
6
|
Any,
|
@@ -10,6 +11,7 @@ from typing import (
|
|
10
11
|
Tuple,
|
11
12
|
Type,
|
12
13
|
Union,
|
14
|
+
cast,
|
13
15
|
get_args,
|
14
16
|
get_origin,
|
15
17
|
get_type_hints,
|
@@ -127,6 +129,7 @@ def cls_json_schema(cls: Type[Any]) -> JSONSchema:
|
|
127
129
|
required.append(name)
|
128
130
|
elif isinstance(cls, type) and issubclass(cls, BaseModel):
|
129
131
|
schema = cls.model_json_schema()
|
132
|
+
schema = resolve_schema_references(schema)
|
130
133
|
for name, prop in schema.get("properties", {}).items():
|
131
134
|
properties[name] = JSONSchema(**prop)
|
132
135
|
required = schema.get("required", [])
|
@@ -168,3 +171,28 @@ def python_type_to_json_type(python_type: str | None) -> JSONType:
|
|
168
171
|
raise ValueError(
|
169
172
|
f"Unsupported type: {python_type} for Python to JSON conversion."
|
170
173
|
)
|
174
|
+
|
175
|
+
|
176
|
+
def resolve_schema_references(schema: dict[str, Any]) -> dict[str, Any]:
|
177
|
+
"""Resolves all $ref references in a JSON schema by inlining the definitions."""
|
178
|
+
schema = deepcopy(schema)
|
179
|
+
definitions = schema.pop("$defs", {})
|
180
|
+
|
181
|
+
def _resolve_refs(obj: Any) -> Any:
|
182
|
+
if isinstance(obj, dict):
|
183
|
+
if "$ref" in obj and obj["$ref"].startswith("#/$defs/"):
|
184
|
+
ref_key = obj["$ref"].split("/")[-1]
|
185
|
+
if ref_key in definitions:
|
186
|
+
# Replace with a deep copy of the definition
|
187
|
+
resolved = deepcopy(definitions[ref_key])
|
188
|
+
# Process any nested references in the definition
|
189
|
+
return _resolve_refs(resolved)
|
190
|
+
|
191
|
+
# Process all entries in the dictionary
|
192
|
+
return {k: _resolve_refs(v) for k, v in obj.items()}
|
193
|
+
elif isinstance(obj, list):
|
194
|
+
return [_resolve_refs(item) for item in obj]
|
195
|
+
else:
|
196
|
+
return obj
|
197
|
+
|
198
|
+
return cast(dict[str, Any], _resolve_refs(schema))
|
@@ -49,11 +49,14 @@ def sandbox(name: str | None = None) -> SandboxEnvironment:
|
|
49
49
|
return environment
|
50
50
|
|
51
51
|
|
52
|
-
async def sandbox_with(file: str) -> SandboxEnvironment | None:
|
52
|
+
async def sandbox_with(file: str, on_path: bool = False) -> SandboxEnvironment | None:
|
53
53
|
"""Get the SandboxEnvironment for the current sample that has the specified file.
|
54
54
|
|
55
55
|
Args:
|
56
|
-
file (str): Path to file to check for.
|
56
|
+
file (str): Path to file to check for if on_path is False. If on_path is
|
57
|
+
True, file should be a filename that exists on the system path.
|
58
|
+
on_path (bool): If True, file is a filename to be verified using "which".
|
59
|
+
If False, file is a path to be checked within the sandbox environments.
|
57
60
|
|
58
61
|
Return:
|
59
62
|
SandboxEnvironment instance or None if no sandboxes had the file.
|
@@ -66,19 +69,25 @@ async def sandbox_with(file: str) -> SandboxEnvironment | None:
|
|
66
69
|
if environments_with is None:
|
67
70
|
raise_no_sandbox()
|
68
71
|
|
69
|
-
# if we've already
|
70
|
-
|
72
|
+
# if we've already discovered the sandbox for this file then return it
|
73
|
+
environment_with_key = f"{file}:{on_path}"
|
74
|
+
environment = environments_with.get(environment_with_key, None)
|
71
75
|
if environment is not None:
|
72
76
|
return environment
|
73
77
|
|
74
78
|
# look in each sandbox
|
75
79
|
for _, environment in environments.items():
|
76
80
|
try:
|
77
|
-
|
78
|
-
|
81
|
+
if on_path:
|
82
|
+
# can we find the file on the path?
|
83
|
+
if not (await environment.exec(["which", file])).success:
|
84
|
+
continue
|
85
|
+
else:
|
86
|
+
# can we read the file?
|
87
|
+
await environment.read_file(file)
|
79
88
|
|
80
89
|
# if so this is our environment, cache and return it
|
81
|
-
environments_with[
|
90
|
+
environments_with[environment_with_key] = environment
|
82
91
|
return environment
|
83
92
|
|
84
93
|
# allow exception types known to be raised from read_file
|
@@ -192,7 +201,8 @@ async def copy_sandbox_environment_files(
|
|
192
201
|
target_env = environments.get(envname, None)
|
193
202
|
if not target_env:
|
194
203
|
raise RuntimeError(
|
195
|
-
f"Environment referenced in sample file not found: '{envname}:{file}'"
|
204
|
+
f"Environment referenced in sample file not found: '{envname}:{file}'. "
|
205
|
+
+ "Note that ':' can be optionally used to specify an explicit environment name for sample files (e.g. 'envname:file') so cannot be used as a character within filenames."
|
196
206
|
)
|
197
207
|
else:
|
198
208
|
target_env = default_environment
|
@@ -82,7 +82,7 @@ COMPOSE_COMMENT = """# inspect auto-generated docker compose file
|
|
82
82
|
COMPOSE_GENERIC_YAML = f"""{COMPOSE_COMMENT}
|
83
83
|
services:
|
84
84
|
default:
|
85
|
-
image: "
|
85
|
+
image: "aisiuk/inspect-tool-support"
|
86
86
|
command: "tail -f /dev/null"
|
87
87
|
init: true
|
88
88
|
network_mode: none
|
@@ -3,13 +3,13 @@ from inspect_ai._util.error import PrerequisiteError
|
|
3
3
|
from inspect_ai.util._display import display_type
|
4
4
|
from inspect_ai.util._subprocess import subprocess
|
5
5
|
|
6
|
-
|
6
|
+
INSPECT_WEB_BROWSER_IMAGE_DOCKERHUB_DEPRECATED = "aisiuk/inspect-web-browser-tool"
|
7
7
|
|
8
|
-
|
8
|
+
INSPECT_WEB_BROWSER_IMAGE_DEPRECATED = "inspect_web_browser"
|
9
9
|
INSPECT_COMPUTER_IMAGE = "inspect-computer-tool"
|
10
10
|
|
11
11
|
INTERNAL_IMAGES = {
|
12
|
-
|
12
|
+
INSPECT_WEB_BROWSER_IMAGE_DEPRECATED: PKG_PATH
|
13
13
|
/ "tool"
|
14
14
|
/ "_tools"
|
15
15
|
/ "_web_browser"
|
@@ -1,6 +1,7 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
3
|
import abc
|
4
|
+
import logging
|
4
5
|
from dataclasses import dataclass, field
|
5
6
|
from typing import (
|
6
7
|
Annotated,
|
@@ -17,8 +18,12 @@ from typing import (
|
|
17
18
|
|
18
19
|
from pydantic import BaseModel, Field, model_validator
|
19
20
|
|
21
|
+
from inspect_ai._util.logger import warn_once
|
22
|
+
|
20
23
|
from .._subprocess import ExecResult
|
21
24
|
|
25
|
+
logger = logging.getLogger(__name__)
|
26
|
+
|
22
27
|
ST = TypeVar("ST", bound="SandboxEnvironment")
|
23
28
|
|
24
29
|
TaskInit = Callable[[str, Union["SandboxEnvironmentConfigType", None]], Awaitable[None]]
|
@@ -381,11 +386,21 @@ def resolve_sandbox_environment(
|
|
381
386
|
return None
|
382
387
|
|
383
388
|
|
384
|
-
def deserialize_sandbox_specific_config(
|
389
|
+
def deserialize_sandbox_specific_config(
|
390
|
+
type: str, config: dict[str, Any]
|
391
|
+
) -> BaseModel | dict[str, Any]:
|
385
392
|
# Avoid circular import
|
386
393
|
from inspect_ai.util._sandbox.registry import registry_find_sandboxenv
|
387
394
|
|
388
|
-
|
395
|
+
try:
|
396
|
+
sandboxenv_type = registry_find_sandboxenv(type)
|
397
|
+
except ValueError:
|
398
|
+
warn_once(
|
399
|
+
logger,
|
400
|
+
f"Could not find sandbox environment plugin for type '{type}'. "
|
401
|
+
"Ensure the plugin is installed in your environment.",
|
402
|
+
)
|
403
|
+
return config
|
389
404
|
config_deserialize = cast(
|
390
405
|
ConfigDeserialize, getattr(sandboxenv_type, "config_deserialize")
|
391
406
|
)
|
@@ -1,10 +1,10 @@
|
|
1
|
-
Metadata-Version: 2.
|
1
|
+
Metadata-Version: 2.4
|
2
2
|
Name: inspect_ai
|
3
|
-
Version: 0.3.
|
3
|
+
Version: 0.3.76
|
4
4
|
Summary: Framework for large language model evaluations
|
5
5
|
Author: UK AI Security Institute
|
6
6
|
License: MIT License
|
7
|
-
Project-URL: Documentation, https://inspect.
|
7
|
+
Project-URL: Documentation, https://inspect.aisi.org.uk/
|
8
8
|
Project-URL: Source Code, https://github.com/UKGovernmentBEIS/inspect_ai
|
9
9
|
Project-URL: Issue Tracker, https://github.com/UKGovernmentBEIS/inspect_ai/issues
|
10
10
|
Classifier: Development Status :: 4 - Beta
|
@@ -60,11 +60,13 @@ Requires-Dist: goodfire; extra == "dev"
|
|
60
60
|
Requires-Dist: griffe; extra == "dev"
|
61
61
|
Requires-Dist: groq; extra == "dev"
|
62
62
|
Requires-Dist: ipython; extra == "dev"
|
63
|
+
Requires-Dist: markdown; extra == "dev"
|
63
64
|
Requires-Dist: mistralai; extra == "dev"
|
64
65
|
Requires-Dist: moto[server]; extra == "dev"
|
65
66
|
Requires-Dist: mypy; extra == "dev"
|
66
67
|
Requires-Dist: nbformat; extra == "dev"
|
67
68
|
Requires-Dist: openai; extra == "dev"
|
69
|
+
Requires-Dist: panflute; extra == "dev"
|
68
70
|
Requires-Dist: pip; extra == "dev"
|
69
71
|
Requires-Dist: pre-commit; extra == "dev"
|
70
72
|
Requires-Dist: pylint; extra == "dev"
|
@@ -96,14 +98,15 @@ Requires-Dist: griffe; extra == "doc"
|
|
96
98
|
Provides-Extra: dist
|
97
99
|
Requires-Dist: twine; extra == "dist"
|
98
100
|
Requires-Dist: build; extra == "dist"
|
101
|
+
Dynamic: license-file
|
99
102
|
|
100
|
-
[<img width="295" src="https://inspect.
|
103
|
+
[<img width="295" src="https://inspect.aisi.org.uk/images/aisi-logo.svg" />](https://aisi.gov.uk/)
|
101
104
|
|
102
105
|
Welcome to Inspect, a framework for large language model evaluations created by the [UK AI Security Institute](https://aisi.gov.uk/).
|
103
106
|
|
104
107
|
Inspect provides many built-in components, including facilities for prompt engineering, tool usage, multi-turn dialog, and model graded evaluations. Extensions to Inspect (e.g. to support new elicitation and scoring techniques) can be provided by other Python packages.
|
105
108
|
|
106
|
-
To get started with Inspect, please see the documentation at <https://inspect.
|
109
|
+
To get started with Inspect, please see the documentation at <https://inspect.aisi.org.uk/>.
|
107
110
|
|
108
111
|
***
|
109
112
|
|