inspect-ai 0.3.75__py3-none-any.whl → 0.3.76__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_eval/evalset.py +3 -2
- inspect_ai/_eval/registry.py +3 -5
- inspect_ai/_eval/run.py +4 -0
- inspect_ai/_eval/task/run.py +4 -0
- inspect_ai/_util/logger.py +3 -0
- inspect_ai/_view/www/dist/assets/index.css +28 -16
- inspect_ai/_view/www/dist/assets/index.js +4801 -4615
- inspect_ai/_view/www/log-schema.json +79 -9
- inspect_ai/_view/www/src/samples/descriptor/score/CategoricalScoreDescriptor.tsx +1 -1
- inspect_ai/_view/www/src/samples/descriptor/score/NumericScoreDescriptor.tsx +2 -2
- inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +1 -1
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +2 -2
- inspect_ai/_view/www/src/types/log.d.ts +11 -5
- inspect_ai/log/_recorders/json.py +8 -0
- inspect_ai/log/_transcript.py +13 -4
- inspect_ai/model/_call_tools.py +13 -4
- inspect_ai/model/_chat_message.py +3 -0
- inspect_ai/model/_model.py +5 -1
- inspect_ai/model/_model_output.py +6 -1
- inspect_ai/model/_openai.py +11 -6
- inspect_ai/model/_providers/anthropic.py +133 -75
- inspect_ai/model/_providers/openai.py +11 -8
- inspect_ai/model/_providers/vertex.py +5 -2
- inspect_ai/tool/__init__.py +4 -0
- inspect_ai/tool/_tool_call.py +5 -2
- inspect_ai/tool/_tool_support_helpers.py +200 -0
- inspect_ai/tool/_tools/_bash_session.py +119 -0
- inspect_ai/tool/_tools/_computer/_computer.py +1 -1
- inspect_ai/tool/_tools/_text_editor.py +121 -0
- inspect_ai/tool/_tools/_web_browser/_back_compat.py +150 -0
- inspect_ai/tool/_tools/_web_browser/_web_browser.py +75 -130
- inspect_ai/tool/_tools/_web_search.py +1 -1
- inspect_ai/util/_json.py +28 -0
- inspect_ai/util/_sandbox/context.py +16 -7
- inspect_ai/util/_sandbox/docker/config.py +1 -1
- inspect_ai/util/_sandbox/docker/internal.py +3 -3
- {inspect_ai-0.3.75.dist-info → inspect_ai-0.3.76.dist-info}/METADATA +5 -2
- {inspect_ai-0.3.75.dist-info → inspect_ai-0.3.76.dist-info}/RECORD +42 -68
- {inspect_ai-0.3.75.dist-info → inspect_ai-0.3.76.dist-info}/WHEEL +1 -1
- inspect_ai/tool/_tools/_web_browser/_resources/.pylintrc +0 -8
- inspect_ai/tool/_tools/_web_browser/_resources/.vscode/launch.json +0 -24
- inspect_ai/tool/_tools/_web_browser/_resources/.vscode/settings.json +0 -25
- inspect_ai/tool/_tools/_web_browser/_resources/Dockerfile +0 -22
- inspect_ai/tool/_tools/_web_browser/_resources/README.md +0 -63
- inspect_ai/tool/_tools/_web_browser/_resources/accessibility_tree.py +0 -71
- inspect_ai/tool/_tools/_web_browser/_resources/accessibility_tree_node.py +0 -323
- inspect_ai/tool/_tools/_web_browser/_resources/cdp/__init__.py +0 -5
- inspect_ai/tool/_tools/_web_browser/_resources/cdp/a11y.py +0 -279
- inspect_ai/tool/_tools/_web_browser/_resources/cdp/dom.py +0 -9
- inspect_ai/tool/_tools/_web_browser/_resources/cdp/dom_snapshot.py +0 -293
- inspect_ai/tool/_tools/_web_browser/_resources/cdp/page.py +0 -94
- inspect_ai/tool/_tools/_web_browser/_resources/constants.py +0 -2
- inspect_ai/tool/_tools/_web_browser/_resources/images/usage_diagram.svg +0 -2
- inspect_ai/tool/_tools/_web_browser/_resources/mock_environment.py +0 -45
- inspect_ai/tool/_tools/_web_browser/_resources/playwright_browser.py +0 -50
- inspect_ai/tool/_tools/_web_browser/_resources/playwright_crawler.py +0 -48
- inspect_ai/tool/_tools/_web_browser/_resources/playwright_page_crawler.py +0 -280
- inspect_ai/tool/_tools/_web_browser/_resources/pyproject.toml +0 -65
- inspect_ai/tool/_tools/_web_browser/_resources/rectangle.py +0 -64
- inspect_ai/tool/_tools/_web_browser/_resources/rpc_client_helpers.py +0 -146
- inspect_ai/tool/_tools/_web_browser/_resources/scale_factor.py +0 -64
- inspect_ai/tool/_tools/_web_browser/_resources/test_accessibility_tree_node.py +0 -180
- inspect_ai/tool/_tools/_web_browser/_resources/test_playwright_crawler.py +0 -99
- inspect_ai/tool/_tools/_web_browser/_resources/test_rectangle.py +0 -15
- inspect_ai/tool/_tools/_web_browser/_resources/test_web_client.py +0 -44
- inspect_ai/tool/_tools/_web_browser/_resources/web_browser_rpc_types.py +0 -39
- inspect_ai/tool/_tools/_web_browser/_resources/web_client.py +0 -214
- inspect_ai/tool/_tools/_web_browser/_resources/web_client_new_session.py +0 -35
- inspect_ai/tool/_tools/_web_browser/_resources/web_server.py +0 -192
- {inspect_ai-0.3.75.dist-info → inspect_ai-0.3.76.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.75.dist-info → inspect_ai-0.3.76.dist-info/licenses}/LICENSE +0 -0
- {inspect_ai-0.3.75.dist-info → inspect_ai-0.3.76.dist-info}/top_level.txt +0 -0
@@ -1,23 +1,12 @@
|
|
1
1
|
import functools
|
2
2
|
import os
|
3
3
|
import re
|
4
|
-
import sys
|
5
4
|
from copy import copy
|
6
5
|
from logging import getLogger
|
7
|
-
from typing import Any, Literal, Optional, Tuple,
|
6
|
+
from typing import Any, Literal, NamedTuple, Optional, Tuple, cast
|
8
7
|
|
9
8
|
import httpcore
|
10
9
|
import httpx
|
11
|
-
|
12
|
-
from inspect_ai._util.http import is_retryable_http_status
|
13
|
-
|
14
|
-
from .util.hooks import HttpxHooks
|
15
|
-
|
16
|
-
if sys.version_info >= (3, 11):
|
17
|
-
from typing import NotRequired
|
18
|
-
else:
|
19
|
-
from typing_extensions import NotRequired
|
20
|
-
|
21
10
|
from anthropic import (
|
22
11
|
APIConnectionError,
|
23
12
|
APIStatusError,
|
@@ -39,19 +28,19 @@ from anthropic.types import (
|
|
39
28
|
TextBlockParam,
|
40
29
|
ThinkingBlock,
|
41
30
|
ThinkingBlockParam,
|
31
|
+
ToolBash20250124Param,
|
42
32
|
ToolParam,
|
43
33
|
ToolResultBlockParam,
|
34
|
+
ToolTextEditor20250124Param,
|
44
35
|
ToolUseBlock,
|
45
36
|
ToolUseBlockParam,
|
46
37
|
message_create_params,
|
47
38
|
)
|
39
|
+
from anthropic.types.beta import BetaToolComputerUse20250124Param
|
48
40
|
from pydantic import JsonValue
|
49
41
|
from typing_extensions import override
|
50
42
|
|
51
|
-
from inspect_ai._util.constants import
|
52
|
-
BASE_64_DATA_REMOVED,
|
53
|
-
NO_CONTENT,
|
54
|
-
)
|
43
|
+
from inspect_ai._util.constants import BASE_64_DATA_REMOVED, NO_CONTENT
|
55
44
|
from inspect_ai._util.content import (
|
56
45
|
Content,
|
57
46
|
ContentImage,
|
@@ -59,6 +48,7 @@ from inspect_ai._util.content import (
|
|
59
48
|
ContentText,
|
60
49
|
)
|
61
50
|
from inspect_ai._util.error import exception_message
|
51
|
+
from inspect_ai._util.http import is_retryable_http_status
|
62
52
|
from inspect_ai._util.images import file_as_data_uri
|
63
53
|
from inspect_ai._util.logger import warn_once
|
64
54
|
from inspect_ai._util.url import data_uri_mime_type, data_uri_to_base64
|
@@ -70,11 +60,14 @@ from .._model import ModelAPI
|
|
70
60
|
from .._model_call import ModelCall
|
71
61
|
from .._model_output import ChatCompletionChoice, ModelOutput, ModelUsage, StopReason
|
72
62
|
from .util import environment_prerequisite_error, model_base_url
|
63
|
+
from .util.hooks import HttpxHooks
|
73
64
|
|
74
65
|
logger = getLogger(__name__)
|
75
66
|
|
76
67
|
ANTHROPIC_API_KEY = "ANTHROPIC_API_KEY"
|
77
68
|
|
69
|
+
INTERNAL_COMPUTER_TOOL_NAME = "computer"
|
70
|
+
|
78
71
|
|
79
72
|
class AnthropicAPI(ModelAPI):
|
80
73
|
def __init__(
|
@@ -93,7 +86,7 @@ class AnthropicAPI(ModelAPI):
|
|
93
86
|
else:
|
94
87
|
self.service = None
|
95
88
|
|
96
|
-
# collect
|
89
|
+
# collect generate model_args (then delete them so we can pass the rest on)
|
97
90
|
def collect_model_arg(name: str) -> Any | None:
|
98
91
|
nonlocal model_args
|
99
92
|
value = model_args.get(name, None)
|
@@ -193,14 +186,11 @@ class AnthropicAPI(ModelAPI):
|
|
193
186
|
|
194
187
|
# generate
|
195
188
|
try:
|
196
|
-
(
|
197
|
-
|
198
|
-
|
199
|
-
messages,
|
200
|
-
computer_use,
|
201
|
-
) = await self.resolve_chat_input(input, tools, config)
|
189
|
+
system_param, tools_param, messages = await self.resolve_chat_input(
|
190
|
+
input, tools, config
|
191
|
+
)
|
202
192
|
|
203
|
-
# prepare request params (
|
193
|
+
# prepare request params (assembled this way so we can log the raw model call)
|
204
194
|
request = dict(messages=messages)
|
205
195
|
|
206
196
|
# system messages and tools
|
@@ -218,7 +208,13 @@ class AnthropicAPI(ModelAPI):
|
|
218
208
|
|
219
209
|
# extra headers (for time tracker and computer use)
|
220
210
|
extra_headers = headers | {HttpxHooks.REQUEST_ID_HEADER: request_id}
|
221
|
-
if
|
211
|
+
if any(
|
212
|
+
tool.get("type", None) == "computer_20250124" for tool in tools_param
|
213
|
+
):
|
214
|
+
# From: https://docs.anthropic.com/en/docs/agents-and-tools/computer-use#claude-3-7-sonnet-beta-flag
|
215
|
+
# Note: The Bash (bash_20250124) and Text Editor (text_editor_20250124)
|
216
|
+
# tools are generally available for Claude 3.5 Sonnet (new) as well and
|
217
|
+
# can be used without the computer use beta header.
|
222
218
|
betas.append("computer-use-2025-01-24")
|
223
219
|
if len(betas) > 0:
|
224
220
|
extra_headers["anthropic-beta"] = ",".join(betas)
|
@@ -405,9 +401,7 @@ class AnthropicAPI(ModelAPI):
|
|
405
401
|
input: list[ChatMessage],
|
406
402
|
tools: list[ToolInfo],
|
407
403
|
config: GenerateConfig,
|
408
|
-
) -> Tuple[
|
409
|
-
list[TextBlockParam] | None, list["ToolParamDef"], list[MessageParam], bool
|
410
|
-
]:
|
404
|
+
) -> Tuple[list[TextBlockParam] | None, list["ToolParamDef"], list[MessageParam]]:
|
411
405
|
# extract system message
|
412
406
|
system_messages, messages = split_system_messages(input, config)
|
413
407
|
|
@@ -420,7 +414,7 @@ class AnthropicAPI(ModelAPI):
|
|
420
414
|
)
|
421
415
|
|
422
416
|
# tools
|
423
|
-
tools_params
|
417
|
+
tools_params = [self.tool_param_for_tool_info(tool, config) for tool in tools]
|
424
418
|
|
425
419
|
# system messages
|
426
420
|
if len(system_messages) > 0:
|
@@ -470,40 +464,35 @@ class AnthropicAPI(ModelAPI):
|
|
470
464
|
add_cache_control(cast(dict[str, Any], content[-1]))
|
471
465
|
|
472
466
|
# return chat input
|
473
|
-
return system_param, tools_params, message_params
|
474
|
-
|
475
|
-
def
|
476
|
-
self,
|
477
|
-
) ->
|
478
|
-
# tool
|
479
|
-
|
480
|
-
|
481
|
-
|
482
|
-
|
483
|
-
|
484
|
-
|
485
|
-
|
467
|
+
return system_param, tools_params, message_params
|
468
|
+
|
469
|
+
def tool_param_for_tool_info(
|
470
|
+
self, tool: ToolInfo, config: GenerateConfig
|
471
|
+
) -> "ToolParamDef":
|
472
|
+
# Use a native tool implementation when available. Otherwise, use the
|
473
|
+
# standard tool implementation
|
474
|
+
return self.maybe_native_tool_param(tool, config) or ToolParam(
|
475
|
+
name=tool.name,
|
476
|
+
description=tool.description,
|
477
|
+
input_schema=tool.parameters.model_dump(exclude_none=True),
|
478
|
+
)
|
479
|
+
|
480
|
+
def maybe_native_tool_param(
|
481
|
+
self, tool: ToolInfo, config: GenerateConfig
|
482
|
+
) -> Optional["ToolParamDef"]:
|
483
|
+
return (
|
484
|
+
(
|
486
485
|
self.computer_use_tool_param(tool)
|
487
|
-
|
488
|
-
|
486
|
+
or self.text_editor_tool_param(tool)
|
487
|
+
or self.bash_tool_param(tool)
|
489
488
|
)
|
490
|
-
if
|
491
|
-
|
492
|
-
|
493
|
-
else:
|
494
|
-
tool_params.append(
|
495
|
-
ToolParam(
|
496
|
-
name=tool.name,
|
497
|
-
description=tool.description,
|
498
|
-
input_schema=tool.parameters.model_dump(exclude_none=True),
|
499
|
-
)
|
500
|
-
)
|
501
|
-
|
502
|
-
return tool_params, computer_use
|
489
|
+
if config.internal_tools is not False
|
490
|
+
else None
|
491
|
+
)
|
503
492
|
|
504
493
|
def computer_use_tool_param(
|
505
494
|
self, tool: ToolInfo
|
506
|
-
) -> Optional[
|
495
|
+
) -> Optional[BetaToolComputerUse20250124Param]:
|
507
496
|
# check for compatible 'computer' tool
|
508
497
|
if tool.name == "computer" and (
|
509
498
|
sorted(tool.parameters.properties.keys())
|
@@ -525,7 +514,7 @@ class AnthropicAPI(ModelAPI):
|
|
525
514
|
"Use of Anthropic's native computer use support is not enabled in Claude 3.5. Please use 3.7 or later to leverage the native support.",
|
526
515
|
)
|
527
516
|
return None
|
528
|
-
return
|
517
|
+
return BetaToolComputerUse20250124Param(
|
529
518
|
type="computer_20250124",
|
530
519
|
name="computer",
|
531
520
|
# Note: The dimensions passed here for display_width_px and display_height_px should
|
@@ -542,23 +531,58 @@ class AnthropicAPI(ModelAPI):
|
|
542
531
|
else:
|
543
532
|
return None
|
544
533
|
|
534
|
+
def text_editor_tool_param(
|
535
|
+
self, tool: ToolInfo
|
536
|
+
) -> Optional[ToolTextEditor20250124Param]:
|
537
|
+
# check for compatible 'text editor' tool
|
538
|
+
if tool.name == "text_editor" and (
|
539
|
+
sorted(tool.parameters.properties.keys())
|
540
|
+
== sorted(
|
541
|
+
[
|
542
|
+
"command",
|
543
|
+
"file_text",
|
544
|
+
"insert_line",
|
545
|
+
"new_str",
|
546
|
+
"old_str",
|
547
|
+
"path",
|
548
|
+
"view_range",
|
549
|
+
]
|
550
|
+
)
|
551
|
+
):
|
552
|
+
return ToolTextEditor20250124Param(
|
553
|
+
type="text_editor_20250124", name="str_replace_editor"
|
554
|
+
)
|
555
|
+
# not a text_editor tool
|
556
|
+
else:
|
557
|
+
return None
|
545
558
|
|
546
|
-
|
547
|
-
#
|
548
|
-
|
549
|
-
|
550
|
-
|
551
|
-
|
552
|
-
|
553
|
-
|
559
|
+
def bash_tool_param(self, tool: ToolInfo) -> Optional[ToolBash20250124Param]:
|
560
|
+
# check for compatible 'bash' tool
|
561
|
+
if tool.name == "bash_session" and (
|
562
|
+
sorted(tool.parameters.properties.keys()) == sorted(["command", "restart"])
|
563
|
+
):
|
564
|
+
return ToolBash20250124Param(type="bash_20250124", name="bash")
|
565
|
+
# not a bash tool
|
566
|
+
else:
|
567
|
+
return None
|
554
568
|
|
555
569
|
|
556
|
-
# tools can be either a stock tool param or a special
|
557
|
-
ToolParamDef =
|
570
|
+
# tools can be either a stock tool param or a special Anthropic native use tool param
|
571
|
+
ToolParamDef = (
|
572
|
+
ToolParam
|
573
|
+
| BetaToolComputerUse20250124Param
|
574
|
+
| ToolTextEditor20250124Param
|
575
|
+
| ToolBash20250124Param
|
576
|
+
)
|
558
577
|
|
559
578
|
|
560
579
|
def add_cache_control(
|
561
|
-
param: TextBlockParam
|
580
|
+
param: TextBlockParam
|
581
|
+
| ToolParam
|
582
|
+
| BetaToolComputerUse20250124Param
|
583
|
+
| ToolTextEditor20250124Param
|
584
|
+
| ToolBash20250124Param
|
585
|
+
| dict[str, Any],
|
562
586
|
) -> None:
|
563
587
|
cast(dict[str, Any], param)["cache_control"] = {"type": "ephemeral"}
|
564
588
|
|
@@ -567,10 +591,10 @@ def consecutive_user_message_reducer(
|
|
567
591
|
messages: list[MessageParam],
|
568
592
|
message: MessageParam,
|
569
593
|
) -> list[MessageParam]:
|
570
|
-
return
|
594
|
+
return consecutive_message_reducer(messages, message, "user")
|
571
595
|
|
572
596
|
|
573
|
-
def
|
597
|
+
def consecutive_message_reducer(
|
574
598
|
messages: list[MessageParam],
|
575
599
|
message: MessageParam,
|
576
600
|
role: Literal["user", "assistant"],
|
@@ -583,6 +607,7 @@ def consective_message_reducer(
|
|
583
607
|
|
584
608
|
|
585
609
|
def combine_messages(a: MessageParam, b: MessageParam) -> MessageParam:
|
610
|
+
# TODO: Fix this code as it currently drops interesting properties when combining
|
586
611
|
role = a["role"]
|
587
612
|
a_content = a["content"]
|
588
613
|
b_content = b["content"]
|
@@ -702,7 +727,7 @@ async def message_param(message: ChatMessage) -> MessageParam:
|
|
702
727
|
ToolUseBlockParam(
|
703
728
|
type="tool_use",
|
704
729
|
id=tool_call.id,
|
705
|
-
name=tool_call.function,
|
730
|
+
name=tool_call.internal_name or tool_call.function,
|
706
731
|
input=tool_call.arguments,
|
707
732
|
)
|
708
733
|
)
|
@@ -749,11 +774,13 @@ async def model_output_from_message(
|
|
749
774
|
content.append(ContentText(type="text", text=content_text))
|
750
775
|
elif isinstance(content_block, ToolUseBlock):
|
751
776
|
tool_calls = tool_calls or []
|
777
|
+
info = maybe_mapped_call_info(content_block.name, tools)
|
752
778
|
tool_calls.append(
|
753
779
|
ToolCall(
|
754
|
-
type=
|
780
|
+
type=info.internal_type,
|
755
781
|
id=content_block.id,
|
756
|
-
function=
|
782
|
+
function=info.inspect_name,
|
783
|
+
internal_name=info.internal_name,
|
757
784
|
arguments=content_block.model_dump().get("input", {}),
|
758
785
|
)
|
759
786
|
)
|
@@ -803,6 +830,37 @@ async def model_output_from_message(
|
|
803
830
|
)
|
804
831
|
|
805
832
|
|
833
|
+
class CallInfo(NamedTuple):
|
834
|
+
internal_name: str | None
|
835
|
+
internal_type: str
|
836
|
+
inspect_name: str
|
837
|
+
|
838
|
+
|
839
|
+
def maybe_mapped_call_info(tool_called: str, tools: list[ToolInfo]) -> CallInfo:
|
840
|
+
"""
|
841
|
+
Return call info - potentially transformed by native tool mappings.
|
842
|
+
|
843
|
+
Anthropic prescribes names for their native tools - `computer`, `bash`, and
|
844
|
+
`str_replace_editor`. For a variety of reasons, Inspect's tool names to not
|
845
|
+
necessarily conform to internal names. Anthropic also provides specific tool
|
846
|
+
types for these built-in tools.
|
847
|
+
"""
|
848
|
+
mappings = (
|
849
|
+
(INTERNAL_COMPUTER_TOOL_NAME, "computer_20250124", "computer"),
|
850
|
+
("str_replace_editor", "text_editor_20250124", "text_editor"),
|
851
|
+
("bash", "bash_20250124", "bash_session"),
|
852
|
+
)
|
853
|
+
|
854
|
+
return next(
|
855
|
+
(
|
856
|
+
CallInfo(entry[0], entry[1], entry[2])
|
857
|
+
for entry in mappings
|
858
|
+
if entry[0] == tool_called and any(tool.name == entry[2] for tool in tools)
|
859
|
+
),
|
860
|
+
CallInfo(None, "function", tool_called),
|
861
|
+
)
|
862
|
+
|
863
|
+
|
806
864
|
def message_stop_reason(message: Message) -> StopReason:
|
807
865
|
match message.stop_reason:
|
808
866
|
case "end_turn" | "stop_sequence":
|
@@ -67,6 +67,16 @@ class OpenAIAPI(ModelAPI):
|
|
67
67
|
config: GenerateConfig = GenerateConfig(),
|
68
68
|
**model_args: Any,
|
69
69
|
) -> None:
|
70
|
+
# extract azure service prefix from model name (other providers
|
71
|
+
# that subclass from us like together expect to have the qualifier
|
72
|
+
# in the model name e.g. google/gemma-2b-it)
|
73
|
+
parts = model_name.split("/")
|
74
|
+
if parts[0] == "azure" and len(parts) > 1:
|
75
|
+
self.service: str | None = parts[0]
|
76
|
+
model_name = "/".join(parts[1:])
|
77
|
+
else:
|
78
|
+
self.service = None
|
79
|
+
|
70
80
|
# call super
|
71
81
|
super().__init__(
|
72
82
|
model_name=model_name,
|
@@ -76,14 +86,6 @@ class OpenAIAPI(ModelAPI):
|
|
76
86
|
config=config,
|
77
87
|
)
|
78
88
|
|
79
|
-
# extract any service prefix from model name
|
80
|
-
parts = model_name.split("/")
|
81
|
-
if len(parts) > 1:
|
82
|
-
self.service: str | None = parts[0]
|
83
|
-
model_name = "/".join(parts[1:])
|
84
|
-
else:
|
85
|
-
self.service = None
|
86
|
-
|
87
89
|
# resolve api_key
|
88
90
|
if not self.api_key:
|
89
91
|
self.api_key = os.environ.get(
|
@@ -322,6 +324,7 @@ class OpenAIAPI(ModelAPI):
|
|
322
324
|
config.reasoning_effort is not None
|
323
325
|
and not self.is_gpt()
|
324
326
|
and not self.is_o1_mini()
|
327
|
+
and not self.is_o1_preview()
|
325
328
|
):
|
326
329
|
params["reasoning_effort"] = config.reasoning_effort
|
327
330
|
if config.response_schema is not None:
|
@@ -34,8 +34,8 @@ from inspect_ai._util.content import (
|
|
34
34
|
Content,
|
35
35
|
ContentAudio,
|
36
36
|
ContentImage,
|
37
|
+
ContentReasoning,
|
37
38
|
ContentText,
|
38
|
-
ContentVideo,
|
39
39
|
)
|
40
40
|
from inspect_ai._util.http import is_retryable_http_status
|
41
41
|
from inspect_ai._util.images import file_as_data
|
@@ -336,10 +336,13 @@ async def content_part(content: Content | str) -> Part:
|
|
336
336
|
elif isinstance(content, ContentImage):
|
337
337
|
image_bytes, mime_type = await file_as_data(content.image)
|
338
338
|
return Part.from_image(image=Image.from_bytes(data=image_bytes))
|
339
|
+
elif isinstance(content, ContentReasoning):
|
340
|
+
return Part.from_text(content.reasoning or NO_CONTENT)
|
339
341
|
else:
|
340
342
|
if isinstance(content, ContentAudio):
|
341
343
|
file = content.audio
|
342
|
-
|
344
|
+
else:
|
345
|
+
# it's ContentVideo
|
343
346
|
file = content.video
|
344
347
|
file_bytes, mime_type = await file_as_data(file)
|
345
348
|
return Part.from_data(file_bytes, mime_type)
|
inspect_ai/tool/__init__.py
CHANGED
@@ -22,17 +22,21 @@ from ._tool_def import ToolDef
|
|
22
22
|
from ._tool_info import ToolInfo
|
23
23
|
from ._tool_params import ToolParam, ToolParams
|
24
24
|
from ._tool_with import tool_with
|
25
|
+
from ._tools._bash_session import bash_session
|
25
26
|
from ._tools._computer import computer
|
26
27
|
from ._tools._execute import bash, python
|
28
|
+
from ._tools._text_editor import text_editor
|
27
29
|
from ._tools._web_browser import web_browser
|
28
30
|
from ._tools._web_search import web_search
|
29
31
|
|
30
32
|
__all__ = [
|
31
33
|
"bash",
|
34
|
+
"bash_session",
|
32
35
|
"computer",
|
33
36
|
"python",
|
34
37
|
"web_browser",
|
35
38
|
"web_search",
|
39
|
+
"text_editor",
|
36
40
|
"tool",
|
37
41
|
"tool_with",
|
38
42
|
"Tool",
|
inspect_ai/tool/_tool_call.py
CHANGED
@@ -44,8 +44,11 @@ class ToolCall:
|
|
44
44
|
arguments: dict[str, Any]
|
45
45
|
"""Arguments to function."""
|
46
46
|
|
47
|
-
type:
|
48
|
-
"""Type of tool call (
|
47
|
+
type: str
|
48
|
+
"""Type of tool call ('function' or a model specific internal tool type)"""
|
49
|
+
|
50
|
+
internal_name: str | None = field(default=None)
|
51
|
+
"""Model's internal name for the tool - if any."""
|
49
52
|
|
50
53
|
parse_error: str | None = field(default=None)
|
51
54
|
"""Error which occurred parsing tool call."""
|
@@ -0,0 +1,200 @@
|
|
1
|
+
"""
|
2
|
+
This module provides helper code for handling JSON-RPC communication between the inspect process and the `inspect-tool-support` package code running in the sandbox environment.
|
3
|
+
|
4
|
+
It includes definitions for JSON-RPC request and response models, as well as functions to create and parse JSON-RPC requests and responses.
|
5
|
+
"""
|
6
|
+
|
7
|
+
import json
|
8
|
+
from itertools import count
|
9
|
+
from textwrap import dedent
|
10
|
+
from typing import Literal, Type, TypeVar, cast
|
11
|
+
|
12
|
+
from pydantic import BaseModel, RootModel
|
13
|
+
|
14
|
+
from inspect_ai._util.error import PrerequisiteError
|
15
|
+
from inspect_ai.tool._tool import ToolError, ToolParsingError
|
16
|
+
from inspect_ai.util import sandbox_with
|
17
|
+
from inspect_ai.util._sandbox.environment import SandboxEnvironment
|
18
|
+
|
19
|
+
|
20
|
+
class JSONRPCResponseBase(BaseModel):
|
21
|
+
jsonrpc: Literal["2.0"]
|
22
|
+
id: int | float | str
|
23
|
+
|
24
|
+
|
25
|
+
class JSONRPCSuccessResponse(JSONRPCResponseBase):
|
26
|
+
result: object
|
27
|
+
|
28
|
+
|
29
|
+
class JSONRPCError(BaseModel):
|
30
|
+
"""See: https://www.jsonrpc.org/specification#error_object"""
|
31
|
+
|
32
|
+
code: int
|
33
|
+
message: str
|
34
|
+
data: object | None = None
|
35
|
+
|
36
|
+
|
37
|
+
class JSONRPCErrorResponse(JSONRPCResponseBase):
|
38
|
+
error: JSONRPCError
|
39
|
+
|
40
|
+
|
41
|
+
class JSONRPCResponse(RootModel[JSONRPCSuccessResponse | JSONRPCErrorResponse]):
|
42
|
+
pass
|
43
|
+
|
44
|
+
|
45
|
+
BaseModelT = TypeVar("BaseModelT", bound=BaseModel)
|
46
|
+
StrOrModelT = TypeVar("StrOrModelT", bound=str | BaseModel)
|
47
|
+
|
48
|
+
id_generator = count(666)
|
49
|
+
|
50
|
+
|
51
|
+
async def exec_sandbox_rpc(
|
52
|
+
sandbox: SandboxEnvironment,
|
53
|
+
method: str,
|
54
|
+
params: dict[str, object] | tuple[object, ...],
|
55
|
+
result_cls: Type[StrOrModelT],
|
56
|
+
timeout: int | None = None,
|
57
|
+
user: str | None = None,
|
58
|
+
) -> StrOrModelT:
|
59
|
+
"""
|
60
|
+
Execute a JSON-RPC command to a sandbox environment.
|
61
|
+
|
62
|
+
Note that the JSON RPC request is sent to the exec'ed program via stdin.
|
63
|
+
|
64
|
+
Args:
|
65
|
+
sandbox (SandboxEnvironment): The sandbox environment to execute the command in.
|
66
|
+
method (str): The JSON-RPC method to call.
|
67
|
+
params (dict[str, object] | tuple[object, ...]): The parameters for the JSON-RPC method.
|
68
|
+
result_cls (Type[BaseModelT]): The class to use for parsing the result.
|
69
|
+
timeout (int | None, optional): The timeout for the execution. Defaults to None.
|
70
|
+
user: Optional username or UID to run the command as.
|
71
|
+
|
72
|
+
Returns:
|
73
|
+
BaseModelT: The parsed result of the JSON-RPC call.
|
74
|
+
|
75
|
+
Raises:
|
76
|
+
RuntimeError: If the sandbox execution fails or if there is an error in the JSON-RPC response.
|
77
|
+
ToolParsingError: If the JSON-RPC response contains a specific error code indicating a parsing error.
|
78
|
+
"""
|
79
|
+
exec_result = await sandbox.exec(
|
80
|
+
[SANDBOX_CLI, "exec"],
|
81
|
+
input=_create_json_rpc_request(method, params),
|
82
|
+
timeout=timeout,
|
83
|
+
user=user,
|
84
|
+
)
|
85
|
+
|
86
|
+
if not exec_result.success:
|
87
|
+
raise RuntimeError(
|
88
|
+
f"Sandbox.exec failure executing {_rpc_call_description(method, params)}: {exec_result.stderr}"
|
89
|
+
)
|
90
|
+
|
91
|
+
match _parse_json_rpc_response(exec_result.stdout, result_cls):
|
92
|
+
case JSONRPCError(code=-32601 | -32602, message=message):
|
93
|
+
raise ToolParsingError(message)
|
94
|
+
case JSONRPCError(code=-32000, message=message):
|
95
|
+
raise ToolError(message)
|
96
|
+
case JSONRPCError(code=code, message=message):
|
97
|
+
raise RuntimeError(
|
98
|
+
f"Error executing tool command {_rpc_call_description(method, params)}: {code=} {message}"
|
99
|
+
)
|
100
|
+
# case result_cls() as model: yields a mypy error since it has narrowed model down
|
101
|
+
# to BaseModel and not BaseModelT. ???
|
102
|
+
case model if isinstance(model, result_cls):
|
103
|
+
return model
|
104
|
+
case not_possible:
|
105
|
+
raise RuntimeError(
|
106
|
+
f"Error executing tool command {_rpc_call_description(method, params)}: {not_possible}"
|
107
|
+
)
|
108
|
+
|
109
|
+
|
110
|
+
SANDBOX_CLI = "inspect-tool-support"
|
111
|
+
INSPECT_TOOL_SUPPORT_IMAGE_DOCKERHUB = "aisiuk/inspect-tool-support"
|
112
|
+
|
113
|
+
|
114
|
+
async def tool_container_sandbox(tool_name: str) -> SandboxEnvironment:
|
115
|
+
sb = await sandbox_with(SANDBOX_CLI, True)
|
116
|
+
if sb:
|
117
|
+
return sb
|
118
|
+
else:
|
119
|
+
msg = dedent(f"""
|
120
|
+
The {tool_name} service was not found in any of the sandboxes for this sample. Please add the {tool_name} to your configuration.
|
121
|
+
|
122
|
+
For example, the following Docker compose file uses the {INSPECT_TOOL_SUPPORT_IMAGE_DOCKERHUB} reference image as its default sandbox:
|
123
|
+
|
124
|
+
services:
|
125
|
+
default:
|
126
|
+
image: "{INSPECT_TOOL_SUPPORT_IMAGE_DOCKERHUB}"
|
127
|
+
init: true
|
128
|
+
|
129
|
+
Alternatively, you can include the service into your own Dockerfile:
|
130
|
+
|
131
|
+
RUN python -m venv /opt/inspect_tool_support
|
132
|
+
ENV PATH="/opt/inspect_tool_support/bin:$PATH"
|
133
|
+
RUN pip install inspect-tool-support
|
134
|
+
RUN inspect-tool-support post-install
|
135
|
+
""").strip()
|
136
|
+
raise PrerequisiteError(msg)
|
137
|
+
|
138
|
+
|
139
|
+
def _create_json_rpc_request(
|
140
|
+
method: str, params: dict[str, object] | tuple[object, ...]
|
141
|
+
) -> str:
|
142
|
+
return json.dumps(
|
143
|
+
{
|
144
|
+
"jsonrpc": "2.0",
|
145
|
+
"method": method,
|
146
|
+
"id": next(id_generator),
|
147
|
+
"params": list(params) if isinstance(params, tuple) else params,
|
148
|
+
}
|
149
|
+
)
|
150
|
+
|
151
|
+
|
152
|
+
def _rpc_call_description(
|
153
|
+
method: str, params: dict[str, object] | tuple[object, ...]
|
154
|
+
) -> str:
|
155
|
+
"""
|
156
|
+
Generate a string description of an RPC call.
|
157
|
+
|
158
|
+
Args:
|
159
|
+
method (str): The name of the RPC method.
|
160
|
+
params (dict[str, object] | tuple[object, ...]): The parameters for the RPC method.
|
161
|
+
|
162
|
+
Returns:
|
163
|
+
str: A string description of the RPC call.
|
164
|
+
|
165
|
+
Examples:
|
166
|
+
>>> _rpc_call_description("subtract", {"minuend": 42, "subtrahend": 23})
|
167
|
+
'subtract(minuend: 42, subtrahend: 23)'
|
168
|
+
|
169
|
+
>>> _rpc_call_description("subtract", (42, 23))
|
170
|
+
'subtract(42, 23)'
|
171
|
+
"""
|
172
|
+
normalized_params = (
|
173
|
+
list(map(str, params))
|
174
|
+
if isinstance(params, tuple)
|
175
|
+
else [f"{k}: {v}" for k, v in params.items()]
|
176
|
+
)
|
177
|
+
return f"{method}({', '.join(normalized_params)})"
|
178
|
+
|
179
|
+
|
180
|
+
def _parse_json_rpc_response(
|
181
|
+
response_str: str,
|
182
|
+
result_cls: Type[StrOrModelT],
|
183
|
+
) -> StrOrModelT | JSONRPCError:
|
184
|
+
match JSONRPCResponse.model_validate_json(response_str).root:
|
185
|
+
case JSONRPCErrorResponse(error=error):
|
186
|
+
return error
|
187
|
+
case JSONRPCSuccessResponse(result=rpc_result):
|
188
|
+
# TODO: Wow. Is there really no way to convince Python to narrow these types
|
189
|
+
# and avoid the cast's
|
190
|
+
if result_cls is str:
|
191
|
+
if not isinstance(rpc_result, str):
|
192
|
+
raise ValueError(f"Expected string result, got {type(rpc_result)}")
|
193
|
+
return cast(StrOrModelT, rpc_result)
|
194
|
+
else:
|
195
|
+
return cast(
|
196
|
+
StrOrModelT,
|
197
|
+
cast(BaseModel, result_cls).model_validate(rpc_result, strict=True),
|
198
|
+
)
|
199
|
+
case _:
|
200
|
+
raise ValueError(f"Unexpected JSON RPC response: {response_str}")
|