inspect-ai 0.3.75__py3-none-any.whl → 0.3.76__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. inspect_ai/_eval/evalset.py +3 -2
  2. inspect_ai/_eval/registry.py +3 -5
  3. inspect_ai/_eval/run.py +4 -0
  4. inspect_ai/_eval/task/run.py +4 -0
  5. inspect_ai/_util/logger.py +3 -0
  6. inspect_ai/_view/www/dist/assets/index.css +28 -16
  7. inspect_ai/_view/www/dist/assets/index.js +4801 -4615
  8. inspect_ai/_view/www/log-schema.json +79 -9
  9. inspect_ai/_view/www/src/samples/descriptor/score/CategoricalScoreDescriptor.tsx +1 -1
  10. inspect_ai/_view/www/src/samples/descriptor/score/NumericScoreDescriptor.tsx +2 -2
  11. inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +1 -1
  12. inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +2 -2
  13. inspect_ai/_view/www/src/types/log.d.ts +11 -5
  14. inspect_ai/log/_recorders/json.py +8 -0
  15. inspect_ai/log/_transcript.py +13 -4
  16. inspect_ai/model/_call_tools.py +13 -4
  17. inspect_ai/model/_chat_message.py +3 -0
  18. inspect_ai/model/_model.py +5 -1
  19. inspect_ai/model/_model_output.py +6 -1
  20. inspect_ai/model/_openai.py +11 -6
  21. inspect_ai/model/_providers/anthropic.py +133 -75
  22. inspect_ai/model/_providers/openai.py +11 -8
  23. inspect_ai/model/_providers/vertex.py +5 -2
  24. inspect_ai/tool/__init__.py +4 -0
  25. inspect_ai/tool/_tool_call.py +5 -2
  26. inspect_ai/tool/_tool_support_helpers.py +200 -0
  27. inspect_ai/tool/_tools/_bash_session.py +119 -0
  28. inspect_ai/tool/_tools/_computer/_computer.py +1 -1
  29. inspect_ai/tool/_tools/_text_editor.py +121 -0
  30. inspect_ai/tool/_tools/_web_browser/_back_compat.py +150 -0
  31. inspect_ai/tool/_tools/_web_browser/_web_browser.py +75 -130
  32. inspect_ai/tool/_tools/_web_search.py +1 -1
  33. inspect_ai/util/_json.py +28 -0
  34. inspect_ai/util/_sandbox/context.py +16 -7
  35. inspect_ai/util/_sandbox/docker/config.py +1 -1
  36. inspect_ai/util/_sandbox/docker/internal.py +3 -3
  37. {inspect_ai-0.3.75.dist-info → inspect_ai-0.3.76.dist-info}/METADATA +5 -2
  38. {inspect_ai-0.3.75.dist-info → inspect_ai-0.3.76.dist-info}/RECORD +42 -68
  39. {inspect_ai-0.3.75.dist-info → inspect_ai-0.3.76.dist-info}/WHEEL +1 -1
  40. inspect_ai/tool/_tools/_web_browser/_resources/.pylintrc +0 -8
  41. inspect_ai/tool/_tools/_web_browser/_resources/.vscode/launch.json +0 -24
  42. inspect_ai/tool/_tools/_web_browser/_resources/.vscode/settings.json +0 -25
  43. inspect_ai/tool/_tools/_web_browser/_resources/Dockerfile +0 -22
  44. inspect_ai/tool/_tools/_web_browser/_resources/README.md +0 -63
  45. inspect_ai/tool/_tools/_web_browser/_resources/accessibility_tree.py +0 -71
  46. inspect_ai/tool/_tools/_web_browser/_resources/accessibility_tree_node.py +0 -323
  47. inspect_ai/tool/_tools/_web_browser/_resources/cdp/__init__.py +0 -5
  48. inspect_ai/tool/_tools/_web_browser/_resources/cdp/a11y.py +0 -279
  49. inspect_ai/tool/_tools/_web_browser/_resources/cdp/dom.py +0 -9
  50. inspect_ai/tool/_tools/_web_browser/_resources/cdp/dom_snapshot.py +0 -293
  51. inspect_ai/tool/_tools/_web_browser/_resources/cdp/page.py +0 -94
  52. inspect_ai/tool/_tools/_web_browser/_resources/constants.py +0 -2
  53. inspect_ai/tool/_tools/_web_browser/_resources/images/usage_diagram.svg +0 -2
  54. inspect_ai/tool/_tools/_web_browser/_resources/mock_environment.py +0 -45
  55. inspect_ai/tool/_tools/_web_browser/_resources/playwright_browser.py +0 -50
  56. inspect_ai/tool/_tools/_web_browser/_resources/playwright_crawler.py +0 -48
  57. inspect_ai/tool/_tools/_web_browser/_resources/playwright_page_crawler.py +0 -280
  58. inspect_ai/tool/_tools/_web_browser/_resources/pyproject.toml +0 -65
  59. inspect_ai/tool/_tools/_web_browser/_resources/rectangle.py +0 -64
  60. inspect_ai/tool/_tools/_web_browser/_resources/rpc_client_helpers.py +0 -146
  61. inspect_ai/tool/_tools/_web_browser/_resources/scale_factor.py +0 -64
  62. inspect_ai/tool/_tools/_web_browser/_resources/test_accessibility_tree_node.py +0 -180
  63. inspect_ai/tool/_tools/_web_browser/_resources/test_playwright_crawler.py +0 -99
  64. inspect_ai/tool/_tools/_web_browser/_resources/test_rectangle.py +0 -15
  65. inspect_ai/tool/_tools/_web_browser/_resources/test_web_client.py +0 -44
  66. inspect_ai/tool/_tools/_web_browser/_resources/web_browser_rpc_types.py +0 -39
  67. inspect_ai/tool/_tools/_web_browser/_resources/web_client.py +0 -214
  68. inspect_ai/tool/_tools/_web_browser/_resources/web_client_new_session.py +0 -35
  69. inspect_ai/tool/_tools/_web_browser/_resources/web_server.py +0 -192
  70. {inspect_ai-0.3.75.dist-info → inspect_ai-0.3.76.dist-info}/entry_points.txt +0 -0
  71. {inspect_ai-0.3.75.dist-info → inspect_ai-0.3.76.dist-info/licenses}/LICENSE +0 -0
  72. {inspect_ai-0.3.75.dist-info → inspect_ai-0.3.76.dist-info}/top_level.txt +0 -0
@@ -1,23 +1,12 @@
1
1
  import functools
2
2
  import os
3
3
  import re
4
- import sys
5
4
  from copy import copy
6
5
  from logging import getLogger
7
- from typing import Any, Literal, Optional, Tuple, TypedDict, cast
6
+ from typing import Any, Literal, NamedTuple, Optional, Tuple, cast
8
7
 
9
8
  import httpcore
10
9
  import httpx
11
-
12
- from inspect_ai._util.http import is_retryable_http_status
13
-
14
- from .util.hooks import HttpxHooks
15
-
16
- if sys.version_info >= (3, 11):
17
- from typing import NotRequired
18
- else:
19
- from typing_extensions import NotRequired
20
-
21
10
  from anthropic import (
22
11
  APIConnectionError,
23
12
  APIStatusError,
@@ -39,19 +28,19 @@ from anthropic.types import (
39
28
  TextBlockParam,
40
29
  ThinkingBlock,
41
30
  ThinkingBlockParam,
31
+ ToolBash20250124Param,
42
32
  ToolParam,
43
33
  ToolResultBlockParam,
34
+ ToolTextEditor20250124Param,
44
35
  ToolUseBlock,
45
36
  ToolUseBlockParam,
46
37
  message_create_params,
47
38
  )
39
+ from anthropic.types.beta import BetaToolComputerUse20250124Param
48
40
  from pydantic import JsonValue
49
41
  from typing_extensions import override
50
42
 
51
- from inspect_ai._util.constants import (
52
- BASE_64_DATA_REMOVED,
53
- NO_CONTENT,
54
- )
43
+ from inspect_ai._util.constants import BASE_64_DATA_REMOVED, NO_CONTENT
55
44
  from inspect_ai._util.content import (
56
45
  Content,
57
46
  ContentImage,
@@ -59,6 +48,7 @@ from inspect_ai._util.content import (
59
48
  ContentText,
60
49
  )
61
50
  from inspect_ai._util.error import exception_message
51
+ from inspect_ai._util.http import is_retryable_http_status
62
52
  from inspect_ai._util.images import file_as_data_uri
63
53
  from inspect_ai._util.logger import warn_once
64
54
  from inspect_ai._util.url import data_uri_mime_type, data_uri_to_base64
@@ -70,11 +60,14 @@ from .._model import ModelAPI
70
60
  from .._model_call import ModelCall
71
61
  from .._model_output import ChatCompletionChoice, ModelOutput, ModelUsage, StopReason
72
62
  from .util import environment_prerequisite_error, model_base_url
63
+ from .util.hooks import HttpxHooks
73
64
 
74
65
  logger = getLogger(__name__)
75
66
 
76
67
  ANTHROPIC_API_KEY = "ANTHROPIC_API_KEY"
77
68
 
69
+ INTERNAL_COMPUTER_TOOL_NAME = "computer"
70
+
78
71
 
79
72
  class AnthropicAPI(ModelAPI):
80
73
  def __init__(
@@ -93,7 +86,7 @@ class AnthropicAPI(ModelAPI):
93
86
  else:
94
87
  self.service = None
95
88
 
96
- # collect gemerate model_args (then delete them so we can pass the rest on)
89
+ # collect generate model_args (then delete them so we can pass the rest on)
97
90
  def collect_model_arg(name: str) -> Any | None:
98
91
  nonlocal model_args
99
92
  value = model_args.get(name, None)
@@ -193,14 +186,11 @@ class AnthropicAPI(ModelAPI):
193
186
 
194
187
  # generate
195
188
  try:
196
- (
197
- system_param,
198
- tools_param,
199
- messages,
200
- computer_use,
201
- ) = await self.resolve_chat_input(input, tools, config)
189
+ system_param, tools_param, messages = await self.resolve_chat_input(
190
+ input, tools, config
191
+ )
202
192
 
203
- # prepare request params (assembed this way so we can log the raw model call)
193
+ # prepare request params (assembled this way so we can log the raw model call)
204
194
  request = dict(messages=messages)
205
195
 
206
196
  # system messages and tools
@@ -218,7 +208,13 @@ class AnthropicAPI(ModelAPI):
218
208
 
219
209
  # extra headers (for time tracker and computer use)
220
210
  extra_headers = headers | {HttpxHooks.REQUEST_ID_HEADER: request_id}
221
- if computer_use:
211
+ if any(
212
+ tool.get("type", None) == "computer_20250124" for tool in tools_param
213
+ ):
214
+ # From: https://docs.anthropic.com/en/docs/agents-and-tools/computer-use#claude-3-7-sonnet-beta-flag
215
+ # Note: The Bash (bash_20250124) and Text Editor (text_editor_20250124)
216
+ # tools are generally available for Claude 3.5 Sonnet (new) as well and
217
+ # can be used without the computer use beta header.
222
218
  betas.append("computer-use-2025-01-24")
223
219
  if len(betas) > 0:
224
220
  extra_headers["anthropic-beta"] = ",".join(betas)
@@ -405,9 +401,7 @@ class AnthropicAPI(ModelAPI):
405
401
  input: list[ChatMessage],
406
402
  tools: list[ToolInfo],
407
403
  config: GenerateConfig,
408
- ) -> Tuple[
409
- list[TextBlockParam] | None, list["ToolParamDef"], list[MessageParam], bool
410
- ]:
404
+ ) -> Tuple[list[TextBlockParam] | None, list["ToolParamDef"], list[MessageParam]]:
411
405
  # extract system message
412
406
  system_messages, messages = split_system_messages(input, config)
413
407
 
@@ -420,7 +414,7 @@ class AnthropicAPI(ModelAPI):
420
414
  )
421
415
 
422
416
  # tools
423
- tools_params, computer_use = self.tool_params_for_tools(tools, config)
417
+ tools_params = [self.tool_param_for_tool_info(tool, config) for tool in tools]
424
418
 
425
419
  # system messages
426
420
  if len(system_messages) > 0:
@@ -470,40 +464,35 @@ class AnthropicAPI(ModelAPI):
470
464
  add_cache_control(cast(dict[str, Any], content[-1]))
471
465
 
472
466
  # return chat input
473
- return system_param, tools_params, message_params, computer_use
474
-
475
- def tool_params_for_tools(
476
- self, tools: list[ToolInfo], config: GenerateConfig
477
- ) -> tuple[list["ToolParamDef"], bool]:
478
- # tool params and computer_use bit to return
479
- tool_params: list["ToolParamDef"] = []
480
- computer_use = False
481
-
482
- # for each tool, check if it has a native computer use implementation and use that
483
- # when available (noting that we need to set the computer use request header)
484
- for tool in tools:
485
- computer_use_tool = (
467
+ return system_param, tools_params, message_params
468
+
469
+ def tool_param_for_tool_info(
470
+ self, tool: ToolInfo, config: GenerateConfig
471
+ ) -> "ToolParamDef":
472
+ # Use a native tool implementation when available. Otherwise, use the
473
+ # standard tool implementation
474
+ return self.maybe_native_tool_param(tool, config) or ToolParam(
475
+ name=tool.name,
476
+ description=tool.description,
477
+ input_schema=tool.parameters.model_dump(exclude_none=True),
478
+ )
479
+
480
+ def maybe_native_tool_param(
481
+ self, tool: ToolInfo, config: GenerateConfig
482
+ ) -> Optional["ToolParamDef"]:
483
+ return (
484
+ (
486
485
  self.computer_use_tool_param(tool)
487
- if config.internal_tools is not False
488
- else None
486
+ or self.text_editor_tool_param(tool)
487
+ or self.bash_tool_param(tool)
489
488
  )
490
- if computer_use_tool:
491
- tool_params.append(computer_use_tool)
492
- computer_use = True
493
- else:
494
- tool_params.append(
495
- ToolParam(
496
- name=tool.name,
497
- description=tool.description,
498
- input_schema=tool.parameters.model_dump(exclude_none=True),
499
- )
500
- )
501
-
502
- return tool_params, computer_use
489
+ if config.internal_tools is not False
490
+ else None
491
+ )
503
492
 
504
493
  def computer_use_tool_param(
505
494
  self, tool: ToolInfo
506
- ) -> Optional["ComputerUseToolParam"]:
495
+ ) -> Optional[BetaToolComputerUse20250124Param]:
507
496
  # check for compatible 'computer' tool
508
497
  if tool.name == "computer" and (
509
498
  sorted(tool.parameters.properties.keys())
@@ -525,7 +514,7 @@ class AnthropicAPI(ModelAPI):
525
514
  "Use of Anthropic's native computer use support is not enabled in Claude 3.5. Please use 3.7 or later to leverage the native support.",
526
515
  )
527
516
  return None
528
- return ComputerUseToolParam(
517
+ return BetaToolComputerUse20250124Param(
529
518
  type="computer_20250124",
530
519
  name="computer",
531
520
  # Note: The dimensions passed here for display_width_px and display_height_px should
@@ -542,23 +531,58 @@ class AnthropicAPI(ModelAPI):
542
531
  else:
543
532
  return None
544
533
 
534
+ def text_editor_tool_param(
535
+ self, tool: ToolInfo
536
+ ) -> Optional[ToolTextEditor20250124Param]:
537
+ # check for compatible 'text editor' tool
538
+ if tool.name == "text_editor" and (
539
+ sorted(tool.parameters.properties.keys())
540
+ == sorted(
541
+ [
542
+ "command",
543
+ "file_text",
544
+ "insert_line",
545
+ "new_str",
546
+ "old_str",
547
+ "path",
548
+ "view_range",
549
+ ]
550
+ )
551
+ ):
552
+ return ToolTextEditor20250124Param(
553
+ type="text_editor_20250124", name="str_replace_editor"
554
+ )
555
+ # not a text_editor tool
556
+ else:
557
+ return None
545
558
 
546
- # native anthropic tool definitions for computer use beta
547
- # https://docs.anthropic.com/en/docs/build-with-claude/computer-use
548
- class ComputerUseToolParam(TypedDict):
549
- type: str
550
- name: str
551
- display_width_px: NotRequired[int]
552
- display_height_px: NotRequired[int]
553
- display_number: NotRequired[int]
559
+ def bash_tool_param(self, tool: ToolInfo) -> Optional[ToolBash20250124Param]:
560
+ # check for compatible 'bash' tool
561
+ if tool.name == "bash_session" and (
562
+ sorted(tool.parameters.properties.keys()) == sorted(["command", "restart"])
563
+ ):
564
+ return ToolBash20250124Param(type="bash_20250124", name="bash")
565
+ # not a bash tool
566
+ else:
567
+ return None
554
568
 
555
569
 
556
- # tools can be either a stock tool param or a special computer use tool param
557
- ToolParamDef = ToolParam | ComputerUseToolParam
570
+ # tools can be either a stock tool param or a special Anthropic native use tool param
571
+ ToolParamDef = (
572
+ ToolParam
573
+ | BetaToolComputerUse20250124Param
574
+ | ToolTextEditor20250124Param
575
+ | ToolBash20250124Param
576
+ )
558
577
 
559
578
 
560
579
  def add_cache_control(
561
- param: TextBlockParam | ToolParam | ComputerUseToolParam | dict[str, Any],
580
+ param: TextBlockParam
581
+ | ToolParam
582
+ | BetaToolComputerUse20250124Param
583
+ | ToolTextEditor20250124Param
584
+ | ToolBash20250124Param
585
+ | dict[str, Any],
562
586
  ) -> None:
563
587
  cast(dict[str, Any], param)["cache_control"] = {"type": "ephemeral"}
564
588
 
@@ -567,10 +591,10 @@ def consecutive_user_message_reducer(
567
591
  messages: list[MessageParam],
568
592
  message: MessageParam,
569
593
  ) -> list[MessageParam]:
570
- return consective_message_reducer(messages, message, "user")
594
+ return consecutive_message_reducer(messages, message, "user")
571
595
 
572
596
 
573
- def consective_message_reducer(
597
+ def consecutive_message_reducer(
574
598
  messages: list[MessageParam],
575
599
  message: MessageParam,
576
600
  role: Literal["user", "assistant"],
@@ -583,6 +607,7 @@ def consective_message_reducer(
583
607
 
584
608
 
585
609
  def combine_messages(a: MessageParam, b: MessageParam) -> MessageParam:
610
+ # TODO: Fix this code as it currently drops interesting properties when combining
586
611
  role = a["role"]
587
612
  a_content = a["content"]
588
613
  b_content = b["content"]
@@ -702,7 +727,7 @@ async def message_param(message: ChatMessage) -> MessageParam:
702
727
  ToolUseBlockParam(
703
728
  type="tool_use",
704
729
  id=tool_call.id,
705
- name=tool_call.function,
730
+ name=tool_call.internal_name or tool_call.function,
706
731
  input=tool_call.arguments,
707
732
  )
708
733
  )
@@ -749,11 +774,13 @@ async def model_output_from_message(
749
774
  content.append(ContentText(type="text", text=content_text))
750
775
  elif isinstance(content_block, ToolUseBlock):
751
776
  tool_calls = tool_calls or []
777
+ info = maybe_mapped_call_info(content_block.name, tools)
752
778
  tool_calls.append(
753
779
  ToolCall(
754
- type="function",
780
+ type=info.internal_type,
755
781
  id=content_block.id,
756
- function=content_block.name,
782
+ function=info.inspect_name,
783
+ internal_name=info.internal_name,
757
784
  arguments=content_block.model_dump().get("input", {}),
758
785
  )
759
786
  )
@@ -803,6 +830,37 @@ async def model_output_from_message(
803
830
  )
804
831
 
805
832
 
833
+ class CallInfo(NamedTuple):
834
+ internal_name: str | None
835
+ internal_type: str
836
+ inspect_name: str
837
+
838
+
839
def maybe_mapped_call_info(tool_called: str, tools: list[ToolInfo]) -> CallInfo:
    """
    Return call info - potentially transformed by native tool mappings.

    Anthropic prescribes names for their native tools - `computer`, `bash`, and
    `str_replace_editor`. For a variety of reasons, Inspect's tool names do not
    necessarily conform to internal names. Anthropic also provides specific tool
    types for these built-in tools.

    A mapping is applied only when the name the model called matches a native
    tool name AND the corresponding Inspect tool is present in `tools`;
    otherwise the call is reported as a plain "function" call under the name
    the model used.
    """
    # (Anthropic internal name, Anthropic tool type, Inspect tool name)
    native_tools = (
        (INTERNAL_COMPUTER_TOOL_NAME, "computer_20250124", "computer"),
        ("str_replace_editor", "text_editor_20250124", "text_editor"),
        ("bash", "bash_20250124", "bash_session"),
    )

    for internal_name, internal_type, inspect_name in native_tools:
        if internal_name != tool_called:
            continue
        if any(tool.name == inspect_name for tool in tools):
            return CallInfo(internal_name, internal_type, inspect_name)

    # no native mapping applies
    return CallInfo(None, "function", tool_called)
862
+
863
+
806
864
  def message_stop_reason(message: Message) -> StopReason:
807
865
  match message.stop_reason:
808
866
  case "end_turn" | "stop_sequence":
@@ -67,6 +67,16 @@ class OpenAIAPI(ModelAPI):
67
67
  config: GenerateConfig = GenerateConfig(),
68
68
  **model_args: Any,
69
69
  ) -> None:
70
+ # extract azure service prefix from model name (other providers
71
+ # that subclass from us like together expect to have the qualifier
72
+ # in the model name e.g. google/gemma-2b-it)
73
+ parts = model_name.split("/")
74
+ if parts[0] == "azure" and len(parts) > 1:
75
+ self.service: str | None = parts[0]
76
+ model_name = "/".join(parts[1:])
77
+ else:
78
+ self.service = None
79
+
70
80
  # call super
71
81
  super().__init__(
72
82
  model_name=model_name,
@@ -76,14 +86,6 @@ class OpenAIAPI(ModelAPI):
76
86
  config=config,
77
87
  )
78
88
 
79
- # extract any service prefix from model name
80
- parts = model_name.split("/")
81
- if len(parts) > 1:
82
- self.service: str | None = parts[0]
83
- model_name = "/".join(parts[1:])
84
- else:
85
- self.service = None
86
-
87
89
  # resolve api_key
88
90
  if not self.api_key:
89
91
  self.api_key = os.environ.get(
@@ -322,6 +324,7 @@ class OpenAIAPI(ModelAPI):
322
324
  config.reasoning_effort is not None
323
325
  and not self.is_gpt()
324
326
  and not self.is_o1_mini()
327
+ and not self.is_o1_preview()
325
328
  ):
326
329
  params["reasoning_effort"] = config.reasoning_effort
327
330
  if config.response_schema is not None:
@@ -34,8 +34,8 @@ from inspect_ai._util.content import (
34
34
  Content,
35
35
  ContentAudio,
36
36
  ContentImage,
37
+ ContentReasoning,
37
38
  ContentText,
38
- ContentVideo,
39
39
  )
40
40
  from inspect_ai._util.http import is_retryable_http_status
41
41
  from inspect_ai._util.images import file_as_data
@@ -336,10 +336,13 @@ async def content_part(content: Content | str) -> Part:
336
336
  elif isinstance(content, ContentImage):
337
337
  image_bytes, mime_type = await file_as_data(content.image)
338
338
  return Part.from_image(image=Image.from_bytes(data=image_bytes))
339
+ elif isinstance(content, ContentReasoning):
340
+ return Part.from_text(content.reasoning or NO_CONTENT)
339
341
  else:
340
342
  if isinstance(content, ContentAudio):
341
343
  file = content.audio
342
- elif isinstance(content, ContentVideo):
344
+ else:
345
+ # it's ContentVideo
343
346
  file = content.video
344
347
  file_bytes, mime_type = await file_as_data(file)
345
348
  return Part.from_data(file_bytes, mime_type)
@@ -22,17 +22,21 @@ from ._tool_def import ToolDef
22
22
  from ._tool_info import ToolInfo
23
23
  from ._tool_params import ToolParam, ToolParams
24
24
  from ._tool_with import tool_with
25
+ from ._tools._bash_session import bash_session
25
26
  from ._tools._computer import computer
26
27
  from ._tools._execute import bash, python
28
+ from ._tools._text_editor import text_editor
27
29
  from ._tools._web_browser import web_browser
28
30
  from ._tools._web_search import web_search
29
31
 
30
32
  __all__ = [
31
33
  "bash",
34
+ "bash_session",
32
35
  "computer",
33
36
  "python",
34
37
  "web_browser",
35
38
  "web_search",
39
+ "text_editor",
36
40
  "tool",
37
41
  "tool_with",
38
42
  "Tool",
@@ -44,8 +44,11 @@ class ToolCall:
44
44
  arguments: dict[str, Any]
45
45
  """Arguments to function."""
46
46
 
47
- type: Literal["function"]
48
- """Type of tool call (currently only 'function')"""
47
+ type: str
48
+ """Type of tool call ('function' or a model specific internal tool type)"""
49
+
50
+ internal_name: str | None = field(default=None)
51
+ """Model's internal name for the tool - if any."""
49
52
 
50
53
  parse_error: str | None = field(default=None)
51
54
  """Error which occurred parsing tool call."""
@@ -0,0 +1,200 @@
1
+ """
2
+ This module provides helper code for handling JSON-RPC communication between the inspect process and the `inspect-tool-support` package code running in the sandbox environment.
3
+
4
+ It includes definitions for JSON-RPC request and response models, as well as functions to create and parse JSON-RPC requests and responses.
5
+ """
6
+
7
+ import json
8
+ from itertools import count
9
+ from textwrap import dedent
10
+ from typing import Literal, Type, TypeVar, cast
11
+
12
+ from pydantic import BaseModel, RootModel
13
+
14
+ from inspect_ai._util.error import PrerequisiteError
15
+ from inspect_ai.tool._tool import ToolError, ToolParsingError
16
+ from inspect_ai.util import sandbox_with
17
+ from inspect_ai.util._sandbox.environment import SandboxEnvironment
18
+
19
+
20
class JSONRPCResponseBase(BaseModel):
    """Fields common to both success and error JSON-RPC responses."""

    # protocol version marker; only the literal "2.0" validates
    jsonrpc: Literal["2.0"]
    # identifier carried by the response
    id: int | float | str
23
+
24
+
25
class JSONRPCSuccessResponse(JSONRPCResponseBase):
    """JSON-RPC response carrying a `result` payload."""

    # untyped here; callers validate it against the result class they expect
    result: object
27
+
28
+
29
class JSONRPCError(BaseModel):
    """Error object of a JSON-RPC response.

    See: https://www.jsonrpc.org/specification#error_object
    """

    # numeric error code
    code: int
    # error message text
    message: str
    # optional additional error payload
    data: object | None = None
35
+
36
+
37
class JSONRPCErrorResponse(JSONRPCResponseBase):
    """JSON-RPC response carrying an `error` object instead of a result."""

    error: JSONRPCError
39
+
40
+
41
class JSONRPCResponse(RootModel[JSONRPCSuccessResponse | JSONRPCErrorResponse]):
    """Root model whose `.root` is either a success or an error response."""
43
+
44
+
45
+ BaseModelT = TypeVar("BaseModelT", bound=BaseModel)
46
+ StrOrModelT = TypeVar("StrOrModelT", bound=str | BaseModel)
47
+
48
+ id_generator = count(666)
49
+
50
+
51
async def exec_sandbox_rpc(
    sandbox: SandboxEnvironment,
    method: str,
    params: dict[str, object] | tuple[object, ...],
    result_cls: Type[StrOrModelT],
    timeout: int | None = None,
    user: str | None = None,
) -> StrOrModelT:
    """
    Execute a JSON-RPC command to a sandbox environment.

    Note that the JSON RPC request is sent to the exec'ed program via stdin,
    and the response is read from its stdout.

    Args:
      sandbox (SandboxEnvironment): The sandbox environment to execute the command in.
      method (str): The JSON-RPC method to call.
      params (dict[str, object] | tuple[object, ...]): The parameters for the JSON-RPC method.
      result_cls (Type[StrOrModelT]): The class to use for parsing the result
        (either `str` or a pydantic model class).
      timeout (int | None, optional): The timeout for the execution. Defaults to None.
      user: Optional username or UID to run the command as.

    Returns:
      StrOrModelT: The parsed result of the JSON-RPC call.

    Raises:
      RuntimeError: If the sandbox execution fails, if the JSON-RPC response
        carries an error code not handled below, or if the parsed result is
        not an instance of `result_cls`.
      ToolParsingError: If the JSON-RPC error code is -32601 or -32602.
      ToolError: If the JSON-RPC error code is -32000.
    """
    rpc_request = _create_json_rpc_request(method, params)
    exec_result = await sandbox.exec(
        [SANDBOX_CLI, "exec"],
        input=rpc_request,
        timeout=timeout,
        user=user,
    )

    # the process itself failed (as opposed to returning a JSON-RPC error)
    if not exec_result.success:
        raise RuntimeError(
            f"Sandbox.exec failure executing {_rpc_call_description(method, params)}: {exec_result.stderr}"
        )

    parsed = _parse_json_rpc_response(exec_result.stdout, result_cls)

    if isinstance(parsed, JSONRPCError):
        code, message = parsed.code, parsed.message
        # these two codes are reported to the model as a tool parsing problem
        if code in (-32601, -32602):
            raise ToolParsingError(message)
        # this code is reported to the model as a tool error
        if code == -32000:
            raise ToolError(message)
        # any other error code is treated as an unexpected failure
        raise RuntimeError(
            f"Error executing tool command {_rpc_call_description(method, params)}: {code=} {message}"
        )

    if isinstance(parsed, result_cls):
        return parsed

    # defensive: the parser should only return an error or a result_cls instance
    raise RuntimeError(
        f"Error executing tool command {_rpc_call_description(method, params)}: {parsed}"
    )
108
+
109
+
110
+ SANDBOX_CLI = "inspect-tool-support"
111
+ INSPECT_TOOL_SUPPORT_IMAGE_DOCKERHUB = "aisiuk/inspect-tool-support"
112
+
113
+
114
+ async def tool_container_sandbox(tool_name: str) -> SandboxEnvironment:
115
+ sb = await sandbox_with(SANDBOX_CLI, True)
116
+ if sb:
117
+ return sb
118
+ else:
119
+ msg = dedent(f"""
120
+ The {tool_name} service was not found in any of the sandboxes for this sample. Please add the {tool_name} to your configuration.
121
+
122
+ For example, the following Docker compose file uses the {INSPECT_TOOL_SUPPORT_IMAGE_DOCKERHUB} reference image as its default sandbox:
123
+
124
+ services:
125
+ default:
126
+ image: "{INSPECT_TOOL_SUPPORT_IMAGE_DOCKERHUB}"
127
+ init: true
128
+
129
+ Alternatively, you can include the service into your own Dockerfile:
130
+
131
+ RUN python -m venv /opt/inspect_tool_support
132
+ ENV PATH="/opt/inspect_tool_support/bin:$PATH"
133
+ RUN pip install inspect-tool-support
134
+ RUN inspect-tool-support post-install
135
+ """).strip()
136
+ raise PrerequisiteError(msg)
137
+
138
+
139
def _create_json_rpc_request(
    method: str, params: dict[str, object] | tuple[object, ...]
) -> str:
    """Serialize a JSON-RPC 2.0 request for `method` as a JSON string.

    Positional params (a tuple) are emitted as a JSON array; named params
    (a dict) are passed through unchanged. Each call takes the next value
    from the module-level `id_generator` as the request id.
    """
    wire_params = params if not isinstance(params, tuple) else list(params)
    request = {
        "jsonrpc": "2.0",
        "method": method,
        "id": next(id_generator),
        "params": wire_params,
    }
    return json.dumps(request)
150
+
151
+
152
+ def _rpc_call_description(
153
+ method: str, params: dict[str, object] | tuple[object, ...]
154
+ ) -> str:
155
+ """
156
+ Generate a string description of an RPC call.
157
+
158
+ Args:
159
+ method (str): The name of the RPC method.
160
+ params (dict[str, object] | tuple[object, ...]): The parameters for the RPC method.
161
+
162
+ Returns:
163
+ str: A string description of the RPC call.
164
+
165
+ Examples:
166
+ >>> _rpc_call_description("subtract", {"minuend": 42, "subtrahend": 23})
167
+ 'subtract(minuend: 42, subtrahend: 23)'
168
+
169
+ >>> _rpc_call_description("subtract", (42, 23))
170
+ 'subtract(42, 23)'
171
+ """
172
+ normalized_params = (
173
+ list(map(str, params))
174
+ if isinstance(params, tuple)
175
+ else [f"{k}: {v}" for k, v in params.items()]
176
+ )
177
+ return f"{method}({', '.join(normalized_params)})"
178
+
179
+
180
+ def _parse_json_rpc_response(
181
+ response_str: str,
182
+ result_cls: Type[StrOrModelT],
183
+ ) -> StrOrModelT | JSONRPCError:
184
+ match JSONRPCResponse.model_validate_json(response_str).root:
185
+ case JSONRPCErrorResponse(error=error):
186
+ return error
187
+ case JSONRPCSuccessResponse(result=rpc_result):
188
+ # TODO: Wow. Is there really no way to convince Python to narrow these types
189
+ # and avoid the cast's
190
+ if result_cls is str:
191
+ if not isinstance(rpc_result, str):
192
+ raise ValueError(f"Expected string result, got {type(rpc_result)}")
193
+ return cast(StrOrModelT, rpc_result)
194
+ else:
195
+ return cast(
196
+ StrOrModelT,
197
+ cast(BaseModel, result_cls).model_validate(rpc_result, strict=True),
198
+ )
199
+ case _:
200
+ raise ValueError(f"Unexpected JSON RPC response: {response_str}")