inspect-ai 0.3.82__py3-none-any.whl → 0.3.83__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (180)
  1. inspect_ai/__init__.py +2 -1
  2. inspect_ai/_display/textual/app.py +14 -3
  3. inspect_ai/_display/textual/display.py +4 -0
  4. inspect_ai/_display/textual/widgets/samples.py +9 -3
  5. inspect_ai/_display/textual/widgets/task_detail.py +3 -4
  6. inspect_ai/_display/textual/widgets/tasks.py +17 -1
  7. inspect_ai/_display/textual/widgets/vscode.py +44 -0
  8. inspect_ai/_eval/eval.py +36 -24
  9. inspect_ai/_eval/evalset.py +17 -18
  10. inspect_ai/_eval/loader.py +34 -11
  11. inspect_ai/_eval/run.py +8 -13
  12. inspect_ai/_eval/score.py +13 -3
  13. inspect_ai/_eval/task/generate.py +8 -9
  14. inspect_ai/_eval/task/log.py +2 -0
  15. inspect_ai/_eval/task/task.py +23 -9
  16. inspect_ai/_util/file.py +13 -0
  17. inspect_ai/_util/json.py +2 -1
  18. inspect_ai/_util/registry.py +1 -0
  19. inspect_ai/_util/vscode.py +37 -0
  20. inspect_ai/_view/www/App.css +6 -0
  21. inspect_ai/_view/www/dist/assets/index.css +304 -128
  22. inspect_ai/_view/www/dist/assets/index.js +47495 -27519
  23. inspect_ai/_view/www/log-schema.json +124 -31
  24. inspect_ai/_view/www/package.json +3 -0
  25. inspect_ai/_view/www/src/App.tsx +12 -0
  26. inspect_ai/_view/www/src/appearance/icons.ts +1 -0
  27. inspect_ai/_view/www/src/components/Card.tsx +6 -4
  28. inspect_ai/_view/www/src/components/LinkButton.module.css +16 -0
  29. inspect_ai/_view/www/src/components/LinkButton.tsx +33 -0
  30. inspect_ai/_view/www/src/components/LiveVirtualList.tsx +1 -1
  31. inspect_ai/_view/www/src/components/MarkdownDiv.tsx +113 -23
  32. inspect_ai/_view/www/src/components/Modal.module.css +38 -0
  33. inspect_ai/_view/www/src/components/Modal.tsx +77 -0
  34. inspect_ai/_view/www/src/plan/DetailStep.module.css +4 -0
  35. inspect_ai/_view/www/src/plan/DetailStep.tsx +6 -3
  36. inspect_ai/_view/www/src/plan/SolverDetailView.module.css +2 -1
  37. inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +7 -0
  38. inspect_ai/_view/www/src/samples/SampleDialog.tsx +7 -0
  39. inspect_ai/_view/www/src/samples/SampleDisplay.tsx +11 -34
  40. inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +6 -0
  41. inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +2 -2
  42. inspect_ai/_view/www/src/samples/SamplesTools.tsx +12 -0
  43. inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +2 -0
  44. inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +2 -0
  45. inspect_ai/_view/www/src/samples/chat/messages.ts +3 -1
  46. inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +1 -0
  47. inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +9 -3
  48. inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.module.css +3 -3
  49. inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.tsx +1 -1
  50. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.module.css +4 -4
  51. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +10 -11
  52. inspect_ai/_view/www/src/samples/list/SampleFooter.module.css +2 -1
  53. inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +7 -1
  54. inspect_ai/_view/www/src/samples/list/SampleList.tsx +25 -8
  55. inspect_ai/_view/www/src/samples/list/SampleRow.tsx +1 -1
  56. inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +11 -22
  57. inspect_ai/_view/www/src/samples/scores/SampleScoresGrid.module.css +38 -0
  58. inspect_ai/_view/www/src/samples/scores/SampleScoresGrid.tsx +118 -0
  59. inspect_ai/_view/www/src/samples/scores/{SampleScoreView.module.css → SampleScoresView.module.css} +10 -1
  60. inspect_ai/_view/www/src/samples/scores/SampleScoresView.tsx +78 -0
  61. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +3 -3
  62. inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +25 -4
  63. inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +29 -2
  64. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +0 -1
  65. inspect_ai/_view/www/src/state/hooks.ts +5 -3
  66. inspect_ai/_view/www/src/state/logPolling.ts +5 -1
  67. inspect_ai/_view/www/src/state/logSlice.ts +10 -0
  68. inspect_ai/_view/www/src/state/samplePolling.ts +4 -1
  69. inspect_ai/_view/www/src/state/sampleSlice.ts +13 -0
  70. inspect_ai/_view/www/src/types/log.d.ts +34 -26
  71. inspect_ai/_view/www/src/types/markdown-it-katex.d.ts +21 -0
  72. inspect_ai/_view/www/src/utils/json-worker.ts +79 -12
  73. inspect_ai/_view/www/src/workspace/WorkSpace.tsx +18 -16
  74. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +16 -0
  75. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +68 -71
  76. inspect_ai/_view/www/src/workspace/navbar/ScoreGrid.module.css +35 -0
  77. inspect_ai/_view/www/src/workspace/navbar/ScoreGrid.tsx +117 -0
  78. inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +1 -1
  79. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.module.css +3 -2
  80. inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +18 -0
  81. inspect_ai/_view/www/yarn.lock +94 -1
  82. inspect_ai/agent/__init__.py +36 -0
  83. inspect_ai/agent/_agent.py +268 -0
  84. inspect_ai/agent/_as_solver.py +72 -0
  85. inspect_ai/agent/_as_tool.py +122 -0
  86. inspect_ai/{solver → agent}/_bridge/bridge.py +23 -37
  87. inspect_ai/{solver → agent}/_bridge/patch.py +9 -8
  88. inspect_ai/agent/_filter.py +46 -0
  89. inspect_ai/agent/_handoff.py +93 -0
  90. inspect_ai/{solver/_human_agent → agent/_human}/agent.py +11 -12
  91. inspect_ai/{solver/_human_agent → agent/_human}/commands/__init__.py +2 -3
  92. inspect_ai/{solver/_human_agent → agent/_human}/commands/clock.py +3 -1
  93. inspect_ai/{solver/_human_agent → agent/_human}/commands/score.py +5 -5
  94. inspect_ai/{solver/_human_agent → agent/_human}/install.py +6 -3
  95. inspect_ai/{solver/_human_agent → agent/_human}/service.py +7 -3
  96. inspect_ai/{solver/_human_agent → agent/_human}/state.py +5 -5
  97. inspect_ai/agent/_react.py +241 -0
  98. inspect_ai/agent/_run.py +36 -0
  99. inspect_ai/agent/_types.py +81 -0
  100. inspect_ai/log/_log.py +11 -2
  101. inspect_ai/log/_transcript.py +13 -9
  102. inspect_ai/model/__init__.py +7 -1
  103. inspect_ai/model/_call_tools.py +256 -52
  104. inspect_ai/model/_chat_message.py +7 -4
  105. inspect_ai/model/_conversation.py +13 -62
  106. inspect_ai/model/_display.py +85 -0
  107. inspect_ai/model/_model.py +113 -14
  108. inspect_ai/model/_model_output.py +14 -9
  109. inspect_ai/model/_openai.py +16 -4
  110. inspect_ai/model/_openai_computer_use.py +162 -0
  111. inspect_ai/model/_openai_responses.py +319 -165
  112. inspect_ai/model/_providers/anthropic.py +20 -21
  113. inspect_ai/model/_providers/azureai.py +24 -13
  114. inspect_ai/model/_providers/bedrock.py +1 -7
  115. inspect_ai/model/_providers/cloudflare.py +3 -3
  116. inspect_ai/model/_providers/goodfire.py +2 -6
  117. inspect_ai/model/_providers/google.py +11 -10
  118. inspect_ai/model/_providers/groq.py +6 -3
  119. inspect_ai/model/_providers/hf.py +7 -3
  120. inspect_ai/model/_providers/mistral.py +7 -10
  121. inspect_ai/model/_providers/openai.py +47 -17
  122. inspect_ai/model/_providers/openai_o1.py +11 -4
  123. inspect_ai/model/_providers/openai_responses.py +12 -14
  124. inspect_ai/model/_providers/providers.py +2 -2
  125. inspect_ai/model/_providers/together.py +12 -2
  126. inspect_ai/model/_providers/util/chatapi.py +7 -2
  127. inspect_ai/model/_providers/util/hf_handler.py +4 -2
  128. inspect_ai/model/_providers/util/llama31.py +4 -2
  129. inspect_ai/model/_providers/vertex.py +11 -9
  130. inspect_ai/model/_providers/vllm.py +4 -4
  131. inspect_ai/scorer/__init__.py +2 -0
  132. inspect_ai/scorer/_metrics/__init__.py +2 -0
  133. inspect_ai/scorer/_metrics/grouped.py +84 -0
  134. inspect_ai/scorer/_score.py +26 -6
  135. inspect_ai/solver/__init__.py +2 -2
  136. inspect_ai/solver/_basic_agent.py +22 -9
  137. inspect_ai/solver/_bridge.py +31 -0
  138. inspect_ai/solver/_chain.py +20 -12
  139. inspect_ai/solver/_fork.py +5 -1
  140. inspect_ai/solver/_human_agent.py +52 -0
  141. inspect_ai/solver/_prompt.py +3 -1
  142. inspect_ai/solver/_run.py +59 -0
  143. inspect_ai/solver/_solver.py +14 -4
  144. inspect_ai/solver/_task_state.py +5 -3
  145. inspect_ai/tool/_tool_call.py +15 -8
  146. inspect_ai/tool/_tool_def.py +17 -12
  147. inspect_ai/tool/_tool_support_helpers.py +2 -2
  148. inspect_ai/tool/_tool_with.py +14 -11
  149. inspect_ai/tool/_tools/_bash_session.py +11 -2
  150. inspect_ai/tool/_tools/_computer/_common.py +18 -2
  151. inspect_ai/tool/_tools/_computer/_computer.py +18 -2
  152. inspect_ai/tool/_tools/_computer/_resources/tool/_constants.py +2 -0
  153. inspect_ai/tool/_tools/_computer/_resources/tool/_x11_client.py +17 -0
  154. inspect_ai/tool/_tools/_think.py +1 -1
  155. inspect_ai/tool/_tools/_web_browser/_web_browser.py +100 -61
  156. inspect_ai/util/__init__.py +2 -0
  157. inspect_ai/util/_anyio.py +27 -0
  158. inspect_ai/util/_sandbox/__init__.py +2 -1
  159. inspect_ai/util/_sandbox/context.py +32 -7
  160. inspect_ai/util/_sandbox/docker/cleanup.py +4 -0
  161. inspect_ai/util/_sandbox/docker/compose.py +2 -2
  162. inspect_ai/util/_sandbox/docker/docker.py +12 -1
  163. inspect_ai/util/_store_model.py +30 -7
  164. inspect_ai/util/_subprocess.py +13 -3
  165. {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.83.dist-info}/METADATA +1 -1
  166. {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.83.dist-info}/RECORD +179 -153
  167. inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +0 -167
  168. /inspect_ai/{solver → agent}/_bridge/__init__.py +0 -0
  169. /inspect_ai/{solver/_human_agent → agent/_human}/__init__.py +0 -0
  170. /inspect_ai/{solver/_human_agent → agent/_human}/commands/command.py +0 -0
  171. /inspect_ai/{solver/_human_agent → agent/_human}/commands/instructions.py +0 -0
  172. /inspect_ai/{solver/_human_agent → agent/_human}/commands/note.py +0 -0
  173. /inspect_ai/{solver/_human_agent → agent/_human}/commands/status.py +0 -0
  174. /inspect_ai/{solver/_human_agent → agent/_human}/commands/submit.py +0 -0
  175. /inspect_ai/{solver/_human_agent → agent/_human}/panel.py +0 -0
  176. /inspect_ai/{solver/_human_agent → agent/_human}/view.py +0 -0
  177. {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.83.dist-info}/WHEEL +0 -0
  178. {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.83.dist-info}/entry_points.txt +0 -0
  179. {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.83.dist-info}/licenses/LICENSE +0 -0
  180. {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.83.dist-info}/top_level.txt +0 -0
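Entries 82–99 above introduce a new inspect_ai/agent package: the former solver/_human_agent and solver/_bridge code moves under it, and new modules (_agent.py, _react.py, _handoff.py, _as_solver.py, _as_tool.py, _run.py) appear. A minimal sketch of how that new surface is presumably used, with react and as_solver assumed from the _react.py and _as_solver.py file names; the exact signatures are not confirmed by this diff:

# Illustrative only: `react` and `as_solver` are inferred from the new
# inspect_ai/agent/* modules listed above; consult the 0.3.83 docs for
# the actual signatures.
from inspect_ai import Task, task
from inspect_ai.agent import as_solver, react
from inspect_ai.dataset import Sample
from inspect_ai.tool import bash


@task
def list_files() -> Task:
    return Task(
        dataset=[Sample(input="List the files in the working directory.")],
        # a ReAct-style tool-using agent adapted to the existing solver interface
        solver=as_solver(react(tools=[bash()])),
        sandbox="local",  # bash() needs a sandbox to execute in
    )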

inspect_ai/model/_openai_responses.py

@@ -1,8 +1,10 @@
-import json
+from itertools import chain
+from typing import TypedDict, cast
 
 from openai.types.responses import (
     FunctionToolParam,
-    Response,
+    ResponseComputerToolCall,
+    ResponseComputerToolCallParam,
     ResponseFunctionToolCall,
     ResponseFunctionToolCallParam,
     ResponseInputContentParam,
@@ -18,13 +20,17 @@ from openai.types.responses import (
     ResponseReasoningItem,
     ResponseReasoningItemParam,
     ToolChoiceFunctionParam,
+    ToolChoiceTypesParam,
     ToolParam,
 )
+from openai.types.responses import Response as OpenAIResponse
+from openai.types.responses.response import IncompleteDetails
 from openai.types.responses.response_create_params import (
     ToolChoice as ResponsesToolChoice,
 )
 from openai.types.responses.response_input_item_param import FunctionCallOutput, Message
 from openai.types.responses.response_reasoning_item_param import Summary
+from pydantic import JsonValue
 
 from inspect_ai._util.content import (
     Content,
@@ -35,98 +41,93 @@ from inspect_ai._util.content import (
 from inspect_ai._util.images import file_as_data_uri
 from inspect_ai._util.url import is_http_url
 from inspect_ai.model._call_tools import parse_tool_call
+from inspect_ai.model._chat_message import ChatMessage, ChatMessageAssistant
+from inspect_ai.model._generate_config import GenerateConfig
 from inspect_ai.model._model_output import ChatCompletionChoice, StopReason
 from inspect_ai.model._openai import is_o_series
+from inspect_ai.model._openai_computer_use import (
+    computer_call_output,
+    maybe_computer_use_preview_tool,
+    tool_call_from_openai_computer_tool_call,
+)
 from inspect_ai.tool._tool_call import ToolCall
 from inspect_ai.tool._tool_choice import ToolChoice
 from inspect_ai.tool._tool_info import ToolInfo
 
-from ._chat_message import ChatMessage, ChatMessageAssistant
-
 
 async def openai_responses_inputs(
     messages: list[ChatMessage], model: str
 ) -> list[ResponseInputItemParam]:
-    responses_inputs: list[ResponseInputItemParam] = []
-    for message in messages:
-        responses_inputs.extend(await openai_responses_input(message, model))
-    return responses_inputs
+    return [
+        item
+        for message in messages
+        for item in await _openai_input_item_from_chat_message(message, model)
+    ]
 
 
-async def openai_responses_input(
+async def _openai_input_item_from_chat_message(
     message: ChatMessage, model: str
 ) -> list[ResponseInputItemParam]:
     if message.role == "system":
-        content = await openai_responses_content_list_param(message.content)
-        if is_o_series(model):
-            return [Message(type="message", role="developer", content=content)]
-        else:
-            return [Message(type="message", role="system", content=content)]
+        content = await _openai_responses_content_list_param(message.content)
+        return (
+            [Message(type="message", role="developer", content=content)]
+            if is_o_series(model)
+            else [Message(type="message", role="system", content=content)]
+        )
     elif message.role == "user":
         return [
             Message(
                 type="message",
                 role="user",
-                content=await openai_responses_content_list_param(message.content),
+                content=await _openai_responses_content_list_param(message.content),
             )
         ]
     elif message.role == "assistant":
-        reasoning_content = openai_responses_reasponing_content_params(message.content)
-        if message.content:
-            formatted_id = str(message.id).replace("resp_", "msg_", 1)
-            if not formatted_id.startswith("msg_"):
-                # These messages MUST start with `msg_`.
-                # As `store=False` for this provider, OpenAI doesn't validate the IDs.
-                # This will keep them consistent across calls though.
-                formatted_id = f"msg_{formatted_id}"
-            text_content = [
-                ResponseOutputMessageParam(
-                    type="message",
-                    role="assistant",
-                    id=formatted_id,
-                    content=openai_responses_text_content_params(message.content),
-                    status="completed",
-                )
-            ]
-        else:
-            text_content = []
-        tools_content = openai_responses_tools_content_params(message.tool_calls)
-        return reasoning_content + text_content + tools_content
+        return _openai_input_items_from_chat_message_assistant(message)
     elif message.role == "tool":
-        # TODO: Return ouptut types for internal tools e.g. computer, web_search
-        if message.error is not None:
-            output = message.error.message
-        else:
-            output = message.text
+        if message.internal:
+            internal = _model_tool_call_for_internal(message.internal)
+            if internal.type == "computer_call":
+                return [computer_call_output(message, internal)]
+
         return [
             FunctionCallOutput(
                 type="function_call_output",
                 call_id=message.tool_call_id or str(message.function),
-                output=output,
+                output=message.error.message
+                if message.error is not None
+                else message.text,
            )
         ]
+
     else:
         raise ValueError(f"Unexpected message role '{message.role}'")
 
 
-async def openai_responses_content_list_param(
+async def _openai_responses_content_list_param(
     content: str | list[Content],
 ) -> ResponseInputMessageContentListParam:
-    if isinstance(content, str):
-        content = [ContentText(text=content)]
-    return [await openai_responses_content_param(c) for c in content]
+    return [
+        await _openai_responses_content_param(c)
+        for c in ([ContentText(text=content)] if isinstance(content, str) else content)
+    ]
 
 
-async def openai_responses_content_param(content: Content) -> ResponseInputContentParam:  # type: ignore[return]
+async def _openai_responses_content_param(
+    content: Content,
+) -> ResponseInputContentParam:  # type: ignore[return]
     if isinstance(content, ContentText):
         return ResponseInputTextParam(type="input_text", text=content.text)
     elif isinstance(content, ContentImage):
-        image_url = content.image
-        if not is_http_url(image_url):
-            image_url = await file_as_data_uri(image_url)
-
         return ResponseInputImageParam(
-            type="input_image", detail=content.detail, image_url=image_url
+            type="input_image",
+            detail=content.detail,
+            image_url=(
+                content.image
+                if is_http_url(content.image)
+                else await file_as_data_uri(content.image)
+            ),
         )
     else:
         # TODO: support for files (PDFs) and audio and video whenever
@@ -140,144 +141,297 @@ async def openai_responses_content_param(content: Content) -> ResponseInputConte
         raise ValueError("Unsupported content type.")
 
 
-def openai_responses_reasponing_content_params(
-    content: str | list[Content],
-) -> list[ResponseInputItemParam]:
-    if isinstance(content, list):
-        return [
-            ResponseReasoningItemParam(
-                type="reasoning",
-                id=str(c.signature),
-                summary=[Summary(type="summary_text", text=c.reasoning)],
+def openai_responses_tool_choice(
+    tool_choice: ToolChoice, tools: list[ToolParam]
+) -> ResponsesToolChoice:
+    match tool_choice:
+        case "none" | "auto":
+            return tool_choice
+        case "any":
+            return "required"
+        case _:
+            return (
+                ToolChoiceTypesParam(type="computer_use_preview")
+                if tool_choice.name == "computer"
+                and any(tool["type"] == "computer_use_preview" for tool in tools)
+                else ToolChoiceFunctionParam(type="function", name=tool_choice.name)
             )
-            for c in content
-            if isinstance(c, ContentReasoning)
-        ]
-    else:
-        return []
 
 
-def openai_responses_text_content_params(
-    content: str | list[Content],
-) -> list[ResponseOutputTextParam | ResponseOutputRefusalParam]:
-    if isinstance(content, str):
-        content = [ContentText(text=content)]
+def openai_responses_tools(
+    tools: list[ToolInfo], config: GenerateConfig
+) -> list[ToolParam]:
+    return [_tool_param_for_tool_info(tool, config) for tool in tools]
 
-    params: list[ResponseOutputTextParam | ResponseOutputRefusalParam] = []
 
-    for c in content:
-        if isinstance(c, ContentText):
-            if c.refusal:
-                params.append(
-                    ResponseOutputRefusalParam(type="refusal", refusal=c.text)
-                )
-            else:
-                params.append(
-                    ResponseOutputTextParam(
-                        type="output_text", text=c.text, annotations=[]
-                    )
-                )
-
-    return params
+def openai_responses_chat_choices(
+    model: str, response: OpenAIResponse, tools: list[ToolInfo]
+) -> list[ChatCompletionChoice]:
+    message, stop_reason = _chat_message_assistant_from_openai_response(
+        model, response, tools
+    )
+    return [ChatCompletionChoice(message=message, stop_reason=stop_reason)]
 
 
-def openai_responses_tools_content_params(
-    tool_calls: list[ToolCall] | None,
-) -> list[ResponseInputItemParam]:
-    if tool_calls is not None:
-        return [
-            ResponseFunctionToolCallParam(
-                type="function_call",
-                call_id=call.id,
-                name=call.function,
-                arguments=json.dumps(call.arguments),
-                status="completed",
-            )
-            for call in tool_calls
-        ]
-    else:
-        return []
+# The next two function perform transformations between OpenAI types an Inspect
+# ChatMessageAssistant. Here is a diagram that helps visualize the transforms.
+# ┌───────────────────────────┐ ┌───────────────────────────┐ ┌───────────────────────────┐
+# │ OpenAI Response │ │ ChatMessageAssistant │ │ OpenAI Request │
+# │ id: resp_aaaaa │ │ id: resp_aaaaa │ │ id: rs_bbbbbb │
+# │ ┌───────────────────────┐ │ │ ┌───────────────────────┐ │ │ ┌───────────────────────┐ │
+# │ │ output │ │ │ │ content │ │ │ │ input │ │
+# │ │ ┌───────────────────┐ │ │ │ │ ┌───────────────────┐ │ │ │ │ ┌───────────────────┐ │ │
+# │ │ │ type: "reasoning" │ │ │ │ │ │ ContentText │ │ │ │ │ │ type: "reasoning" │ │ │
+# │ │ │ id: "rs_bbbbbb" │ │ │ │ │ │ text: "" │ │ │ │ │ │ id: "rs_bbbbbb" │ │ │
+# │ │ │ summary: [] │ │ │ │ │ └───────────────────┘ │ │ │ │ │ summary: [] │ │ │
+# │ │ └───────────────────┘ │ │ │ │ ┌───────────────────┐ │ │ │ │ ┌───────────────────┐ │ │
+# │ │ ┌───────────────────┐ │ │ │ │ │ ContentText │ │ │ │ │ │ type: "message" │ │ │
+# │ │ │ type: "message" │ │ │ │ │ │ text: "text1" │ │ │ │ │ │ id: "msg_ccccccc" │ │ │
+# │ │ │ id: "msg_ccccccc" │ │ │ │ │ └───────────────────┘ │ │ │ │ │ role: "assistant" │ │ │
+# │ │ │ role: "assistant" │ │ │--->│ │ ┌───────────────────┐ │ │--->│ │ │ ┌───────────────┐ │ │ │
+# │ │ │ ┌───────────────┐ │ │ │ │ │ │ ContentText │ │ │ │ │ │ │ Content │ │ │ │
+# │ │ │ │ Content │ │ │ │ │ │ │ text: "text2" │ │ │ │ │ │ │ ┌───────────┐ │ │ │ │
+# │ │ │ │ ┌───────────┐ │ │ │ │ │ └───────────────────────┘ │ │ │ │ │ │"text1" │ │ │ │ │
+# │ │ │ │ │"text1" │ │ │ │ │ │ ┌───────────────────────┐ │ │ │ │ │ └───────────┘ │ │ │ │
+# │ │ │ │ └───────────┘ │ │ │ │ │ │ internal │ │ │ │ │ │ ┌───────────┐ │ │ │ │
+# │ │ │ │ ┌───────────┐ │ │ │ │ │ │ ┌───────────────────┐ │ │ │ │ │ │ │ "text2" │ │ │ │ │
+# │ │ │ │ │ "text2" │ │ │ │ │ │ │ │ reasoning_id: │ │ │ │ │ │ │ └───────────┘ │ │ │ │
+# │ │ │ │ └───────────┘ │ │ │ │ │ │ │ "rs_bbbbbb" │ │ │ │ │ │ └───────────────┘ │ │ │
+# │ │ │ └───────────────┘ │ │ │ │ │ └───────────────────┘ │ │ │ │ └───────────────────┘ │ │
+# │ │ └───────────────────┘ │ │ │ │ ┌───────────────────┐ │ │ │ └───────────────────────┘ │
+# │ └───────────────────────┘ │ │ │ │ output_msg_id: │ │ │ └───────────────────────────┘
+# └───────────────────────────┘ │ │ │ "msg_ccccccc" │ │ │
+#                               │ │ └───────────────────┘ │ │
+#                               │ └───────────────────────┘ │
+#                               └───────────────────────────┘
 
 
-def openai_responses_tool_choice(tool_choice: ToolChoice) -> ResponsesToolChoice:
-    match tool_choice:
-        case "none" | "auto":
-            return tool_choice
-        case "any":
-            return "required"
-        # TODO: internal tools need to be converted to ToolChoiceTypesParam
-        case _:
-            return ToolChoiceFunctionParam(type="function", name=tool_choice.name)
+class _AssistantInternal(TypedDict):
+    output_message_id: str | None
+    reasoning_id: str | None
 
 
-def openai_responses_tools(tools: list[ToolInfo]) -> list[ToolParam]:
-    # TODO: return special types for internal tools
-    return [
-        FunctionToolParam(
-            type="function",
-            name=tool.name,
-            description=tool.description,
-            parameters=tool.parameters.model_dump(exclude_none=True),
-            strict=False,  # default parameters don't work in strict mode
-        )
-        for tool in tools
-    ]
+def _chat_message_assistant_from_openai_response(
+    model: str, response: OpenAIResponse, tools: list[ToolInfo]
+) -> tuple[ChatMessageAssistant, StopReason]:
+    """
+    Transform OpenAI `Response` into an Inspect `ChatMessageAssistant` and `StopReason`.
 
+    It maps each `ResponseOutputItem` in `output` to a `Content` in the
+    `content` field of the `ChatMessageAssistant`.
 
-def openai_responses_chat_choices(
-    response: Response, tools: list[ToolInfo]
-) -> list[ChatCompletionChoice]:
+    It also keeps track of the OpenAI id's for each of the items in `.output`.
+    The way we're doing it assumes that there won't be multiple items of the
+    same type in the output. This seems ok, but who knows.
+    """
     # determine the StopReason
-    stop_reason: StopReason = "stop"
-    if response.incomplete_details is not None:
-        if response.incomplete_details.reason == "max_output_tokens":
+    stop_reason: StopReason
+    match response.incomplete_details:
+        case IncompleteDetails(reason="max_output_tokens"):
             stop_reason = "max_tokens"
-        elif response.incomplete_details.reason == "content_filter":
+        case IncompleteDetails(reason="content_filter"):
             stop_reason = "content_filter"
+        case _:
+            stop_reason = "stop"
 
     # collect output and tool calls
     message_content: list[Content] = []
     tool_calls: list[ToolCall] = []
+    internal = _AssistantInternal(output_message_id=None, reasoning_id=None)
     for output in response.output:
-        if isinstance(output, ResponseOutputMessage):
-            for content in output.content:
-                if isinstance(content, ResponseOutputText):
-                    message_content.append(ContentText(text=content.text))
-                else:
-                    message_content.append(
-                        ContentText(text=content.refusal, refusal=True)
-                    )
-        elif isinstance(output, ResponseReasoningItem):
-            reasoning = "\n".join([summary.text for summary in output.summary])
-            if reasoning:
+        match output:
+            case ResponseOutputMessage(content=content, id=id):
+                assert internal["output_message_id"] is None, "Multiple message outputs"
+                internal["output_message_id"] = id
+                message_content.extend(
+                    [
+                        ContentText(text=c.text)
+                        if isinstance(c, ResponseOutputText)
+                        else ContentText(text=c.refusal, refusal=True)
+                        for c in content
+                    ]
+                )
+            case ResponseReasoningItem(summary=summary, id=id):
+                assert internal["reasoning_id"] is None, "Multiple reasoning items"
+                internal["reasoning_id"] = id
                 message_content.append(
-                    ContentReasoning(signature=output.id, reasoning=reasoning)
+                    ContentReasoning(reasoning="\n".join([s.text for s in summary]))
                 )
-        else:
-            stop_reason = "tool_calls"
-            if isinstance(output, ResponseFunctionToolCall):
-                tool_calls.append(
-                    parse_tool_call(
-                        output.call_id,
-                        output.name,
-                        output.arguments,
-                        tools,
+            case _:
+                stop_reason = "tool_calls"
+                match output:
+                    case ResponseFunctionToolCall():
+                        tool_calls.append(
+                            parse_tool_call(
+                                output.call_id,
+                                output.name,
+                                output.arguments,
+                                tools,
+                            )
+                        )
+                    case ResponseComputerToolCall():
+                        tool_calls.append(
+                            tool_call_from_openai_computer_tool_call(output)
+                        )
+                    case _:
+                        raise ValueError(f"Unexpected output type: {output.__class__}")
+
+    return (
+        ChatMessageAssistant(
+            id=response.id,
+            content=message_content,
+            internal=cast(JsonValue, internal),
+            tool_calls=tool_calls if len(tool_calls) > 0 else None,
+            model=model,
+            source="generate",
+        ),
+        stop_reason,
+    )
+
+
+def _openai_input_items_from_chat_message_assistant(
+    message: ChatMessageAssistant,
+) -> list[ResponseInputItemParam]:
+    """
+    Transform a `ChatMessageAssistant` into OpenAI `ResponseInputItem`'s for playback to the model.
+
+    This is essentially the inverse transform of
+    `_chat_message_assistant_from_openai_response`. It relies on the `internal`
+    field of the `ChatMessageAssistant` to help it provide the proper id's the
+    items in the returned list.
+    """
+    # As currently coded, this code only supports a single OutputMessage and
+    # a single ReasoningItem for each Response/ChatMessageAssistant.
+    reasoning_item: ResponseReasoningItemParam | None = None
+    output_message: ResponseOutputMessageParam | None = None
+
+    (output_message_id, reasoning_id) = _ids_from_assistant_internal(message)
+
+    for content in (
+        list[ContentText | ContentReasoning]([ContentText(text=message.content)])
+        if isinstance(message.content, str)
+        else [
+            c for c in message.content if isinstance(c, ContentText | ContentReasoning)
+        ]
+    ):
+        match content:
+            case ContentReasoning(reasoning=reasoning):
+                assert reasoning_item is None, "Multiple reasoning items"
+                assert reasoning_id is not None, "Must find reasoning id"
+                reasoning_item = ResponseReasoningItemParam(
+                    type="reasoning",
+                    id=reasoning_id,
+                    summary=[Summary(type="summary_text", text=reasoning)],
+                )
+            case ContentText(text=text, refusal=refusal):
+                new_content = (
+                    ResponseOutputRefusalParam(type="refusal", refusal=text)
+                    if refusal
+                    else ResponseOutputTextParam(
+                        type="output_text", text=text, annotations=[]
                     )
                 )
-                pass
-            else:
-                ## TODO: implement support for internal tools
-                raise ValueError(f"Unexpected output type: {output.__class__}")
+                if output_message is None:
+                    assert output_message_id is not None, "Missing output message id"
+                    output_message = ResponseOutputMessageParam(
+                        type="message",
+                        role="assistant",
+                        id=output_message_id,
+                        content=[new_content],
+                        status="completed",
+                    )
+                else:
+                    output_message["content"] = chain(
+                        output_message["content"], [new_content]
+                    )
 
-    # return choice
     return [
-        ChatCompletionChoice(
-            message=ChatMessageAssistant(
-                id=response.id,
-                content=message_content,
-                tool_calls=tool_calls if len(tool_calls) > 0 else None,
-                source="generate",
-            ),
-            stop_reason=stop_reason,
+        item for item in (reasoning_item, output_message) if item
+    ] + _tool_call_items_from_assistant_message(message)
+
+
+def _model_tool_call_for_internal(
+    internal: JsonValue | None,
+) -> ResponseFunctionToolCall | ResponseComputerToolCall:
+    assert isinstance(internal, dict), "OpenAI internal must be a dict"
+    # TODO: Stop runtime validating these over and over once the code is stable
+    match internal.get("type"):
+        case "function_call":
+            return ResponseFunctionToolCall.model_validate(internal)
+        case "computer_call":
+            return ResponseComputerToolCall.model_validate(internal)
+        case _ as x:
+            raise NotImplementedError(f"Unsupported tool call type: {x}")
+
+
+def _maybe_native_tool_param(
+    tool: ToolInfo,
+    config: GenerateConfig,
+) -> ToolParam | None:
+    return (
+        (
+            maybe_computer_use_preview_tool(tool)
+            # or self.text_editor_tool_param(tool)
+            # or self.bash_tool_param(tool)
         )
-    ]
+        if config.internal_tools or True
+        else None
+    )
+
+
+def _tool_call_items_from_assistant_message(
+    message: ChatMessageAssistant,
+) -> list[ResponseInputItemParam]:
+    tool_calls: list[ResponseInputItemParam] = []
+    for call in message.tool_calls or []:
+        if isinstance(call.internal, dict):
+            tool_calls.append(
+                cast(
+                    _ResponseToolCallParam,
+                    _model_tool_call_for_internal(call.internal).model_dump(),
+                )
+            )
+        else:
+            tool_calls.append(
+                ResponseFunctionToolCallParam(
+                    type="function_call",
+                    call_id=call.id,
+                    name=call.function,
+                    arguments=call.function,
+                )
+            )
+
+    return tool_calls
+
+
+def _ids_from_assistant_internal(
+    message: ChatMessageAssistant,
+) -> tuple[str | None, str | None]:
+    assert isinstance(message.internal, dict), (
+        "OpenAI ChatMessageAssistant internal must be an _AssistantInternal"
+    )
+    internal = cast(_AssistantInternal, message.internal)
+    return (internal["output_message_id"], internal["reasoning_id"])
+
+
+_ResponseToolCallParam = (
+    ResponseFunctionToolCallParam | ResponseComputerToolCallParam
+    # | ResponseFileSearchToolCallParam
+    # | ResponseFunctionToolCallParam
+    # | ResponseFunctionWebSearchParam
+)
+
+
+def _tool_param_for_tool_info(
+    tool: ToolInfo,
+    config: GenerateConfig,
+) -> ToolParam:
+    # Use a native tool implementation when available. Otherwise, use the
+    # standard tool implementation
+    return _maybe_native_tool_param(tool, config) or FunctionToolParam(
+        type="function",
+        name=tool.name,
+        description=tool.description,
+        parameters=tool.parameters.model_dump(exclude_none=True),
+        strict=False,  # default parameters don't work in strict mode
+    )
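
The docstrings and ASCII diagram in the new code above describe an id round-trip: _chat_message_assistant_from_openai_response stashes the OpenAI output item ids (rs_* / msg_*) in the assistant message's new internal field, and _openai_input_items_from_chat_message_assistant reads them back when the message is replayed as request input. A rough standalone sketch of just that bookkeeping, mirroring the _AssistantInternal TypedDict from the diff (the stash_ids/restore_ids helpers are illustrative and not part of the package):

from typing import TypedDict, cast

from pydantic import JsonValue


class _AssistantInternal(TypedDict):
    # same shape as the TypedDict added in _openai_responses.py above
    output_message_id: str | None
    reasoning_id: str | None


def stash_ids(output_message_id: str | None, reasoning_id: str | None) -> JsonValue:
    # Response -> ChatMessageAssistant direction: record the OpenAI item ids
    return cast(
        JsonValue,
        _AssistantInternal(
            output_message_id=output_message_id, reasoning_id=reasoning_id
        ),
    )


def restore_ids(internal: JsonValue) -> tuple[str | None, str | None]:
    # ChatMessageAssistant -> request direction: read the ids back out
    assert isinstance(internal, dict), "internal must be an _AssistantInternal dict"
    stashed = cast(_AssistantInternal, internal)
    return (stashed["output_message_id"], stashed["reasoning_id"])


internal = stash_ids("msg_ccccccc", "rs_bbbbbb")
assert restore_ids(internal) == ("msg_ccccccc", "rs_bbbbbb")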