inspect-ai 0.3.81__py3-none-any.whl → 0.3.83__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (297)
  1. inspect_ai/__init__.py +2 -1
  2. inspect_ai/_cli/eval.py +35 -2
  3. inspect_ai/_cli/util.py +44 -1
  4. inspect_ai/_display/core/config.py +1 -1
  5. inspect_ai/_display/core/display.py +13 -4
  6. inspect_ai/_display/core/results.py +1 -1
  7. inspect_ai/_display/textual/app.py +14 -3
  8. inspect_ai/_display/textual/display.py +4 -0
  9. inspect_ai/_display/textual/widgets/samples.py +9 -3
  10. inspect_ai/_display/textual/widgets/task_detail.py +8 -8
  11. inspect_ai/_display/textual/widgets/tasks.py +17 -1
  12. inspect_ai/_display/textual/widgets/vscode.py +44 -0
  13. inspect_ai/_eval/eval.py +74 -25
  14. inspect_ai/_eval/evalset.py +22 -18
  15. inspect_ai/_eval/loader.py +34 -11
  16. inspect_ai/_eval/run.py +13 -15
  17. inspect_ai/_eval/score.py +13 -3
  18. inspect_ai/_eval/task/generate.py +8 -9
  19. inspect_ai/_eval/task/log.py +55 -6
  20. inspect_ai/_eval/task/run.py +51 -10
  21. inspect_ai/_eval/task/task.py +23 -9
  22. inspect_ai/_util/constants.py +2 -0
  23. inspect_ai/_util/file.py +30 -1
  24. inspect_ai/_util/json.py +37 -1
  25. inspect_ai/_util/registry.py +1 -0
  26. inspect_ai/_util/vscode.py +37 -0
  27. inspect_ai/_view/server.py +113 -1
  28. inspect_ai/_view/www/App.css +7 -1
  29. inspect_ai/_view/www/dist/assets/index.css +813 -415
  30. inspect_ai/_view/www/dist/assets/index.js +54475 -32003
  31. inspect_ai/_view/www/eslint.config.mjs +1 -1
  32. inspect_ai/_view/www/log-schema.json +137 -31
  33. inspect_ai/_view/www/node_modules/flatted/python/flatted.py +149 -0
  34. inspect_ai/_view/www/package.json +11 -2
  35. inspect_ai/_view/www/src/App.tsx +161 -853
  36. inspect_ai/_view/www/src/api/api-browser.ts +176 -5
  37. inspect_ai/_view/www/src/api/api-vscode.ts +75 -1
  38. inspect_ai/_view/www/src/api/client-api.ts +66 -10
  39. inspect_ai/_view/www/src/api/jsonrpc.ts +2 -0
  40. inspect_ai/_view/www/src/api/types.ts +107 -2
  41. inspect_ai/_view/www/src/appearance/icons.ts +2 -0
  42. inspect_ai/_view/www/src/components/AsciinemaPlayer.tsx +3 -3
  43. inspect_ai/_view/www/src/components/Card.tsx +6 -4
  44. inspect_ai/_view/www/src/components/DownloadPanel.tsx +2 -2
  45. inspect_ai/_view/www/src/components/ExpandablePanel.tsx +56 -61
  46. inspect_ai/_view/www/src/components/FindBand.tsx +17 -9
  47. inspect_ai/_view/www/src/components/HumanBaselineView.tsx +1 -1
  48. inspect_ai/_view/www/src/components/JsonPanel.tsx +14 -24
  49. inspect_ai/_view/www/src/components/LargeModal.tsx +2 -35
  50. inspect_ai/_view/www/src/components/LightboxCarousel.tsx +27 -11
  51. inspect_ai/_view/www/src/components/LinkButton.module.css +16 -0
  52. inspect_ai/_view/www/src/components/LinkButton.tsx +33 -0
  53. inspect_ai/_view/www/src/components/LiveVirtualList.module.css +11 -0
  54. inspect_ai/_view/www/src/components/LiveVirtualList.tsx +177 -0
  55. inspect_ai/_view/www/src/components/MarkdownDiv.tsx +116 -26
  56. inspect_ai/_view/www/src/components/MessageBand.tsx +14 -9
  57. inspect_ai/_view/www/src/components/Modal.module.css +38 -0
  58. inspect_ai/_view/www/src/components/Modal.tsx +77 -0
  59. inspect_ai/_view/www/src/components/MorePopOver.tsx +3 -3
  60. inspect_ai/_view/www/src/components/NavPills.tsx +20 -8
  61. inspect_ai/_view/www/src/components/NoContentsPanel.module.css +12 -0
  62. inspect_ai/_view/www/src/components/NoContentsPanel.tsx +20 -0
  63. inspect_ai/_view/www/src/components/ProgressBar.module.css +5 -4
  64. inspect_ai/_view/www/src/components/ProgressBar.tsx +3 -2
  65. inspect_ai/_view/www/src/components/PulsingDots.module.css +81 -0
  66. inspect_ai/_view/www/src/components/PulsingDots.tsx +45 -0
  67. inspect_ai/_view/www/src/components/TabSet.tsx +4 -37
  68. inspect_ai/_view/www/src/components/ToolButton.tsx +3 -4
  69. inspect_ai/_view/www/src/index.tsx +26 -94
  70. inspect_ai/_view/www/src/logfile/remoteLogFile.ts +9 -1
  71. inspect_ai/_view/www/src/logfile/remoteZipFile.ts +30 -4
  72. inspect_ai/_view/www/src/metadata/RenderedContent.tsx +4 -6
  73. inspect_ai/_view/www/src/plan/DetailStep.module.css +4 -0
  74. inspect_ai/_view/www/src/plan/DetailStep.tsx +6 -3
  75. inspect_ai/_view/www/src/plan/ScorerDetailView.tsx +1 -1
  76. inspect_ai/_view/www/src/plan/SolverDetailView.module.css +2 -1
  77. inspect_ai/_view/www/src/samples/InlineSampleDisplay.module.css +9 -1
  78. inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +74 -28
  79. inspect_ai/_view/www/src/samples/SampleDialog.tsx +58 -22
  80. inspect_ai/_view/www/src/samples/SampleDisplay.module.css +4 -0
  81. inspect_ai/_view/www/src/samples/SampleDisplay.tsx +135 -104
  82. inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +10 -0
  83. inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +83 -36
  84. inspect_ai/_view/www/src/samples/SamplesTools.tsx +35 -30
  85. inspect_ai/_view/www/src/samples/chat/ChatMessage.tsx +2 -1
  86. inspect_ai/_view/www/src/samples/chat/ChatMessageRenderer.tsx +1 -1
  87. inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.tsx +45 -53
  88. inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +6 -1
  89. inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +5 -0
  90. inspect_ai/_view/www/src/samples/chat/messages.ts +36 -0
  91. inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.module.css +3 -0
  92. inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +11 -1
  93. inspect_ai/_view/www/src/samples/chat/tools/ToolInput.tsx +22 -46
  94. inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +34 -20
  95. inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.module.css +3 -3
  96. inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.tsx +1 -1
  97. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.module.css +4 -4
  98. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +10 -10
  99. inspect_ai/_view/www/src/samples/descriptor/types.ts +6 -5
  100. inspect_ai/_view/www/src/samples/list/SampleFooter.module.css +22 -3
  101. inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +27 -2
  102. inspect_ai/_view/www/src/samples/list/SampleList.tsx +122 -85
  103. inspect_ai/_view/www/src/samples/list/SampleRow.module.css +6 -0
  104. inspect_ai/_view/www/src/samples/list/SampleRow.tsx +28 -15
  105. inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.tsx +29 -18
  106. inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +28 -28
  107. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +19 -9
  108. inspect_ai/_view/www/src/samples/sampleDataAdapter.ts +33 -0
  109. inspect_ai/_view/www/src/samples/sampleLimit.ts +2 -2
  110. inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +12 -27
  111. inspect_ai/_view/www/src/samples/scores/SampleScoresGrid.module.css +38 -0
  112. inspect_ai/_view/www/src/samples/scores/SampleScoresGrid.tsx +118 -0
  113. inspect_ai/_view/www/src/samples/scores/{SampleScoreView.module.css → SampleScoresView.module.css} +10 -1
  114. inspect_ai/_view/www/src/samples/scores/SampleScoresView.tsx +78 -0
  115. inspect_ai/_view/www/src/samples/transcript/ErrorEventView.tsx +0 -13
  116. inspect_ai/_view/www/src/samples/transcript/InfoEventView.tsx +0 -13
  117. inspect_ai/_view/www/src/samples/transcript/InputEventView.tsx +0 -13
  118. inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +4 -0
  119. inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +10 -24
  120. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.tsx +0 -13
  121. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +4 -22
  122. inspect_ai/_view/www/src/samples/transcript/SandboxEventView.tsx +15 -24
  123. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.tsx +0 -13
  124. inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +6 -28
  125. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.tsx +24 -34
  126. inspect_ai/_view/www/src/samples/transcript/ToolEventView.module.css +4 -0
  127. inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +33 -17
  128. inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +197 -338
  129. inspect_ai/_view/www/src/samples/transcript/TranscriptVirtualListComponent.module.css +16 -0
  130. inspect_ai/_view/www/src/samples/transcript/TranscriptVirtualListComponent.tsx +44 -0
  131. inspect_ai/_view/www/src/samples/transcript/event/EventNav.tsx +7 -4
  132. inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +81 -60
  133. inspect_ai/_view/www/src/samples/transcript/event/EventProgressPanel.module.css +23 -0
  134. inspect_ai/_view/www/src/samples/transcript/event/EventProgressPanel.tsx +27 -0
  135. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +29 -1
  136. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +102 -72
  137. inspect_ai/_view/www/src/scoring/utils.ts +87 -0
  138. inspect_ai/_view/www/src/state/appSlice.ts +244 -0
  139. inspect_ai/_view/www/src/state/hooks.ts +399 -0
  140. inspect_ai/_view/www/src/state/logPolling.ts +200 -0
  141. inspect_ai/_view/www/src/state/logSlice.ts +224 -0
  142. inspect_ai/_view/www/src/state/logsPolling.ts +118 -0
  143. inspect_ai/_view/www/src/state/logsSlice.ts +181 -0
  144. inspect_ai/_view/www/src/state/samplePolling.ts +314 -0
  145. inspect_ai/_view/www/src/state/sampleSlice.ts +140 -0
  146. inspect_ai/_view/www/src/state/sampleUtils.ts +21 -0
  147. inspect_ai/_view/www/src/state/scrolling.ts +206 -0
  148. inspect_ai/_view/www/src/state/store.ts +168 -0
  149. inspect_ai/_view/www/src/state/store_filter.ts +84 -0
  150. inspect_ai/_view/www/src/state/utils.ts +23 -0
  151. inspect_ai/_view/www/src/storage/index.ts +26 -0
  152. inspect_ai/_view/www/src/types/log.d.ts +36 -26
  153. inspect_ai/_view/www/src/types/markdown-it-katex.d.ts +21 -0
  154. inspect_ai/_view/www/src/types.ts +94 -32
  155. inspect_ai/_view/www/src/utils/attachments.ts +58 -23
  156. inspect_ai/_view/www/src/utils/json-worker.ts +79 -12
  157. inspect_ai/_view/www/src/utils/logger.ts +52 -0
  158. inspect_ai/_view/www/src/utils/polling.ts +100 -0
  159. inspect_ai/_view/www/src/utils/react.ts +30 -0
  160. inspect_ai/_view/www/src/utils/vscode.ts +1 -1
  161. inspect_ai/_view/www/src/workspace/WorkSpace.tsx +184 -217
  162. inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +11 -53
  163. inspect_ai/_view/www/src/workspace/navbar/Navbar.tsx +8 -18
  164. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.module.css +1 -0
  165. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +40 -22
  166. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +16 -1
  167. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +159 -103
  168. inspect_ai/_view/www/src/workspace/navbar/RunningStatusPanel.module.css +32 -0
  169. inspect_ai/_view/www/src/workspace/navbar/RunningStatusPanel.tsx +32 -0
  170. inspect_ai/_view/www/src/workspace/navbar/ScoreGrid.module.css +35 -0
  171. inspect_ai/_view/www/src/workspace/navbar/ScoreGrid.tsx +117 -0
  172. inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +12 -14
  173. inspect_ai/_view/www/src/workspace/navbar/StatusPanel.tsx +6 -2
  174. inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +4 -4
  175. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.module.css +3 -2
  176. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.tsx +28 -13
  177. inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +5 -10
  178. inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +4 -4
  179. inspect_ai/_view/www/src/workspace/tabs/RunningNoSamples.module.css +22 -0
  180. inspect_ai/_view/www/src/workspace/tabs/RunningNoSamples.tsx +19 -0
  181. inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +128 -115
  182. inspect_ai/_view/www/src/workspace/tabs/grouping.ts +37 -5
  183. inspect_ai/_view/www/src/workspace/tabs/types.ts +4 -0
  184. inspect_ai/_view/www/src/workspace/types.ts +4 -3
  185. inspect_ai/_view/www/src/workspace/utils.ts +4 -4
  186. inspect_ai/_view/www/vite.config.js +6 -0
  187. inspect_ai/_view/www/yarn.lock +464 -355
  188. inspect_ai/agent/__init__.py +36 -0
  189. inspect_ai/agent/_agent.py +268 -0
  190. inspect_ai/agent/_as_solver.py +72 -0
  191. inspect_ai/agent/_as_tool.py +122 -0
  192. inspect_ai/{solver → agent}/_bridge/bridge.py +23 -37
  193. inspect_ai/{solver → agent}/_bridge/patch.py +9 -8
  194. inspect_ai/agent/_filter.py +46 -0
  195. inspect_ai/agent/_handoff.py +93 -0
  196. inspect_ai/{solver/_human_agent → agent/_human}/agent.py +11 -12
  197. inspect_ai/{solver/_human_agent → agent/_human}/commands/__init__.py +2 -3
  198. inspect_ai/{solver/_human_agent → agent/_human}/commands/clock.py +3 -1
  199. inspect_ai/{solver/_human_agent → agent/_human}/commands/score.py +5 -5
  200. inspect_ai/{solver/_human_agent → agent/_human}/install.py +6 -3
  201. inspect_ai/{solver/_human_agent → agent/_human}/service.py +7 -3
  202. inspect_ai/{solver/_human_agent → agent/_human}/state.py +5 -5
  203. inspect_ai/agent/_react.py +241 -0
  204. inspect_ai/agent/_run.py +36 -0
  205. inspect_ai/agent/_types.py +81 -0
  206. inspect_ai/log/_condense.py +26 -0
  207. inspect_ai/log/_log.py +17 -5
  208. inspect_ai/log/_recorders/buffer/__init__.py +14 -0
  209. inspect_ai/log/_recorders/buffer/buffer.py +30 -0
  210. inspect_ai/log/_recorders/buffer/database.py +685 -0
  211. inspect_ai/log/_recorders/buffer/filestore.py +259 -0
  212. inspect_ai/log/_recorders/buffer/types.py +84 -0
  213. inspect_ai/log/_recorders/eval.py +2 -11
  214. inspect_ai/log/_recorders/types.py +30 -0
  215. inspect_ai/log/_transcript.py +32 -2
  216. inspect_ai/model/__init__.py +7 -1
  217. inspect_ai/model/_call_tools.py +257 -52
  218. inspect_ai/model/_chat_message.py +7 -4
  219. inspect_ai/model/_conversation.py +13 -62
  220. inspect_ai/model/_display.py +85 -0
  221. inspect_ai/model/_generate_config.py +2 -2
  222. inspect_ai/model/_model.py +114 -14
  223. inspect_ai/model/_model_output.py +14 -9
  224. inspect_ai/model/_openai.py +16 -4
  225. inspect_ai/model/_openai_computer_use.py +162 -0
  226. inspect_ai/model/_openai_responses.py +319 -165
  227. inspect_ai/model/_providers/anthropic.py +20 -21
  228. inspect_ai/model/_providers/azureai.py +24 -13
  229. inspect_ai/model/_providers/bedrock.py +1 -7
  230. inspect_ai/model/_providers/cloudflare.py +3 -3
  231. inspect_ai/model/_providers/goodfire.py +2 -6
  232. inspect_ai/model/_providers/google.py +11 -10
  233. inspect_ai/model/_providers/groq.py +6 -3
  234. inspect_ai/model/_providers/hf.py +7 -3
  235. inspect_ai/model/_providers/mistral.py +7 -10
  236. inspect_ai/model/_providers/openai.py +47 -17
  237. inspect_ai/model/_providers/openai_o1.py +11 -4
  238. inspect_ai/model/_providers/openai_responses.py +12 -14
  239. inspect_ai/model/_providers/providers.py +2 -2
  240. inspect_ai/model/_providers/together.py +12 -2
  241. inspect_ai/model/_providers/util/chatapi.py +7 -2
  242. inspect_ai/model/_providers/util/hf_handler.py +4 -2
  243. inspect_ai/model/_providers/util/llama31.py +4 -2
  244. inspect_ai/model/_providers/vertex.py +11 -9
  245. inspect_ai/model/_providers/vllm.py +4 -4
  246. inspect_ai/scorer/__init__.py +2 -0
  247. inspect_ai/scorer/_metrics/__init__.py +2 -0
  248. inspect_ai/scorer/_metrics/grouped.py +84 -0
  249. inspect_ai/scorer/_score.py +26 -6
  250. inspect_ai/solver/__init__.py +2 -2
  251. inspect_ai/solver/_basic_agent.py +22 -9
  252. inspect_ai/solver/_bridge.py +31 -0
  253. inspect_ai/solver/_chain.py +20 -12
  254. inspect_ai/solver/_fork.py +5 -1
  255. inspect_ai/solver/_human_agent.py +52 -0
  256. inspect_ai/solver/_prompt.py +3 -1
  257. inspect_ai/solver/_run.py +59 -0
  258. inspect_ai/solver/_solver.py +14 -4
  259. inspect_ai/solver/_task_state.py +5 -3
  260. inspect_ai/tool/_tool_call.py +15 -8
  261. inspect_ai/tool/_tool_def.py +17 -12
  262. inspect_ai/tool/_tool_support_helpers.py +4 -4
  263. inspect_ai/tool/_tool_with.py +14 -11
  264. inspect_ai/tool/_tools/_bash_session.py +11 -2
  265. inspect_ai/tool/_tools/_computer/_common.py +18 -2
  266. inspect_ai/tool/_tools/_computer/_computer.py +18 -2
  267. inspect_ai/tool/_tools/_computer/_resources/tool/_constants.py +2 -0
  268. inspect_ai/tool/_tools/_computer/_resources/tool/_x11_client.py +17 -0
  269. inspect_ai/tool/_tools/_think.py +1 -1
  270. inspect_ai/tool/_tools/_web_browser/_web_browser.py +103 -62
  271. inspect_ai/util/__init__.py +2 -0
  272. inspect_ai/util/_anyio.py +27 -0
  273. inspect_ai/util/_sandbox/__init__.py +2 -1
  274. inspect_ai/util/_sandbox/context.py +32 -7
  275. inspect_ai/util/_sandbox/docker/cleanup.py +4 -0
  276. inspect_ai/util/_sandbox/docker/compose.py +2 -2
  277. inspect_ai/util/_sandbox/docker/docker.py +12 -1
  278. inspect_ai/util/_store_model.py +30 -7
  279. inspect_ai/util/_subprocess.py +13 -3
  280. inspect_ai/util/_subtask.py +1 -0
  281. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/METADATA +1 -1
  282. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/RECORD +295 -229
  283. inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +0 -169
  284. inspect_ai/_view/www/src/samples/transcript/SampleTranscript.tsx +0 -22
  285. /inspect_ai/{solver → agent}/_bridge/__init__.py +0 -0
  286. /inspect_ai/{solver/_human_agent → agent/_human}/__init__.py +0 -0
  287. /inspect_ai/{solver/_human_agent → agent/_human}/commands/command.py +0 -0
  288. /inspect_ai/{solver/_human_agent → agent/_human}/commands/instructions.py +0 -0
  289. /inspect_ai/{solver/_human_agent → agent/_human}/commands/note.py +0 -0
  290. /inspect_ai/{solver/_human_agent → agent/_human}/commands/status.py +0 -0
  291. /inspect_ai/{solver/_human_agent → agent/_human}/commands/submit.py +0 -0
  292. /inspect_ai/{solver/_human_agent → agent/_human}/panel.py +0 -0
  293. /inspect_ai/{solver/_human_agent → agent/_human}/view.py +0 -0
  294. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/WHEEL +0 -0
  295. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/entry_points.txt +0 -0
  296. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/licenses/LICENSE +0 -0
  297. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/top_level.txt +0 -0
inspect_ai/agent/_handoff.py (new file)
@@ -0,0 +1,93 @@
+from typing import Any
+
+from inspect_ai._util.registry import (
+    RegistryInfo,
+    is_registry_object,
+    registry_unqualified_name,
+    set_registry_info,
+)
+from inspect_ai.tool._tool import Tool, ToolResult
+from inspect_ai.tool._tool_description import ToolDescription, set_tool_description
+
+from ._agent import Agent
+from ._as_tool import agent_tool_info
+from ._filter import MessageFilter
+
+
+def handoff(
+    agent: Agent,
+    description: str | None = None,
+    input_filter: MessageFilter | None = None,
+    output_filter: MessageFilter | None = None,
+    tool_name: str | None = None,
+    **agent_kwargs: Any,
+) -> Tool:
+    """Create a tool that enables models to handoff to agents.
+
+    Args:
+        agent: Agent to hand off to.
+        description: Handoff tool description (defaults to agent description)
+        input_filter: Filter to modify the message history before calling the tool.
+            Use the built-in `remove_tools` filter to remove all tool calls
+            or alternatively specify a custom `MessageFilter` function.
+        output_filter: Filter to modify the message history after calling the tool.
+            Use the built-in `last_message` filter to return only the last message
+            or alternatively specify a custom `MessageFilter` function.
+        tool_name: Alternate tool name (defaults to `transfer_to_{agent_name}`)
+        **agent_kwargs: Arguments to curry to `Agent` function (arguments provided here
+            will not be presented to the model as part of the tool interface).
+
+    Returns:
+        Tool for handing off to the agent (must be called using `execute_tools()` to be
+        properly handled)
+    """
+    # agent must be registered (so we can get its name)
+    if not is_registry_object(agent):
+        raise RuntimeError(
+            "Agent passed to as_tool was not created by an @agent decorated function"
+        )
+
+    # get tool_info
+    tool_info = agent_tool_info(agent, description, **agent_kwargs)
+
+    # AgentTool calls will be intercepted by execute_tools
+    agent_tool = AgentTool(agent, input_filter, output_filter, **agent_kwargs)
+    tool_name = tool_name or f"transfer_to_{tool_info.name}"
+    set_registry_info(agent_tool, RegistryInfo(type="tool", name=tool_name))
+    set_tool_description(
+        agent_tool,
+        ToolDescription(
+            name=tool_name,
+            description=tool_info.description,
+            parameters=tool_info.parameters,
+        ),
+    )
+    return agent_tool
+
+
+class AgentTool(Tool):
+    def __init__(
+        self,
+        agent: Agent,
+        input_filter: MessageFilter | None = None,
+        output_filter: MessageFilter | None = None,
+        **kwargs: Any,
+    ):
+        self.agent = agent
+        self.input_filter = input_filter
+        self.output_filter = output_filter
+        self.kwargs = kwargs
+
+    @property
+    def __name__(self) -> str:
+        return registry_unqualified_name(self.agent)
+
+    async def __call__(self) -> ToolResult:
+        raise RuntimeError("AgentTool should not be called directly")
+
+
+def has_handoff(tools: list[Tool] | None) -> bool:
+    if tools:
+        return any([isinstance(tool, AgentTool) for tool in tools])
+    else:
+        return False
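The new handoff() API above wraps an @agent-decorated agent as a tool (named transfer_to_{agent_name} by default) whose calls are intercepted by execute_tools(). A minimal usage sketch, not part of the diff: it assumes react and handoff are re-exported from inspect_ai.agent (per the new inspect_ai/agent/__init__.py) and uses the existing web_search() tool.

# Hypothetical multi-agent setup: a supervisor react() agent that can hand the
# conversation off to a web-research agent via the transfer_to_researcher tool.
from inspect_ai.agent import handoff, react
from inspect_ai.tool import web_search

researcher = react(
    name="researcher",
    description="Searches the web and reports findings",
    prompt="You are a focused web researcher.",
    tools=[web_search()],
)

supervisor = react(
    prompt="Delegate research questions to the researcher.",
    tools=[handoff(researcher)],  # handoff tool; handled by execute_tools()
)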
inspect_ai/{solver/_human_agent → agent/_human}/agent.py
@@ -5,8 +5,7 @@ import anyio
 from inspect_ai.util import display_type, input_panel, sandbox
 from inspect_ai.util._sandbox.events import SandboxEnvironmentProxy
 
-from .._solver import Generate, Solver, solver
-from .._task_state import TaskState
+from .._agent import Agent, AgentState, agent
 from .commands import human_agent_commands
 from .install import install_human_agent
 from .panel import HumanAgentPanel
@@ -14,15 +13,15 @@ from .service import run_human_agent_service
 from .view import ConsoleView, HumanAgentView
 
 
-@solver
-def human_agent(
+@agent
+def human_cli(
     answer: bool | str = True,
     intermediate_scoring: bool = False,
     record_session: bool = True,
-) -> Solver:
-    """Human solver for agentic tasks that run in a Linux environment.
+) -> Agent:
+    """Human CLI agent for tasks that run in a sandbox.
 
-    The Human agent solver installs agent task tools in the default
+    The Human CLI agent installs agent task tools in the default
     sandbox and presents the user with both task instructions and
     documentation for the various tools (e.g. `task submit`,
     `task start`, `task stop` `task instructions`, etc.). A human agent panel
@@ -40,12 +39,12 @@ def human_agent(
         record_session: Record all user commands and outputs in the sandbox bash session.
 
     Returns:
-        Solver: Human agent solver.
+        Agent: Human CLI agent.
     """
     # we can only run one human agent interaction at a time (use lock to enforce)
    agent_lock = anyio.Lock()
 
-    async def solve(state: TaskState, generate: Generate) -> TaskState:
+    async def execute(state: AgentState) -> AgentState:
        async with agent_lock:
            # ensure that we have a sandbox to work with
            try:
@@ -58,7 +57,7 @@
                )
 
            # helper function to run the agent (called for fullscreen vs. fallback below)
-            async def run_human_agent(view: HumanAgentView) -> TaskState:
+            async def run_human_agent(view: HumanAgentView) -> AgentState:
                sandbox_proxy = cast(SandboxEnvironmentProxy, sandbox())
                with sandbox_proxy.no_events():
                    # create agent commands
@@ -67,7 +66,7 @@
                    )
 
                    # install agent tools
-                    await install_human_agent(state, commands, record_session)
+                    await install_human_agent(commands, record_session)
 
                    # hookup the view ui
                    view.connect(connection)
@@ -82,4 +81,4 @@
        else:
            return await run_human_agent(ConsoleView())
 
-    return solve
+    return execute
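human_agent() is reworked here as the human_cli() agent (operating on AgentState rather than TaskState). A brief sketch of using it where a solver is still required, assuming the new inspect_ai/agent/__init__.py exports human_cli and an as_solver() converter (per the _as_solver.py module added in this release); the file list also adds inspect_ai/solver/_human_agent.py (+52), presumably a compatibility wrapper.

# Hypothetical usage: convert the human CLI agent back into a solver.
from inspect_ai.agent import as_solver, human_cli

solver = as_solver(human_cli(intermediate_scoring=True))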
inspect_ai/{solver/_human_agent → agent/_human}/commands/__init__.py
@@ -1,5 +1,4 @@
-from inspect_ai.solver._task_state import TaskState
-
+from ..._agent import AgentState
 from .clock import StartCommand, StopCommand
 from .command import HumanAgentCommand
 from .instructions import InstructionsCommand
@@ -10,7 +9,7 @@ from .submit import QuitCommand, SubmitCommand, ValidateCommand
 
 
 def human_agent_commands(
-    state: TaskState,
+    state: AgentState,
     answer: bool | str,
     intermediate_scoring: bool,
     record_session: bool,
inspect_ai/{solver/_human_agent → agent/_human}/commands/clock.py
@@ -62,7 +62,9 @@ class StopCommand(HumanAgentCommand):
         return stop
 
 
-def clock_action_event(action: str, state: HumanAgentState) -> None:
+def clock_action_event(
+    action: Literal["start", "stop"], state: HumanAgentState
+) -> None:
     from inspect_ai.log._transcript import transcript
 
     transcript().info(
inspect_ai/{solver/_human_agent → agent/_human}/commands/score.py
@@ -8,13 +8,13 @@ from inspect_ai._util.ansi import render_text
 from inspect_ai.model._model_output import ModelOutput
 from inspect_ai.scorer._score import score
 
-from ..._task_state import TaskState
+from ..._agent import AgentState
 from ..state import HumanAgentState, IntermediateScoring
 from .command import HumanAgentCommand, call_human_agent
 
 
 class ScoreCommand(HumanAgentCommand):
-    def __init__(self, state: TaskState):
+    def __init__(self, state: AgentState):
         self._state = state
 
     @property
@@ -52,9 +52,9 @@ class ScoreCommand(HumanAgentCommand):
        async def score_task(answer: str | None) -> str:
            # make a copy of TaskState, add the answer, then score
            if answer:
-                task_state = deepcopy(self._state)
-                task_state.output = ModelOutput.from_content("human_agent", answer)
-                result = await score(task_state)
+                agent_state = deepcopy(self._state)
+                agent_state.output = ModelOutput.from_content("human_agent", answer)
+                result = await score(agent_state)
            else:
                result = await score(self._state)
 
inspect_ai/{solver/_human_agent → agent/_human}/install.py
@@ -3,7 +3,6 @@ from textwrap import dedent
 
 from inspect_ai.util import sandbox
 
-from .._task_state import TaskState
 from .commands.command import HumanAgentCommand
 
 INSTALL_DIR = "human_agent_install"
@@ -18,7 +17,7 @@ RECORD_SESSION_DIR = "/var/tmp/user-sessions"
 
 
 async def install_human_agent(
-    state: TaskState, commands: list[HumanAgentCommand], record_session: bool
+    commands: list[HumanAgentCommand], record_session: bool
 ) -> None:
     # see if we have already installed
     if not (await sandbox().exec(["mkdir", HUMAN_AGENT_DIR])).success:
@@ -183,8 +182,12 @@ def human_agent_bashrc(commands: list[HumanAgentCommand], record_session: bool)
     fi
     """).lstrip()
 
+    CLOCK = dedent("""
+    task start
+    """).lstrip()
+
     # return .bashrc
-    return "\n".join([TERMINAL_CHECK, COMMANDS, RECORDING, INSTRUCTIONS])
+    return "\n".join([TERMINAL_CHECK, COMMANDS, RECORDING, INSTRUCTIONS, CLOCK])
 
 
 def human_agent_install_sh() -> str:
inspect_ai/{solver/_human_agent → agent/_human}/service.py
@@ -1,20 +1,24 @@
+from inspect_ai.agent._human.commands.clock import clock_action_event
 from inspect_ai.model import ModelOutput
 from inspect_ai.util._sandbox import sandbox
 from inspect_ai.util._sandbox.service import sandbox_service
 
-from .._task_state import TaskState
+from .._agent import AgentState
 from .commands.command import HumanAgentCommand
 from .state import HumanAgentState
 from .view import HumanAgentView
 
 
 async def run_human_agent_service(
-    state: TaskState, commands: list[HumanAgentCommand], view: HumanAgentView | None
-) -> TaskState:
+    state: AgentState, commands: list[HumanAgentCommand], view: HumanAgentView | None
+) -> AgentState:
     # initialise agent state
     instructions = "\n\n".join([message.text for message in state.messages]).strip()
     agent_state = HumanAgentState(instructions=instructions)
 
+    # record that clock is stopped
+    clock_action_event("stop", agent_state)
+
     # extract service methods from commands
     methods = {
         command.name: command.service(agent_state)
inspect_ai/{solver/_human_agent → agent/_human}/state.py
@@ -1,4 +1,4 @@
-from time import time as current_time
+import time as python_time
 
 from pydantic import BaseModel, Field
 
@@ -25,7 +25,7 @@ class HumanAgentState(StoreModel):
         """Set current running state."""
         # if we are flipping to running mode then update started running
         if not self.running_state and running:
-            self.started_running = current_time()
+            self.started_running = python_time.time()
 
         # if we are exiting running mode then update accumulated time
         if self.running_state and not running:
@@ -37,7 +37,7 @@
     @property
     def time(self) -> float:
         """Total time spend on task."""
-        running_time = current_time() - self.started_running if self.running else 0
+        running_time = python_time.time() - self.started_running if self.running else 0
         return self.accumulated_time + running_time
 
     scorings: list[IntermediateScoring] = Field(default_factory=list)
@@ -50,6 +50,6 @@
     """Session logs generated by `script` """
 
     # internal state variables used by running and time properties
-    running_state: bool = Field(default=True)
-    started_running: float = Field(default_factory=current_time)
+    running_state: bool = Field(default=False)
+    started_running: float = Field(default_factory=python_time.time)
     accumulated_time: float = Field(default=0.0)
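For reference, HumanAgentState keeps time with a stopwatch pattern: accumulated_time banks completed intervals, and a live delta is added only while the clock runs. With this release the clock starts stopped (running_state=False) and is started by the task start line appended to the sandbox .bashrc above. A standalone illustration of the same pattern (not code from the package):

import time

class Stopwatch:
    """Accumulates elapsed time across start/stop cycles; starts stopped."""

    def __init__(self) -> None:
        self.running = False
        self.started_running = 0.0
        self.accumulated_time = 0.0

    def set_running(self, running: bool) -> None:
        if not self.running and running:
            self.started_running = time.time()  # stopped -> running: mark start
        if self.running and not running:
            # running -> stopped: bank the completed interval
            self.accumulated_time += time.time() - self.started_running
        self.running = running

    @property
    def total(self) -> float:
        live = time.time() - self.started_running if self.running else 0.0
        return self.accumulated_time + live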
inspect_ai/agent/_react.py (new file)
@@ -0,0 +1,241 @@
+from logging import getLogger
+
+from inspect_ai._util._async import is_callable_coroutine
+from inspect_ai.model._call_tools import execute_tools
+from inspect_ai.model._chat_message import (
+    ChatMessage,
+    ChatMessageSystem,
+    ChatMessageUser,
+)
+from inspect_ai.model._model import Model, get_model
+from inspect_ai.scorer._score import score
+from inspect_ai.tool._tool import Tool, ToolResult, tool
+from inspect_ai.tool._tool_call import ToolCall
+from inspect_ai.tool._tool_info import parse_tool_info
+from inspect_ai.tool._tool_with import tool_with
+
+from ._agent import Agent, AgentState, agent, agent_with
+from ._handoff import has_handoff
+from ._types import (
+    AgentAttempts,
+    AgentContinue,
+    AgentPrompt,
+    AgentSubmit,
+)
+
+logger = getLogger(__name__)
+
+
+@agent
+def react(
+    *,
+    name: str | None = None,
+    description: str | None = None,
+    prompt: str | AgentPrompt | None = AgentPrompt(),
+    tools: list[Tool] | None = None,
+    model: str | Model | Agent | None = None,
+    attempts: int | AgentAttempts = 1,
+    submit: AgentSubmit = AgentSubmit(),
+    on_continue: str | AgentContinue | None = None,
+) -> Agent:
+    """Extensible ReAct agent based on the paper [ReAct: Synergizing Reasoning and Acting in Language Models](https://arxiv.org/abs/2210.03629).
+
+    Provide a `name` and `description` for the agent if you plan on using it
+    in a multi-agent system (this is so other agents can clearly identify
+    its name and purpose). These fields are not required when using `react()`
+    as a top-level solver.
+
+    The agent runs a tool use loop until the model submits an answer using the
+    `submit()` tool. Use `instructions` to tailor the agent's system message
+    (the default `instructions` provides a basic ReAct prompt).
+
+    Use the `attempts` option to enable additional submissions if the initial
+    submission(s) are incorrect (by default, no additional attempts are permitted).
+
+    By default, the model will be urged to continue if it fails to call
+    a tool. Customise this behavior using the `on_continue` option.
+
+    Args:
+        name: Agent name (required when using with `handoff()` or `as_tool()`)
+        description: Agent description (required when using with `handoff()` or `as_tool()`)
+        prompt: Prompt for agent. Includes agent-specific contextual `instructions`
+            as well as an optional `assistant_prompt` and `handoff_prompt` (for agents
+            that use handoffs). both are provided by default but can be removed or
+            customized). Pass `str` to specify the instructions and use the defaults
+            for handoff and prompt messages.
+        tools: Tools available for the agent.
+        model: Model to use for agent (defaults to currently evaluated model).
+        attempts: Configure agent to make multiple attempts.
+        submit: Configure submit tool used by agent.
+        on_continue: Message to play back to the model to urge it to continue.
+            Optionally, can also be an async function to call to determine whether
+            the loop should continue (executed on every turn) and what message
+            to play back.
+
+    Returns:
+        ReAct agent.
+    """
+    # resolve prompt / system message
+    prompt = AgentPrompt(prompt) if isinstance(prompt, str) else prompt
+    if prompt:
+        prompt_lines: list[str] = []
+        if prompt.instructions:
+            prompt_lines.append(prompt.instructions)
+        if prompt.handoff_prompt and has_handoff(tools):
+            prompt_lines.append(prompt.handoff_prompt)
+        if prompt.assistant_prompt:
+            prompt_lines.append(prompt.assistant_prompt)
+        prompt_content = "\n\n".join(prompt_lines).format(submit=submit.name)
+        system_message: ChatMessage | None = ChatMessageSystem(content=prompt_content)
+    else:
+        system_message = None
+
+    # resolve on_continue
+    if on_continue is None:
+        on_continue = "If you believe you have completed the task, please call the `submit()` tool with your answer."
+    if isinstance(on_continue, str):
+        no_tools_continue_message = on_continue
+
+        async def no_tools_continue(state: AgentState) -> bool | str:
+            if state.output is None or not state.output.message.tool_calls:
+                return no_tools_continue_message
+            else:
+                return True
+
+        on_continue = no_tools_continue
+
+    # validate that on_continue is async
+    if not is_callable_coroutine(on_continue):
+        raise ValueError("The on_continue function must be async.")
+
+    # resolve attempts
+    attempts = AgentAttempts(attempts) if isinstance(attempts, int) else attempts
+
+    # submission tool
+    @tool
+    def submit_tool() -> Tool:
+        async def execute(answer: str) -> ToolResult:
+            """Submit an answer for evaluation.
+
+            Args:
+                answer (str): Submitted answer
+            """
+            return answer
+
+        return execute
+
+    # helper to see if there is a submit tool call
+    def submitted_answer(tool_calls: list[ToolCall] | None) -> str | None:
+        for tool_call in tool_calls or []:
+            if tool_call.function == submit.name and tool_call.parse_error is None:
+                return str(tool_call.arguments["answer"])
+        return None
+
+    # resolve tools
+    tools = tools or []
+    tools.append(tool_with(submit_tool(), submit.name, submit.description))
+
+    async def execute(state: AgentState) -> AgentState:
+        # prepend system message if we have one
+        if system_message:
+            state.messages.insert(0, system_message)
+
+        # track attempts
+        attempt_count = 0
+
+        # main loop = will terminate after submit (subject to max_attempts)
+        # or if a message or token limit is hit
+        while True:
+            # generate output and append assistant message
+            state = await _agent_generate(model, state, tools)
+
+            # check for context window overflow
+            if state.output.stop_reason == "model_length":
+                from inspect_ai.log._transcript import transcript
+
+                transcript().info("Agent terminated: model context window exceeded")
+                break
+
+            # check for a submission
+            answer = submitted_answer(state.output.message.tool_calls)
+            if answer is not None:
+                # remove the tool call and set the output to the answer for scoring
+                state.output.message.tool_calls = None
+                state.output.completion = (
+                    f"{state.output.completion}\n\n{answer}".strip()
+                )
+
+                # exit if we are at max_attempts
+                attempt_count += 1
+                if attempt_count >= attempts.attempts:
+                    break
+
+                # exit if the submission is successful
+                answer_scores = await score(state)
+                if attempts.score_value(answer_scores[0].value) == 1.0:
+                    break
+
+                # otherwise notify the model that it was incorrect and continue
+                else:
+                    if callable(attempts.incorrect_message):
+                        if not is_callable_coroutine(attempts.incorrect_message):
+                            raise ValueError(
+                                "The incorrect_message function must be async."
+                            )
+                        response_message: str = await attempts.incorrect_message(
+                            state, answer_scores
+                        )
+                    else:
+                        response_message = attempts.incorrect_message
+
+                    state.messages.append(ChatMessageUser(content=response_message))
+
+            # no submitted answer, call tools and evaluate whether we should continue
+            else:
+                if state.output.message.tool_calls:
+                    # call tool functions
+                    messages, output = await execute_tools(state.messages, tools)
+                    state.messages.extend(messages)
+                    if output:
+                        state.output = output
+
+                # check if we should continue....
+                do_continue = await on_continue(state)
+                if isinstance(do_continue, str):
+                    state.messages.append(ChatMessageUser(content=do_continue))
+                elif do_continue is False:
+                    break
+
+        return state
+
+    if name is not None or description is not None:
+        return agent_with(execute, name=name, description=description)
+    else:
+        return execute
+
+
+async def _agent_generate(
+    model: str | Model | Agent | None, state: AgentState, tools: list[Tool]
+) -> AgentState:
+    # convert model to agent
+    if isinstance(model, str | Model) or model is None:
+        model = _model_generate(model)
+
+    # confirm we have a tools param
+    agent_tool_info = parse_tool_info(model)
+    if "tools" not in agent_tool_info.parameters.properties:
+        raise ValueError(
+            "Agent passed as model for react agent must have a tools parameter."
+        )
+
+    # call the agent
+    return await model(state, tools)
+
+
+def _model_generate(model: str | Model | None) -> Agent:
+    async def generate(state: AgentState, tools: list[Tool]) -> AgentState:
+        state.output = await get_model(model).generate(state.messages, tools)
+        state.messages.append(state.output.message)
+        return state
+
+    return generate
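The react() agent above is the generalized tool-use loop: generate, execute tools, and stop once the submit tool is called (optionally retrying per attempts). An illustrative task using it as a solver, assuming react is exported from inspect_ai.agent and that an agent can be passed directly where a solver is expected (the _as_solver.py and _eval/task changes in this release appear to enable this; otherwise wrap it with as_solver()):

from inspect_ai import Task, task
from inspect_ai.agent import react
from inspect_ai.dataset import Sample
from inspect_ai.scorer import includes
from inspect_ai.tool import bash

@task
def ctf_example():
    # Hypothetical CTF-style task; sample content is illustrative only.
    return Task(
        dataset=[Sample(input="Recover the flag from flag.txt", target="flag{example}")],
        solver=react(
            prompt="You are an expert at solving capture-the-flag challenges.",
            tools=[bash()],
            attempts=2,  # allow one retry after an incorrect submission
        ),
        scorer=includes(),
        sandbox="docker",
    )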
inspect_ai/agent/_run.py (new file)
@@ -0,0 +1,36 @@
+from copy import copy
+from typing import Any
+
+from inspect_ai.model._chat_message import ChatMessage, ChatMessageUser
+
+from ._agent import Agent, AgentState
+
+
+async def run(
+    agent: Agent, input: str | list[ChatMessage] | AgentState, **agent_kwargs: Any
+) -> AgentState:
+    """Run an agent.
+
+    The input messages(s) will be copied prior to running so are
+    not modified in place.
+
+    Args:
+        agent: Agent to run.
+        input: Agent input (string, list of messages, or an `AgentState`).
+        **agent_kwargs: Additional arguments to pass to agent.
+
+    Returns:
+        AgentState: Messages and generated output.
+    """
+    # copy input so we don't mutate it in place
+    input = copy(input)
+
+    # resolve str
+    if isinstance(input, str):
+        input = [ChatMessageUser(content=input)]
+
+    # create state
+    state = AgentState(messages=input) if isinstance(input, list) else input
+
+    # run the agent
+    return await agent(state, **agent_kwargs)
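run() executes an agent directly, copying the input and wrapping a plain string as a user message before invoking it. A sketch assuming agent, run, Agent, and AgentState are exported from inspect_ai.agent; the model name is an arbitrary example:

import asyncio

from inspect_ai.agent import Agent, AgentState, agent, run
from inspect_ai.model import get_model

@agent
def chat_agent() -> Agent:
    async def execute(state: AgentState) -> AgentState:
        # single generation over the accumulated messages (mirrors _model_generate above)
        state.output = await get_model("openai/gpt-4o-mini").generate(state.messages)
        state.messages.append(state.output.message)
        return state

    return execute

async def main() -> None:
    state = await run(chat_agent(), "What is the capital of France?")
    print(state.output.completion)

asyncio.run(main())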
inspect_ai/agent/_types.py (new file)
@@ -0,0 +1,81 @@
+from typing import Awaitable, Callable, NamedTuple, TypeAlias
+
+from inspect_ai.agent._agent import AgentState
+from inspect_ai.scorer._metric import Score, ValueToFloat, value_to_float
+
+DEFAULT_HANDOFF_PROMPT = """
+You are part of a multi-agent system designed to make agent coordination and
+execution easy. Agents uses two primary abstraction: **Agents** and **Handoffs**.
+An agent encompasses instructions and tools and can hand off a conversation to
+another agent when appropriate. Handoffs are achieved by calling a handoff function,
+generally named `transfer_to_<agent_name>`. Transfers between agents are handled
+seamlessly in the background; do not mention or draw attention to these transfers
+in your conversation with the user.
+"""
+
+
+DEFAULT_ASSISTANT_PROMPT = """
+You are a helpful assistant attempting to submit the best possible answer.
+You have several tools available to help with finding the answer. You will
+see the result of tool calls right after sending the message. If you need
+to perform multiple actions, you can always send more messages with additional
+tool calls. Do some reasoning before your actions, describing what tool calls
+you are going to use and how they fit into your plan.
+
+When you have completed the task and have an answer, call the {submit}()
+tool to report it.
+"""
+
+
+class AgentPrompt(NamedTuple):
+    """Prompt for agent."""
+
+    instructions: str | None = None
+    """Agent-specific contextual instructions."""
+
+    handoff_prompt: str | None = DEFAULT_HANDOFF_PROMPT
+    """Prompt used when there are additional handoff agents active."""
+
+    assistant_prompt: str | None = DEFAULT_ASSISTANT_PROMPT
+    """Prompt for assistant (covers tool use, submit tool, CoT, etc.)."""
+
+
+AgentContinue: TypeAlias = Callable[[AgentState], Awaitable[bool | str]]
+"""Function called to determine whether the agent should continue.
+
+Returns `True` to continue (with no additional messages inserted),
+return `False` to stop. Returns `str` to continue with an additional
+custom user message inserted.
+"""
+
+
+class AgentAttempts(NamedTuple):
+    """Configure a react agent to make multiple attempts.
+
+    Submissions are evaluated using the task's main scorer, with value of 1.0
+    indicating a correct answer. Scorer values are converted to float (e.g.
+    "C" becomes 1.0) using the standard value_to_float() function. Provide an
+    alternate conversion scheme as required via `score_value`.
+    """
+
+    attempts: int = 1
+    """Maximum number of attempts."""
+
+    incorrect_message: str | Callable[[AgentState, list[Score]], Awaitable[str]] = (
+        "Your submission was incorrect. Please proceed and attempt to find the correct answer."
+    )
+    """User message reply for an incorrect submission from the model. Alternatively,
+    an async function which returns a message."""
+
+    score_value: ValueToFloat = value_to_float()
+    """Function used to extract float from scores (defaults to standard value_to_float())"""
+
+
+class AgentSubmit(NamedTuple):
+    """Configure the submit tool of a react agent."""
+
+    name: str = "submit"
+    """Name for submit tool."""
+
+    description: str = "Submit an answer for evaluation."
+    """Description of submit tool."""