@roackb2/heddle 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (370)
  1. package/README.md +273 -0
  2. package/dist/examples/conversation.d.ts +2 -0
  3. package/dist/examples/conversation.d.ts.map +1 -0
  4. package/dist/examples/conversation.js +3 -0
  5. package/dist/examples/conversation.js.map +1 -0
  6. package/dist/examples/repo-investigator.d.ts +2 -0
  7. package/dist/examples/repo-investigator.d.ts.map +1 -0
  8. package/dist/examples/repo-investigator.js +57 -0
  9. package/dist/examples/repo-investigator.js.map +1 -0
  10. package/dist/src/__tests__/chat-activity-format.test.d.ts +2 -0
  11. package/dist/src/__tests__/chat-activity-format.test.d.ts.map +1 -0
  12. package/dist/src/__tests__/chat-activity-format.test.js +41 -0
  13. package/dist/src/__tests__/chat-activity-format.test.js.map +1 -0
  14. package/dist/src/__tests__/chat-compaction.test.d.ts +2 -0
  15. package/dist/src/__tests__/chat-compaction.test.d.ts.map +1 -0
  16. package/dist/src/__tests__/chat-compaction.test.js +24 -0
  17. package/dist/src/__tests__/chat-compaction.test.js.map +1 -0
  18. package/dist/src/__tests__/chat-format.test.d.ts +2 -0
  19. package/dist/src/__tests__/chat-format.test.d.ts.map +1 -0
  20. package/dist/src/__tests__/chat-format.test.js +124 -0
  21. package/dist/src/__tests__/chat-format.test.js.map +1 -0
  22. package/dist/src/__tests__/chat-runtime.test.d.ts +2 -0
  23. package/dist/src/__tests__/chat-runtime.test.d.ts.map +1 -0
  24. package/dist/src/__tests__/chat-runtime.test.js +39 -0
  25. package/dist/src/__tests__/chat-runtime.test.js.map +1 -0
  26. package/dist/src/__tests__/core-utils.test.d.ts +2 -0
  27. package/dist/src/__tests__/core-utils.test.d.ts.map +1 -0
  28. package/dist/src/__tests__/core-utils.test.js +87 -0
  29. package/dist/src/__tests__/core-utils.test.js.map +1 -0
  30. package/dist/src/__tests__/llm-factory.test.d.ts +2 -0
  31. package/dist/src/__tests__/llm-factory.test.d.ts.map +1 -0
  32. package/dist/src/__tests__/llm-factory.test.js +45 -0
  33. package/dist/src/__tests__/llm-factory.test.js.map +1 -0
  34. package/dist/src/__tests__/local-commands.test.d.ts +2 -0
  35. package/dist/src/__tests__/local-commands.test.d.ts.map +1 -0
  36. package/dist/src/__tests__/local-commands.test.js +163 -0
  37. package/dist/src/__tests__/local-commands.test.js.map +1 -0
  38. package/dist/src/__tests__/project-approval-rules.test.d.ts +2 -0
  39. package/dist/src/__tests__/project-approval-rules.test.d.ts.map +1 -0
  40. package/dist/src/__tests__/project-approval-rules.test.js +135 -0
  41. package/dist/src/__tests__/project-approval-rules.test.js.map +1 -0
  42. package/dist/src/__tests__/prompts.test.d.ts +2 -0
  43. package/dist/src/__tests__/prompts.test.d.ts.map +1 -0
  44. package/dist/src/__tests__/prompts.test.js +43 -0
  45. package/dist/src/__tests__/prompts.test.js.map +1 -0
  46. package/dist/src/__tests__/run-agent.test.d.ts +2 -0
  47. package/dist/src/__tests__/run-agent.test.d.ts.map +1 -0
  48. package/dist/src/__tests__/run-agent.test.js +1276 -0
  49. package/dist/src/__tests__/run-agent.test.js.map +1 -0
  50. package/dist/src/__tests__/run-shell.command.test.d.ts +2 -0
  51. package/dist/src/__tests__/run-shell.command.test.d.ts.map +1 -0
  52. package/dist/src/__tests__/run-shell.command.test.js +188 -0
  53. package/dist/src/__tests__/run-shell.command.test.js.map +1 -0
  54. package/dist/src/__tests__/smoke.test.d.ts +2 -0
  55. package/dist/src/__tests__/smoke.test.d.ts.map +1 -0
  56. package/dist/src/__tests__/smoke.test.js +314 -0
  57. package/dist/src/__tests__/smoke.test.js.map +1 -0
  58. package/dist/src/__tests__/tools.test.d.ts +2 -0
  59. package/dist/src/__tests__/tools.test.d.ts.map +1 -0
  60. package/dist/src/__tests__/tools.test.js +557 -0
  61. package/dist/src/__tests__/tools.test.js.map +1 -0
  62. package/dist/src/__tests__/trace-format.test.d.ts +2 -0
  63. package/dist/src/__tests__/trace-format.test.d.ts.map +1 -0
  64. package/dist/src/__tests__/trace-format.test.js +148 -0
  65. package/dist/src/__tests__/trace-format.test.js.map +1 -0
  66. package/dist/src/cli/ask.d.ts +11 -0
  67. package/dist/src/cli/ask.d.ts.map +1 -0
  68. package/dist/src/cli/ask.js +59 -0
  69. package/dist/src/cli/ask.js.map +1 -0
  70. package/dist/src/cli/chat/App.d.ts +5 -0
  71. package/dist/src/cli/chat/App.d.ts.map +1 -0
  72. package/dist/src/cli/chat/App.js +348 -0
  73. package/dist/src/cli/chat/App.js.map +1 -0
  74. package/dist/src/cli/chat/actions.d.ts +47 -0
  75. package/dist/src/cli/chat/actions.d.ts.map +1 -0
  76. package/dist/src/cli/chat/actions.js +215 -0
  77. package/dist/src/cli/chat/actions.js.map +1 -0
  78. package/dist/src/cli/chat/components/ActivityPanel.d.ts +11 -0
  79. package/dist/src/cli/chat/components/ActivityPanel.d.ts.map +1 -0
  80. package/dist/src/cli/chat/components/ActivityPanel.js +20 -0
  81. package/dist/src/cli/chat/components/ActivityPanel.js.map +1 -0
  82. package/dist/src/cli/chat/components/ApprovalComposer.d.ts +6 -0
  83. package/dist/src/cli/chat/components/ApprovalComposer.d.ts.map +1 -0
  84. package/dist/src/cli/chat/components/ApprovalComposer.js +30 -0
  85. package/dist/src/cli/chat/components/ApprovalComposer.js.map +1 -0
  86. package/dist/src/cli/chat/components/CommandHintPanel.d.ts +5 -0
  87. package/dist/src/cli/chat/components/CommandHintPanel.d.ts.map +1 -0
  88. package/dist/src/cli/chat/components/CommandHintPanel.js +13 -0
  89. package/dist/src/cli/chat/components/CommandHintPanel.js.map +1 -0
  90. package/dist/src/cli/chat/components/ConversationPanel.d.ts +16 -0
  91. package/dist/src/cli/chat/components/ConversationPanel.d.ts.map +1 -0
  92. package/dist/src/cli/chat/components/ConversationPanel.js +218 -0
  93. package/dist/src/cli/chat/components/ConversationPanel.js.map +1 -0
  94. package/dist/src/cli/chat/components/ModelPickerPanel.d.ts +7 -0
  95. package/dist/src/cli/chat/components/ModelPickerPanel.d.ts.map +1 -0
  96. package/dist/src/cli/chat/components/ModelPickerPanel.js +32 -0
  97. package/dist/src/cli/chat/components/ModelPickerPanel.js.map +1 -0
  98. package/dist/src/cli/chat/components/PromptInput.d.ts +29 -0
  99. package/dist/src/cli/chat/components/PromptInput.d.ts.map +1 -0
  100. package/dist/src/cli/chat/components/PromptInput.js +132 -0
  101. package/dist/src/cli/chat/components/PromptInput.js.map +1 -0
  102. package/dist/src/cli/chat/components/RecentTurnsPanel.d.ts +5 -0
  103. package/dist/src/cli/chat/components/RecentTurnsPanel.d.ts.map +1 -0
  104. package/dist/src/cli/chat/components/RecentTurnsPanel.js +10 -0
  105. package/dist/src/cli/chat/components/RecentTurnsPanel.js.map +1 -0
  106. package/dist/src/cli/chat/components/SessionPickerPanel.d.ts +12 -0
  107. package/dist/src/cli/chat/components/SessionPickerPanel.d.ts.map +1 -0
  108. package/dist/src/cli/chat/components/SessionPickerPanel.js +32 -0
  109. package/dist/src/cli/chat/components/SessionPickerPanel.js.map +1 -0
  110. package/dist/src/cli/chat/components/SlashHintPanel.d.ts +8 -0
  111. package/dist/src/cli/chat/components/SlashHintPanel.d.ts.map +1 -0
  112. package/dist/src/cli/chat/components/SlashHintPanel.js +38 -0
  113. package/dist/src/cli/chat/components/SlashHintPanel.js.map +1 -0
  114. package/dist/src/cli/chat/components/index.d.ts +10 -0
  115. package/dist/src/cli/chat/components/index.d.ts.map +1 -0
  116. package/dist/src/cli/chat/components/index.js +10 -0
  117. package/dist/src/cli/chat/components/index.js.map +1 -0
  118. package/dist/src/cli/chat/format.d.ts +23 -0
  119. package/dist/src/cli/chat/format.d.ts.map +1 -0
  120. package/dist/src/cli/chat/format.js +243 -0
  121. package/dist/src/cli/chat/format.js.map +1 -0
  122. package/dist/src/cli/chat/hooks/useAgentRun.d.ts +61 -0
  123. package/dist/src/cli/chat/hooks/useAgentRun.d.ts.map +1 -0
  124. package/dist/src/cli/chat/hooks/useAgentRun.js +463 -0
  125. package/dist/src/cli/chat/hooks/useAgentRun.js.map +1 -0
  126. package/dist/src/cli/chat/hooks/useApprovalFlow.d.ts +31 -0
  127. package/dist/src/cli/chat/hooks/useApprovalFlow.d.ts.map +1 -0
  128. package/dist/src/cli/chat/hooks/useApprovalFlow.js +145 -0
  129. package/dist/src/cli/chat/hooks/useApprovalFlow.js.map +1 -0
  130. package/dist/src/cli/chat/hooks/useChatSessions.d.ts +23 -0
  131. package/dist/src/cli/chat/hooks/useChatSessions.d.ts.map +1 -0
  132. package/dist/src/cli/chat/hooks/useChatSessions.js +124 -0
  133. package/dist/src/cli/chat/hooks/useChatSessions.js.map +1 -0
  134. package/dist/src/cli/chat/hooks/useProjectApprovals.d.ts +8 -0
  135. package/dist/src/cli/chat/hooks/useProjectApprovals.d.ts.map +1 -0
  136. package/dist/src/cli/chat/hooks/useProjectApprovals.js +33 -0
  137. package/dist/src/cli/chat/hooks/useProjectApprovals.js.map +1 -0
  138. package/dist/src/cli/chat/index.d.ts +4 -0
  139. package/dist/src/cli/chat/index.d.ts.map +1 -0
  140. package/dist/src/cli/chat/index.js +9 -0
  141. package/dist/src/cli/chat/index.js.map +1 -0
  142. package/dist/src/cli/chat/local-commands.d.ts +17 -0
  143. package/dist/src/cli/chat/local-commands.d.ts.map +1 -0
  144. package/dist/src/cli/chat/local-commands.js +180 -0
  145. package/dist/src/cli/chat/local-commands.js.map +1 -0
  146. package/dist/src/cli/chat/panels.d.ts +37 -0
  147. package/dist/src/cli/chat/panels.d.ts.map +1 -0
  148. package/dist/src/cli/chat/panels.js +142 -0
  149. package/dist/src/cli/chat/panels.js.map +1 -0
  150. package/dist/src/cli/chat/runtime.d.ts +26 -0
  151. package/dist/src/cli/chat/runtime.d.ts.map +1 -0
  152. package/dist/src/cli/chat/runtime.js +28 -0
  153. package/dist/src/cli/chat/runtime.js.map +1 -0
  154. package/dist/src/cli/chat/state/approval-rules.d.ts +21 -0
  155. package/dist/src/cli/chat/state/approval-rules.d.ts.map +1 -0
  156. package/dist/src/cli/chat/state/approval-rules.js +196 -0
  157. package/dist/src/cli/chat/state/approval-rules.js.map +1 -0
  158. package/dist/src/cli/chat/state/compaction.d.ts +13 -0
  159. package/dist/src/cli/chat/state/compaction.d.ts.map +1 -0
  160. package/dist/src/cli/chat/state/compaction.js +143 -0
  161. package/dist/src/cli/chat/state/compaction.js.map +1 -0
  162. package/dist/src/cli/chat/state/local-commands.d.ts +17 -0
  163. package/dist/src/cli/chat/state/local-commands.d.ts.map +1 -0
  164. package/dist/src/cli/chat/state/local-commands.js +169 -0
  165. package/dist/src/cli/chat/state/local-commands.js.map +1 -0
  166. package/dist/src/cli/chat/state/storage.d.ts +14 -0
  167. package/dist/src/cli/chat/state/storage.d.ts.map +1 -0
  168. package/dist/src/cli/chat/state/storage.js +144 -0
  169. package/dist/src/cli/chat/state/storage.js.map +1 -0
  170. package/dist/src/cli/chat/state/types.d.ts +69 -0
  171. package/dist/src/cli/chat/state/types.d.ts.map +1 -0
  172. package/dist/src/cli/chat/state/types.js +2 -0
  173. package/dist/src/cli/chat/state/types.js.map +1 -0
  174. package/dist/src/cli/chat/storage.d.ts +13 -0
  175. package/dist/src/cli/chat/storage.d.ts.map +1 -0
  176. package/dist/src/cli/chat/storage.js +126 -0
  177. package/dist/src/cli/chat/storage.js.map +1 -0
  178. package/dist/src/cli/chat/submit.d.ts +28 -0
  179. package/dist/src/cli/chat/submit.d.ts.map +1 -0
  180. package/dist/src/cli/chat/submit.js +90 -0
  181. package/dist/src/cli/chat/submit.js.map +1 -0
  182. package/dist/src/cli/chat/types.d.ts +51 -0
  183. package/dist/src/cli/chat/types.d.ts.map +1 -0
  184. package/dist/src/cli/chat/types.js +2 -0
  185. package/dist/src/cli/chat/types.js.map +1 -0
  186. package/dist/src/cli/chat/use-run-state.d.ts +23 -0
  187. package/dist/src/cli/chat/use-run-state.d.ts.map +1 -0
  188. package/dist/src/cli/chat/use-run-state.js +118 -0
  189. package/dist/src/cli/chat/use-run-state.js.map +1 -0
  190. package/dist/src/cli/chat/use-sessions.d.ts +21 -0
  191. package/dist/src/cli/chat/use-sessions.d.ts.map +1 -0
  192. package/dist/src/cli/chat/use-sessions.js +111 -0
  193. package/dist/src/cli/chat/use-sessions.js.map +1 -0
  194. package/dist/src/cli/chat/utils/format.d.ts +41 -0
  195. package/dist/src/cli/chat/utils/format.d.ts.map +1 -0
  196. package/dist/src/cli/chat/utils/format.js +578 -0
  197. package/dist/src/cli/chat/utils/format.js.map +1 -0
  198. package/dist/src/cli/chat/utils/runtime.d.ts +31 -0
  199. package/dist/src/cli/chat/utils/runtime.d.ts.map +1 -0
  200. package/dist/src/cli/chat/utils/runtime.js +56 -0
  201. package/dist/src/cli/chat/utils/runtime.js.map +1 -0
  202. package/dist/src/cli/chat-actions.d.ts +47 -0
  203. package/dist/src/cli/chat-actions.d.ts.map +1 -0
  204. package/dist/src/cli/chat-actions.js +215 -0
  205. package/dist/src/cli/chat-actions.js.map +1 -0
  206. package/dist/src/cli/chat-format.d.ts +23 -0
  207. package/dist/src/cli/chat-format.d.ts.map +1 -0
  208. package/dist/src/cli/chat-format.js +243 -0
  209. package/dist/src/cli/chat-format.js.map +1 -0
  210. package/dist/src/cli/chat-local-commands.d.ts +17 -0
  211. package/dist/src/cli/chat-local-commands.d.ts.map +1 -0
  212. package/dist/src/cli/chat-local-commands.js +180 -0
  213. package/dist/src/cli/chat-local-commands.js.map +1 -0
  214. package/dist/src/cli/chat-panels.d.ts +37 -0
  215. package/dist/src/cli/chat-panels.d.ts.map +1 -0
  216. package/dist/src/cli/chat-panels.js +142 -0
  217. package/dist/src/cli/chat-panels.js.map +1 -0
  218. package/dist/src/cli/chat-runtime.d.ts +26 -0
  219. package/dist/src/cli/chat-runtime.d.ts.map +1 -0
  220. package/dist/src/cli/chat-runtime.js +28 -0
  221. package/dist/src/cli/chat-runtime.js.map +1 -0
  222. package/dist/src/cli/chat-storage.d.ts +13 -0
  223. package/dist/src/cli/chat-storage.d.ts.map +1 -0
  224. package/dist/src/cli/chat-storage.js +126 -0
  225. package/dist/src/cli/chat-storage.js.map +1 -0
  226. package/dist/src/cli/chat-submit.d.ts +28 -0
  227. package/dist/src/cli/chat-submit.d.ts.map +1 -0
  228. package/dist/src/cli/chat-submit.js +90 -0
  229. package/dist/src/cli/chat-submit.js.map +1 -0
  230. package/dist/src/cli/chat-types.d.ts +51 -0
  231. package/dist/src/cli/chat-types.d.ts.map +1 -0
  232. package/dist/src/cli/chat-types.js +2 -0
  233. package/dist/src/cli/chat-types.js.map +1 -0
  234. package/dist/src/cli/chat.d.ts +4 -0
  235. package/dist/src/cli/chat.d.ts.map +1 -0
  236. package/dist/src/cli/chat.js +153 -0
  237. package/dist/src/cli/chat.js.map +1 -0
  238. package/dist/src/cli/main.d.ts +3 -0
  239. package/dist/src/cli/main.d.ts.map +1 -0
  240. package/dist/src/cli/main.js +190 -0
  241. package/dist/src/cli/main.js.map +1 -0
  242. package/dist/src/cli/useChatRunState.d.ts +23 -0
  243. package/dist/src/cli/useChatRunState.d.ts.map +1 -0
  244. package/dist/src/cli/useChatRunState.js +118 -0
  245. package/dist/src/cli/useChatRunState.js.map +1 -0
  246. package/dist/src/cli/useChatSessions.d.ts +21 -0
  247. package/dist/src/cli/useChatSessions.d.ts.map +1 -0
  248. package/dist/src/cli/useChatSessions.js +111 -0
  249. package/dist/src/cli/useChatSessions.js.map +1 -0
  250. package/dist/src/config.d.ts +4 -0
  251. package/dist/src/config.d.ts.map +1 -0
  252. package/dist/src/config.js +7 -0
  253. package/dist/src/config.js.map +1 -0
  254. package/dist/src/index.d.ts +36 -0
  255. package/dist/src/index.d.ts.map +1 -0
  256. package/dist/src/index.js +31 -0
  257. package/dist/src/index.js.map +1 -0
  258. package/dist/src/llm/anthropic.d.ts +7 -0
  259. package/dist/src/llm/anthropic.d.ts.map +1 -0
  260. package/dist/src/llm/anthropic.js +115 -0
  261. package/dist/src/llm/anthropic.js.map +1 -0
  262. package/dist/src/llm/factory.d.ts +10 -0
  263. package/dist/src/llm/factory.d.ts.map +1 -0
  264. package/dist/src/llm/factory.js +53 -0
  265. package/dist/src/llm/factory.js.map +1 -0
  266. package/dist/src/llm/openai-models.d.ts +15 -0
  267. package/dist/src/llm/openai-models.d.ts.map +1 -0
  268. package/dist/src/llm/openai-models.js +97 -0
  269. package/dist/src/llm/openai-models.js.map +1 -0
  270. package/dist/src/llm/openai.d.ts +10 -0
  271. package/dist/src/llm/openai.d.ts.map +1 -0
  272. package/dist/src/llm/openai.js +170 -0
  273. package/dist/src/llm/openai.js.map +1 -0
  274. package/dist/src/llm/types.d.ts +64 -0
  275. package/dist/src/llm/types.d.ts.map +1 -0
  276. package/dist/src/llm/types.js +5 -0
  277. package/dist/src/llm/types.js.map +1 -0
  278. package/dist/src/prompts/system-prompt.d.ts +6 -0
  279. package/dist/src/prompts/system-prompt.d.ts.map +1 -0
  280. package/dist/src/prompts/system-prompt.js +117 -0
  281. package/dist/src/prompts/system-prompt.js.map +1 -0
  282. package/dist/src/run-agent/history.d.ts +3 -0
  283. package/dist/src/run-agent/history.d.ts.map +1 -0
  284. package/dist/src/run-agent/history.js +36 -0
  285. package/dist/src/run-agent/history.js.map +1 -0
  286. package/dist/src/run-agent/mutation-tracking.d.ts +22 -0
  287. package/dist/src/run-agent/mutation-tracking.d.ts.map +1 -0
  288. package/dist/src/run-agent/mutation-tracking.js +116 -0
  289. package/dist/src/run-agent/mutation-tracking.js.map +1 -0
  290. package/dist/src/run-agent/post-mutation.d.ts +23 -0
  291. package/dist/src/run-agent/post-mutation.d.ts.map +1 -0
  292. package/dist/src/run-agent/post-mutation.js +106 -0
  293. package/dist/src/run-agent/post-mutation.js.map +1 -0
  294. package/dist/src/run-agent/progress-reminders.d.ts +13 -0
  295. package/dist/src/run-agent/progress-reminders.d.ts.map +1 -0
  296. package/dist/src/run-agent/progress-reminders.js +65 -0
  297. package/dist/src/run-agent/progress-reminders.js.map +1 -0
  298. package/dist/src/run-agent/tool-dispatch.d.ts +31 -0
  299. package/dist/src/run-agent/tool-dispatch.d.ts.map +1 -0
  300. package/dist/src/run-agent/tool-dispatch.js +114 -0
  301. package/dist/src/run-agent/tool-dispatch.js.map +1 -0
  302. package/dist/src/run-agent/util.d.ts +10 -0
  303. package/dist/src/run-agent/util.d.ts.map +1 -0
  304. package/dist/src/run-agent/util.js +64 -0
  305. package/dist/src/run-agent/util.js.map +1 -0
  306. package/dist/src/run-agent.d.ts +36 -0
  307. package/dist/src/run-agent.d.ts.map +1 -0
  308. package/dist/src/run-agent.js +477 -0
  309. package/dist/src/run-agent.js.map +1 -0
  310. package/dist/src/tools/edit-file.d.ts +10 -0
  311. package/dist/src/tools/edit-file.d.ts.map +1 -0
  312. package/dist/src/tools/edit-file.js +268 -0
  313. package/dist/src/tools/edit-file.js.map +1 -0
  314. package/dist/src/tools/execute-tool.d.ts +8 -0
  315. package/dist/src/tools/execute-tool.d.ts.map +1 -0
  316. package/dist/src/tools/execute-tool.js +31 -0
  317. package/dist/src/tools/execute-tool.js.map +1 -0
  318. package/dist/src/tools/list-files.d.ts +3 -0
  319. package/dist/src/tools/list-files.d.ts.map +1 -0
  320. package/dist/src/tools/list-files.js +49 -0
  321. package/dist/src/tools/list-files.js.map +1 -0
  322. package/dist/src/tools/read-file.d.ts +3 -0
  323. package/dist/src/tools/read-file.d.ts.map +1 -0
  324. package/dist/src/tools/read-file.js +73 -0
  325. package/dist/src/tools/read-file.js.map +1 -0
  326. package/dist/src/tools/registry.d.ts +11 -0
  327. package/dist/src/tools/registry.d.ts.map +1 -0
  328. package/dist/src/tools/registry.js +27 -0
  329. package/dist/src/tools/registry.js.map +1 -0
  330. package/dist/src/tools/report-state.d.ts +3 -0
  331. package/dist/src/tools/report-state.d.ts.map +1 -0
  332. package/dist/src/tools/report-state.js +63 -0
  333. package/dist/src/tools/report-state.js.map +1 -0
  334. package/dist/src/tools/run-shell.d.ts +41 -0
  335. package/dist/src/tools/run-shell.d.ts.map +1 -0
  336. package/dist/src/tools/run-shell.js +407 -0
  337. package/dist/src/tools/run-shell.js.map +1 -0
  338. package/dist/src/tools/search-files.d.ts +8 -0
  339. package/dist/src/tools/search-files.d.ts.map +1 -0
  340. package/dist/src/tools/search-files.js +86 -0
  341. package/dist/src/tools/search-files.js.map +1 -0
  342. package/dist/src/tools/update-plan.d.ts +8 -0
  343. package/dist/src/tools/update-plan.d.ts.map +1 -0
  344. package/dist/src/tools/update-plan.js +85 -0
  345. package/dist/src/tools/update-plan.js.map +1 -0
  346. package/dist/src/trace/format.d.ts +6 -0
  347. package/dist/src/trace/format.d.ts.map +1 -0
  348. package/dist/src/trace/format.js +94 -0
  349. package/dist/src/trace/format.js.map +1 -0
  350. package/dist/src/trace/recorder.d.ts +11 -0
  351. package/dist/src/trace/recorder.d.ts.map +1 -0
  352. package/dist/src/trace/recorder.js +21 -0
  353. package/dist/src/trace/recorder.js.map +1 -0
  354. package/dist/src/types.d.ts +110 -0
  355. package/dist/src/types.d.ts.map +1 -0
  356. package/dist/src/types.js +6 -0
  357. package/dist/src/types.js.map +1 -0
  358. package/dist/src/utils/budget.d.ts +13 -0
  359. package/dist/src/utils/budget.d.ts.map +1 -0
  360. package/dist/src/utils/budget.js +22 -0
  361. package/dist/src/utils/budget.js.map +1 -0
  362. package/dist/src/utils/errors.d.ts +14 -0
  363. package/dist/src/utils/errors.d.ts.map +1 -0
  364. package/dist/src/utils/errors.js +31 -0
  365. package/dist/src/utils/errors.js.map +1 -0
  366. package/dist/src/utils/logger.d.ts +17 -0
  367. package/dist/src/utils/logger.d.ts.map +1 -0
  368. package/dist/src/utils/logger.js +50 -0
  369. package/dist/src/utils/logger.js.map +1 -0
  370. package/package.json +57 -0
@@ -0,0 +1,1276 @@
1
+ import { describe, it, expect } from 'vitest';
2
+ import { runAgent } from '../run-agent.js';
3
+ import { createLogger } from '../utils/logger.js';
4
+ const silentLogger = createLogger({ level: 'silent', console: false });
5
+ describe('runAgent', () => {
6
+ it('executes tool calls, appends tool output, and finishes with a final answer', async () => {
7
+ const seenMessages = [];
8
+ const fakeLlm = {
9
+ async chat(messages) {
10
+ seenMessages.push(messages);
11
+ if (seenMessages.length === 1) {
12
+ return {
13
+ content: 'I will inspect the repo first.',
14
+ toolCalls: [{ id: 'call-1', tool: 'list_files', input: { path: '.' } }],
15
+ };
16
+ }
17
+ return {
18
+ content: 'The repo contains README.md and src/.',
19
+ };
20
+ },
21
+ };
22
+ const listFilesTool = {
23
+ name: 'list_files',
24
+ description: 'Lists files in a directory',
25
+ parameters: { type: 'object', properties: {} },
26
+ async execute() {
27
+ return { ok: true, output: 'README.md\nsrc/' };
28
+ },
29
+ };
30
+ const result = await runAgent({
31
+ goal: 'What is in this repo?',
32
+ llm: fakeLlm,
33
+ tools: [listFilesTool],
34
+ maxSteps: 3,
35
+ logger: silentLogger,
36
+ });
37
+ expect(result.outcome).toBe('done');
38
+ expect(result.summary).toBe('The repo contains README.md and src/.');
39
+ expect(result.transcript).toEqual([
40
+ { role: 'user', content: 'What is in this repo?' },
41
+ {
42
+ role: 'assistant',
43
+ content: 'I will inspect the repo first.',
44
+ toolCalls: [{ id: 'call-1', tool: 'list_files', input: { path: '.' } }],
45
+ },
46
+ {
47
+ role: 'tool',
48
+ content: JSON.stringify({ ok: true, output: 'README.md\nsrc/' }),
49
+ toolCallId: 'call-1',
50
+ },
51
+ { role: 'assistant', content: 'The repo contains README.md and src/.' },
52
+ ]);
53
+ expect(seenMessages).toHaveLength(2);
54
+ expect(seenMessages[1]).toContainEqual({
55
+ role: 'tool',
56
+ content: JSON.stringify({ ok: true, output: 'README.md\nsrc/' }),
57
+ toolCallId: 'call-1',
58
+ });
59
+ expect(result.trace.map((event) => event.type)).toEqual([
60
+ 'run.started',
61
+ 'assistant.turn',
62
+ 'tool.call',
63
+ 'tool.result',
64
+ 'assistant.turn',
65
+ 'run.finished',
66
+ ]);
67
+ expect(result.trace[1]).toMatchObject({
68
+ type: 'assistant.turn',
69
+ content: 'I will inspect the repo first.',
70
+ requestedTools: true,
71
+ toolCalls: [{ id: 'call-1', tool: 'list_files', input: { path: '.' } }],
72
+ });
73
+ expect(result.trace[4]).toMatchObject({
74
+ type: 'assistant.turn',
75
+ content: 'The repo contains README.md and src/.',
76
+ requestedTools: false,
77
+ });
78
+ });
79
+ it('records an error outcome when the LLM chat throws a non-abort error', async () => {
80
+ const fakeLlm = {
81
+ async chat() {
82
+ throw new Error('boom');
83
+ },
84
+ };
85
+ const result = await runAgent({
86
+ goal: 'Handle LLM errors gracefully.',
87
+ llm: fakeLlm,
88
+ tools: [],
89
+ maxSteps: 1,
90
+ logger: silentLogger,
91
+ });
92
+ expect(result.outcome).toBe('error');
93
+ expect(result.summary).toBe('LLM error: boom');
94
+ expect(result.trace.some((event) => event.type === 'run.finished')).toBe(true);
95
+ });
96
+ it('records assistant rationale on tool turns when the model provides text with tool calls', async () => {
97
+ const fakeLlm = {
98
+ async chat() {
99
+ return {
100
+ content: 'I will inspect the repo root before answering.',
101
+ diagnostics: {
102
+ rationale: 'I will inspect the repo root before answering.',
103
+ missing: ['Need the top-level file listing'],
104
+ wantedTools: ['list_files'],
105
+ wantedInputs: ['path=.'],
106
+ },
107
+ toolCalls: [{ id: 'call-1', tool: 'list_files', input: { path: '.' } }],
108
+ };
109
+ },
110
+ };
111
+ const listFilesTool = {
112
+ name: 'list_files',
113
+ description: 'Lists files in a directory',
114
+ parameters: { type: 'object', properties: {} },
115
+ async execute() {
116
+ return { ok: true, output: 'README.md\nsrc/' };
117
+ },
118
+ };
119
+ const result = await runAgent({
120
+ goal: 'Inspect this repo.',
121
+ llm: fakeLlm,
122
+ tools: [listFilesTool],
123
+ maxSteps: 1,
124
+ logger: silentLogger,
125
+ });
126
+ expect(result.trace[1]).toMatchObject({
127
+ type: 'assistant.turn',
128
+ content: 'I will inspect the repo root before answering.',
129
+ diagnostics: {
130
+ rationale: 'I will inspect the repo root before answering.',
131
+ missing: ['Need the top-level file listing'],
132
+ wantedTools: ['list_files'],
133
+ wantedInputs: ['path=.'],
134
+ },
135
+ requestedTools: true,
136
+ });
137
+ });
138
+ it('aggregates token usage across model calls', async () => {
139
+ const fakeLlm = {
140
+ async chat(messages) {
141
+ if (messages.some((message) => message.role === 'tool')) {
142
+ return {
143
+ content: 'Done.',
144
+ usage: {
145
+ inputTokens: 140,
146
+ outputTokens: 20,
147
+ totalTokens: 160,
148
+ requests: 1,
149
+ },
150
+ };
151
+ }
152
+ return {
153
+ content: 'Inspecting first.',
154
+ toolCalls: [{ id: 'call-1', tool: 'list_files', input: { path: '.' } }],
155
+ usage: {
156
+ inputTokens: 120,
157
+ outputTokens: 30,
158
+ totalTokens: 150,
159
+ cachedInputTokens: 10,
160
+ reasoningTokens: 6,
161
+ requests: 1,
162
+ },
163
+ };
164
+ },
165
+ };
166
+ const listFilesTool = {
167
+ name: 'list_files',
168
+ description: 'Lists files in a directory',
169
+ parameters: { type: 'object', properties: {} },
170
+ async execute() {
171
+ return { ok: true, output: 'README.md\nsrc/' };
172
+ },
173
+ };
174
+ const result = await runAgent({
175
+ goal: 'Inspect this repo.',
176
+ llm: fakeLlm,
177
+ tools: [listFilesTool],
178
+ maxSteps: 3,
179
+ logger: silentLogger,
180
+ });
181
+ expect(result.usage).toEqual({
182
+ inputTokens: 260,
183
+ outputTokens: 50,
184
+ totalTokens: 310,
185
+ cachedInputTokens: 10,
186
+ reasoningTokens: 6,
187
+ requests: 2,
188
+ });
189
+ });
190
+ it('delivers streamed assistant updates through the dedicated stream callback', async () => {
191
+ const streamUpdates = [];
192
+ const fakeLlm = {
193
+ async chat(_messages, _tools, _signal, onStreamEvent) {
194
+ onStreamEvent?.({ type: 'content.delta', delta: 'Hello' });
195
+ onStreamEvent?.({ type: 'content.delta', delta: ' world' });
196
+ onStreamEvent?.({ type: 'content.done', content: 'Hello world' });
197
+ return {
198
+ content: 'Hello world',
199
+ };
200
+ },
201
+ };
202
+ const result = await runAgent({
203
+ goal: 'Say hello.',
204
+ llm: fakeLlm,
205
+ tools: [],
206
+ maxSteps: 1,
207
+ logger: silentLogger,
208
+ onAssistantStream: (update) => {
209
+ streamUpdates.push(update);
210
+ },
211
+ });
212
+ expect(result.outcome).toBe('done');
213
+ expect(streamUpdates.length).toBeGreaterThanOrEqual(2);
214
+ expect(streamUpdates.some((update) => update.done === false)).toBe(true);
215
+ expect(streamUpdates.at(-1)).toMatchObject({
216
+ step: 1,
217
+ text: 'Hello world',
218
+ done: true,
219
+ });
220
+ });
221
+ it('allows one repeated identical tool call, then blocks excessive repetition', async () => {
222
+ const seenMessages = [];
223
+ const fakeLlm = {
224
+ async chat(messages) {
225
+ seenMessages.push(messages);
226
+ if (seenMessages.length < 4) {
227
+ return {
228
+ toolCalls: [{ id: `call-${seenMessages.length}`, tool: 'list_files', input: { path: '.' } }],
229
+ };
230
+ }
231
+ return {
232
+ content: 'I should stop repeating the same directory listing.',
233
+ };
234
+ },
235
+ };
236
+ const listFilesTool = {
237
+ name: 'list_files',
238
+ description: 'Lists files in a directory',
239
+ parameters: { type: 'object', properties: {} },
240
+ async execute() {
241
+ return { ok: true, output: 'README.md\nsrc/' };
242
+ },
243
+ };
244
+ const result = await runAgent({
245
+ goal: 'Inspect this repo.',
246
+ llm: fakeLlm,
247
+ tools: [listFilesTool],
248
+ maxSteps: 5,
249
+ logger: silentLogger,
250
+ });
251
+ expect(result.outcome).toBe('done');
252
+ expect(result.summary).toBe('I should stop repeating the same directory listing.');
253
+ expect(seenMessages[3]).toContainEqual({
254
+ role: 'tool',
255
+ content: JSON.stringify({
256
+ ok: false,
257
+ error: 'Repeated tool call blocked: list_files was already called 2 times with the same input earlier in this run. Try a different tool or different input.',
258
+ }),
259
+ toolCallId: 'call-3',
260
+ });
261
+ expect(result.trace[9]).toMatchObject({
262
+ type: 'tool.result',
263
+ tool: 'list_files',
264
+ result: {
265
+ ok: false,
266
+ error: 'Repeated tool call blocked: list_files was already called 2 times with the same input earlier in this run. Try a different tool or different input.',
267
+ },
268
+ });
269
+ });
270
+ it('normalizes equivalent path spellings and only blocks them after repeated retries', async () => {
271
+ const seenMessages = [];
272
+ const fakeLlm = {
273
+ async chat(messages) {
274
+ seenMessages.push(structuredClone(messages));
275
+ if (seenMessages.length === 1) {
276
+ return {
277
+ toolCalls: [{ id: 'call-1', tool: 'list_files', input: { path: '.' } }],
278
+ };
279
+ }
280
+ if (seenMessages.length === 2) {
281
+ return {
282
+ toolCalls: [{ id: 'call-2', tool: 'list_files', input: { path: './' } }],
283
+ };
284
+ }
285
+ if (seenMessages.length === 3) {
286
+ return {
287
+ toolCalls: [{ id: 'call-3', tool: 'list_files', input: { path: '.' } }],
288
+ };
289
+ }
290
+ return {
291
+ content: 'I should stop repeating equivalent directory listings.',
292
+ };
293
+ },
294
+ };
295
+ const listFilesTool = {
296
+ name: 'list_files',
297
+ description: 'Lists files in a directory',
298
+ parameters: { type: 'object', properties: {} },
299
+ async execute() {
300
+ return { ok: true, output: 'README.md\nsrc/' };
301
+ },
302
+ };
303
+ const result = await runAgent({
304
+ goal: 'Inspect this repo.',
305
+ llm: fakeLlm,
306
+ tools: [listFilesTool],
307
+ maxSteps: 5,
308
+ logger: silentLogger,
309
+ });
310
+ expect(result.outcome).toBe('done');
311
+ expect(seenMessages[3]).toContainEqual({
312
+ role: 'tool',
313
+ content: JSON.stringify({
314
+ ok: false,
315
+ error: 'Repeated tool call blocked: list_files was already called 2 times with the same input earlier in this run. Try a different tool or different input.',
316
+ }),
317
+ toolCallId: 'call-3',
318
+ });
319
+ });
320
it('does not stop the run after repeated recoverable tool misuse errors', async () => {
    // The model twice passes an unsupported `maxEntries` field, then gives a final answer.
    const finalAnswer = 'I corrected course instead of dying on invalid list_files parameters.';
    const invalidInputError = 'Invalid input for list_files. Allowed fields: path. Example: { "path": "." }';
    const capturedTurns = [];
    const fakeLlm = {
        chat: async (messages) => {
            const turn = capturedTurns.push(structuredClone(messages));
            switch (turn) {
                case 1:
                    return {
                        toolCalls: [{ id: 'call-1', tool: 'list_files', input: { path: '.', maxEntries: 200 } }],
                    };
                case 2:
                    return {
                        toolCalls: [{ id: 'call-2', tool: 'list_files', input: { path: '.', maxEntries: 100 } }],
                    };
                default:
                    return { content: finalAnswer };
            }
        },
    };
    const listFilesTool = {
        name: 'list_files',
        description: 'Lists files in a directory',
        parameters: { type: 'object', properties: {} },
        execute: async (input) => {
            // Only inputs without `maxEntries` succeed.
            if (!('maxEntries' in input)) {
                return { ok: true, output: 'README.md\nsrc/' };
            }
            return { ok: false, error: invalidInputError };
        },
    };
    const result = await runAgent({
        goal: 'Inspect this repo.',
        llm: fakeLlm,
        tools: [listFilesTool],
        maxSteps: 4,
        logger: silentLogger,
    });
    expect(result.outcome).toBe('done');
    expect(result.summary).toBe(finalAnswer);
    // The second model call is expected to carry a host reminder quoting the tool error.
    const expectedReminder = {
        role: 'system',
        content: 'Host reminder: the last tool call failed due to invalid or repeated tool use: Invalid input for list_files. Allowed fields: path. Example: { "path": "." }. Correct the call immediately, switch tools, or use report_state if you are blocked. Do not keep retrying the same failing pattern.',
    };
    expect(capturedTurns[1]).toContainEqual(expectedReminder);
});
365
it('adds a follow-through reminder after report_state so the next turn acts on the named blocker', async () => {
    // First turn: the model checkpoints via report_state; afterwards it answers in plain text.
    const reportStateCall = {
        id: 'call-1',
        tool: 'report_state',
        input: {
            rationale: 'Need a more precise file slice before editing.',
            missing: ['A specific line range from src/run-agent.ts'],
            nextNeed: 'read_file on src/run-agent.ts with offset 200 and maxLines 80',
        },
    };
    const capturedTurns = [];
    const fakeLlm = {
        chat: async (messages) => {
            const turn = capturedTurns.push(structuredClone(messages));
            return turn === 1
                ? { toolCalls: [reportStateCall] }
                : { content: 'I will act on the concrete blocker next.' };
        },
    };
    const reportStateTool = {
        name: 'report_state',
        description: 'Records the current blocker',
        parameters: { type: 'object', properties: {} },
        // Echoes its input back as the tool output.
        execute: async (input) => ({ ok: true, output: input }),
    };
    await runAgent({
        goal: 'Investigate the next implementation step.',
        llm: fakeLlm,
        tools: [reportStateTool],
        maxSteps: 2,
        logger: silentLogger,
    });
    // The reminder seen on the second call must quote the nextNeed from the report_state input.
    const expectedReminder = {
        role: 'system',
        content: 'Host reminder: report_state is only a checkpoint. On the next turn, either do the concrete nextNeed you identified (read_file on src/run-agent.ts with offset 200 and maxLines 80) or finish with the best grounded blocker. Do not repeat the same planning state.',
    };
    expect(capturedTurns[1]).toContainEqual(expectedReminder);
});
408
it('adds a low-step reminder after extended evidence gathering so the run converges instead of drifting', async () => {
    // The model spends its first three turns listing distinct paths, then answers.
    const explorationTurns = 3;
    const capturedTurns = [];
    const fakeLlm = {
        chat: async (messages) => {
            const turn = capturedTurns.push(structuredClone(messages));
            if (turn > explorationTurns) {
                return {
                    content: 'I have enough evidence to stop exploring.',
                };
            }
            return {
                toolCalls: [{ id: `call-${turn}`, tool: 'list_files', input: { path: `path-${turn}` } }],
            };
        },
    };
    const listFilesTool = {
        name: 'list_files',
        description: 'Lists files in a directory',
        parameters: { type: 'object', properties: {} },
        execute: async () => ({ ok: true, output: 'README.md\nsrc/' }),
    };
    await runAgent({
        goal: 'Figure out the next concrete step.',
        llm: fakeLlm,
        tools: [listFilesTool],
        maxSteps: 4,
        logger: silentLogger,
    });
    // With maxSteps 4, the fourth model call is expected to include the "1 step(s) remain" reminder.
    const expectedReminder = {
        role: 'system',
        content: 'Host reminder: only 1 step(s) remain. Do not spend another turn rephrasing the plan. Either execute the single next concrete action needed to finish, or answer with the best grounded blocker.',
    };
    expect(capturedTurns[3]).toContainEqual(expectedReminder);
});
443
it('carries prior transcript into a later turn when history is provided', async () => {
    // Conversation fragments, built through factories so every use gets a fresh object.
    const earlierUser = () => ({ role: 'user', content: 'Inspect the repo root.' });
    const earlierAssistant = () => ({ role: 'assistant', content: 'The repo contains README.md and src/.' });
    const currentUser = () => ({ role: 'user', content: 'What did I ask before this?' });
    const modelReply = 'I can answer using the earlier conversation context.';
    const capturedTurns = [];
    const fakeLlm = {
        chat: async (messages) => {
            capturedTurns.push(structuredClone(messages));
            return { content: modelReply };
        },
    };
    const result = await runAgent({
        goal: 'What did I ask before this?',
        llm: fakeLlm,
        tools: [],
        history: [earlierUser(), earlierAssistant()],
        maxSteps: 1,
        logger: silentLogger,
    });
    // The model must see: a system message, the provided history, then the new goal as a user message.
    expect(capturedTurns[0]).toEqual([
        expect.objectContaining({ role: 'system' }),
        earlierUser(),
        earlierAssistant(),
        currentUser(),
    ]);
    // The returned transcript must hold history, goal and reply, without the system message.
    expect(result.transcript).toEqual([
        earlierUser(),
        earlierAssistant(),
        currentUser(),
        { role: 'assistant', content: modelReply },
    ]);
});
477
it('sanitizes unresolved prior tool calls before sending history back to the model', async () => {
    // The history ends with an assistant message whose tool call never received a result.
    const modelReply = 'I retried without carrying over the interrupted tool call.';
    const priorUser = () => ({ role: 'user', content: 'Continue on test coverage.' });
    // Expected sanitized form: same text, with the dangling toolCalls removed.
    const sanitizedAssistant = () => ({ role: 'assistant', content: 'I will inspect run-agent next.' });
    const retryUser = () => ({ role: 'user', content: 'Can you try again?' });
    const capturedTurns = [];
    const fakeLlm = {
        chat: async (messages) => {
            capturedTurns.push(structuredClone(messages));
            return { content: modelReply };
        },
    };
    const result = await runAgent({
        goal: 'Can you try again?',
        llm: fakeLlm,
        tools: [],
        history: [
            priorUser(),
            {
                role: 'assistant',
                content: 'I will inspect run-agent next.',
                toolCalls: [{ id: 'call-1', tool: 'read_file', input: { path: 'src/run-agent.ts' } }],
            },
        ],
        maxSteps: 1,
        logger: silentLogger,
    });
    expect(capturedTurns[0]).toEqual([
        expect.objectContaining({ role: 'system' }),
        priorUser(),
        sanitizedAssistant(),
        retryUser(),
    ]);
    expect(result.transcript).toEqual([
        priorUser(),
        sanitizedAssistant(),
        retryUser(),
        { role: 'assistant', content: modelReply },
    ]);
});
515
it('requires approval for tools marked as approval-gated and feeds denials back to the model', async () => {
    // The model asks for an approval-gated tool once; the approval callback always denies.
    const capturedTurns = [];
    const fakeLlm = {
        chat: async (messages) => {
            const turn = capturedTurns.push(structuredClone(messages));
            if (turn !== 1) {
                return {
                    content: 'The mutation command was denied, so I will stop and report that.',
                };
            }
            return {
                toolCalls: [{ id: 'call-1', tool: 'run_shell_mutate', input: { command: 'yarn test' } }],
            };
        },
    };
    const mutateTool = {
        name: 'run_shell_mutate',
        description: 'Runs a bounded workspace mutation command',
        requiresApproval: true,
        parameters: { type: 'object', properties: {} },
        execute: async () => ({ ok: true, output: { command: 'yarn test', exitCode: 0, stdout: '', stderr: '' } }),
    };
    const denyEverything = async () => ({ approved: false, reason: 'User denied in test' });
    const result = await runAgent({
        goal: 'Run tests if needed.',
        llm: fakeLlm,
        tools: [mutateTool],
        maxSteps: 3,
        logger: silentLogger,
        approveToolCall: denyEverything,
    });
    expect(result.outcome).toBe('done');
    // The denial, including the callback's reason, must reach the model as the tool result.
    const expectedDenial = {
        role: 'tool',
        content: JSON.stringify({
            ok: false,
            error: 'Approval denied for run_shell_mutate: User denied in test',
        }),
        toolCallId: 'call-1',
    };
    expect(capturedTurns[1]).toContainEqual(expectedDenial);
    // Both halves of the approval handshake must appear in the trace.
    const traceTypes = result.trace.map((event) => event.type);
    expect(traceTypes).toContain('tool.approval_requested');
    expect(traceTypes).toContain('tool.approval_resolved');
});
559
it('records an explicit fallback event when inspect retries through mutate', async () => {
    const requestedCommand = 'aws configure list';
    const fakeLlm = {
        chat: async (messages) => {
            // Once any tool result is in the conversation, the model wraps up.
            const hasToolResult = messages.some((message) => message.role === 'tool');
            if (!hasToolResult) {
                return {
                    toolCalls: [{ id: 'call-1', tool: 'run_shell_inspect', input: { command: requestedCommand } }],
                };
            }
            return {
                content: 'The fallback command ran successfully.',
            };
        },
    };
    // The inspect tool always rejects the command as not allowed by its policy.
    const inspectTool = {
        name: 'run_shell_inspect',
        description: 'Runs a bounded read-only shell command',
        parameters: { type: 'object', properties: {} },
        execute: async () => ({
            ok: false,
            error: 'Command not allowed by run_shell_inspect policy. This tool only permits bounded commands that match its configured workspace risk/scope rules.',
        }),
    };
    // The mutate tool succeeds and echoes the command it was given.
    const mutateTool = {
        name: 'run_shell_mutate',
        description: 'Runs an approval-gated shell command',
        requiresApproval: true,
        parameters: { type: 'object', properties: {} },
        execute: async (input) => {
            const { command } = input;
            return { ok: true, output: { command, exitCode: 0, stdout: 'ok', stderr: '' } };
        },
    };
    const result = await runAgent({
        goal: 'Try the AWS CLI command.',
        llm: fakeLlm,
        tools: [inspectTool, mutateTool],
        maxSteps: 3,
        logger: silentLogger,
        approveToolCall: async () => ({ approved: true, reason: 'remembered project approval' }),
    });
    expect(result.outcome).toBe('done');
    // The fallback event must link the rejected inspect call to the derived mutate call.
    const expectedFallbackEvent = {
        type: 'tool.fallback',
        fromCall: { id: 'call-1', tool: 'run_shell_inspect', input: { command: 'aws configure list' } },
        toCall: {
            id: 'call-1-mutate-fallback',
            tool: 'run_shell_mutate',
            input: { command: 'aws configure list' },
        },
        reason: 'inspect policy rejected the command',
        step: 1,
        timestamp: expect.any(String),
    };
    expect(result.trace).toContainEqual(expectedFallbackEvent);
    // The full event order is pinned, with the fallback between the failed result and the approval request.
    const expectedEventOrder = [
        'run.started',
        'assistant.turn',
        'tool.call',
        'tool.result',
        'tool.fallback',
        'tool.approval_requested',
        'tool.approval_resolved',
        'tool.call',
        'tool.result',
        'assistant.turn',
        'run.finished',
    ];
    const observedEventOrder = result.trace.map((event) => event.type);
    expect(observedEventOrder).toEqual(expectedEventOrder);
});
628
it('requires repo review and verification before finalizing after a workspace-changing mutate command', async () => {
    const structuredAnswer = 'Applied the fix and verified the repo state.\n- Changed: fixed src/example.ts via eslint --fix src/example.ts.\n- Verified: git diff --stat => exit 0, no stdout/stderr output; yarn test => exit 0, no stdout/stderr output.\n- Remaining uncertainty: none.';
    const isHostRequirement = (message) => message.role === 'system' &&
        message.content.includes('Host requirement: before giving a final answer');
    // Both shell tools succeed silently and echo the command they received.
    const echoCommand = async (input) => {
        const { command } = input;
        return { ok: true, output: { command, exitCode: 0, stdout: '', stderr: '' } };
    };
    const capturedTurns = [];
    const fakeLlm = {
        chat: async (messages) => {
            const turn = capturedTurns.push(structuredClone(messages));
            // Scan newest-first for the host requirement message.
            const hostReminder = messages.slice().reverse().find(isHostRequirement);
            if (turn === 1) {
                return {
                    toolCalls: [{ id: 'call-1', tool: 'run_shell_mutate', input: { command: 'eslint --fix src/example.ts' } }],
                };
            }
            if (!hostReminder) {
                // Premature final answer, given before the host has asked for review.
                return {
                    content: 'The workspace-changing command is complete.',
                };
            }
            switch (turn) {
                case 3:
                    return {
                        toolCalls: [{ id: 'call-2', tool: 'run_shell_inspect', input: { command: 'git diff --stat' } }],
                    };
                case 4:
                    return {
                        toolCalls: [{ id: 'call-3', tool: 'run_shell_mutate', input: { command: 'yarn test' } }],
                    };
                default:
                    return { content: structuredAnswer };
            }
        },
    };
    const mutateTool = {
        name: 'run_shell_mutate',
        description: 'Runs a bounded workspace mutation or verification command',
        requiresApproval: true,
        parameters: { type: 'object', properties: {} },
        execute: echoCommand,
    };
    const inspectTool = {
        name: 'run_shell_inspect',
        description: 'Runs a read-only shell inspection command',
        parameters: { type: 'object', properties: {} },
        execute: echoCommand,
    };
    const result = await runAgent({
        goal: 'Apply the fix and tell me it worked.',
        llm: fakeLlm,
        tools: [mutateTool, inspectTool],
        maxSteps: 6,
        logger: silentLogger,
        approveToolCall: async () => ({ approved: true }),
    });
    expect(result.outcome).toBe('done');
    expect(result.summary).toBe(structuredAnswer);
    // The third model call must carry the full host requirement text.
    const expectedRequirement = {
        role: 'system',
        content: 'Host requirement: before giving a final answer after a workspace-changing mutate command, you must inspect the resulting repo state with concrete git review evidence such as git status --short or git diff --stat and run a verification command such as yarn test, yarn build, yarn lint, vitest, or tsc. After doing that, then provide the final answer.',
    };
    expect(capturedTurns[2]).toContainEqual(expectedRequirement);
});
694
it('allows a small edit_file change to finish without forced review and verification follow-up', async () => {
    // One edit_file call followed by a plain final answer.
    const finalAnswer = 'I updated the file.';
    const capturedTurns = [];
    const fakeLlm = {
        chat: async (messages) => {
            const turn = capturedTurns.push(structuredClone(messages));
            return turn === 1
                ? { toolCalls: [{ id: 'call-1', tool: 'edit_file', input: { path: 'README.md', oldText: 'old', newText: 'new' } }] }
                : { content: finalAnswer };
        },
    };
    const editTool = {
        name: 'edit_file',
        description: 'Edits a file directly in the workspace',
        requiresApproval: true,
        parameters: { type: 'object', properties: {} },
        execute: async () => ({ ok: true, output: { path: 'README.md', action: 'replaced', matchCount: 1 } }),
    };
    const result = await runAgent({
        goal: 'Update the README and tell me it worked.',
        llm: fakeLlm,
        tools: [editTool],
        maxSteps: 3,
        logger: silentLogger,
        approveToolCall: async () => ({ approved: true }),
    });
    expect(result.outcome).toBe('done');
    expect(result.summary).toBe(finalAnswer);
    // No model call may have contained the mutate-command host requirement.
    const isMutateRequirement = (message) => message.role === 'system' &&
        message.content.includes('Host requirement: before giving a final answer after a workspace-changing mutate command');
    const sawRequirement = capturedTurns.flat().some(isMutateRequirement);
    expect(sawRequirement).toBe(false);
});
731
it('requires post-mutation review, verification, and structured summary before finishing', async () => {
    const structuredAnswer = 'Applied the lint fix and checked the repo and test state.\n- Changed: eslint --fix src/example.ts applied to src/example.ts.\n- Verified: git diff --stat => exit 0, no stdout/stderr output; yarn test => exit 0, no stdout/stderr output.\n- Remaining uncertainty: none.';
    const requirementPrefix = 'Host requirement: before giving a final answer after a workspace-changing mutate command';
    // Both shell tools succeed silently and echo the command they received.
    const echoCommand = async (input) => {
        const { command } = input;
        return { ok: true, output: { command, exitCode: 0, stdout: '', stderr: '' } };
    };
    const capturedTurns = [];
    const fakeLlm = {
        chat: async (messages) => {
            // Script: mutate, premature answer, git review, verification, structured answer.
            const stage = capturedTurns.push(structuredClone(messages));
            switch (stage) {
                case 1:
                    return {
                        toolCalls: [{ id: 'call-1', tool: 'run_shell_mutate', input: { command: 'eslint --fix src/example.ts' } }],
                    };
                case 2:
                    return {
                        content: 'The change is ready to be reported.',
                    };
                case 3:
                    return {
                        toolCalls: [{ id: 'call-2', tool: 'run_shell_inspect', input: { command: 'git diff --stat' } }],
                    };
                case 4:
                    return {
                        toolCalls: [{ id: 'call-3', tool: 'run_shell_mutate', input: { command: 'yarn test' } }],
                    };
                default:
                    return { content: structuredAnswer };
            }
        },
    };
    const mutateTool = {
        name: 'run_shell_mutate',
        description: 'Runs a bounded workspace mutation or verification command',
        requiresApproval: true,
        parameters: { type: 'object', properties: {} },
        execute: echoCommand,
    };
    const inspectTool = {
        name: 'run_shell_inspect',
        description: 'Runs a read-only shell inspection command',
        parameters: { type: 'object', properties: {} },
        execute: echoCommand,
    };
    const result = await runAgent({
        goal: 'Apply lint fix and summarize.',
        llm: fakeLlm,
        tools: [mutateTool, inspectTool],
        maxSteps: 8,
        logger: silentLogger,
        approveToolCall: async () => ({ approved: true }),
    });
    expect(result.outcome).toBe('done');
    expect(result.summary).toBe(structuredAnswer);
    // Some model call must have included the host requirement.
    const everyMessageSeen = capturedTurns.flat();
    const hostRequirement = everyMessageSeen.find((message) => message.role === 'system' &&
        message.content.includes(requirementPrefix));
    expect(hostRequirement).toBeDefined();
    expect(hostRequirement?.content).toContain(requirementPrefix);
});
799
it('rejects a vague final answer after mutation follow-up until it includes changed, verified, and remaining uncertainty labels', async () => {
    const isStructureReminder = (message) => message.role === 'system' &&
        message.content.includes('your final answer must start with a short summary sentence or short paragraph');
    // Both shell tools succeed silently and echo the command they received.
    const echoCommand = async (input) => {
        const { command } = input;
        return { ok: true, output: { command, exitCode: 0, stdout: '', stderr: '' } };
    };
    const capturedTurns = [];
    const fakeLlm = {
        chat: async (messages) => {
            const turn = capturedTurns.push(structuredClone(messages));
            // Scan newest-first for the structured-answer reminder.
            const structuredReminder = messages.slice().reverse().find(isStructureReminder);
            switch (turn) {
                case 1:
                    return {
                        toolCalls: [{ id: 'call-1', tool: 'run_shell_mutate', input: { command: 'eslint --fix src/example.ts' } }],
                    };
                case 2:
                    return {
                        content: 'The workspace-changing command is complete.',
                    };
                case 3:
                    return {
                        toolCalls: [{ id: 'call-2', tool: 'run_shell_inspect', input: { command: 'git diff --stat' } }],
                    };
                case 4:
                    return {
                        toolCalls: [{ id: 'call-3', tool: 'run_shell_mutate', input: { command: 'yarn test' } }],
                    };
                default:
                    break;
            }
            // From the fifth turn on: vague until the reminder is present, structured afterwards.
            if (structuredReminder) {
                return {
                    content: 'Applied the fix and verified the repo state.\n- Changed: fixed src/example.ts via eslint --fix src/example.ts.\n- Verified: git diff --stat => exit 0, no stdout/stderr output; yarn test => exit 0, no stdout/stderr output.\n- Remaining uncertainty: none.',
                };
            }
            return {
                content: 'I made the change and it looks good.',
            };
        },
    };
    const mutateTool = {
        name: 'run_shell_mutate',
        description: 'Runs a bounded workspace mutation or verification command',
        requiresApproval: true,
        parameters: { type: 'object', properties: {} },
        execute: echoCommand,
    };
    const inspectTool = {
        name: 'run_shell_inspect',
        description: 'Runs a read-only shell inspection command',
        parameters: { type: 'object', properties: {} },
        execute: echoCommand,
    };
    const result = await runAgent({
        goal: 'Apply the fix and tell me it worked.',
        llm: fakeLlm,
        tools: [mutateTool, inspectTool],
        maxSteps: 8,
        logger: silentLogger,
        approveToolCall: async () => ({ approved: true }),
    });
    expect(result.outcome).toBe('done');
    // Expected summary: the vague sentence followed by Changed / Verified / Remaining uncertainty bullets.
    const expectedSummary = 'I made the change and it looks good.\n\n- Changed: eslint --fix src/example.ts\n- Verified: git diff --stat => exit 0, no stdout/stderr output; yarn test => exit 0, no stdout/stderr output\n- Remaining uncertainty: none';
    expect(result.summary).toBe(expectedSummary);
});
866
it('rejects a structured summary that omits the actual review and verification commands', async () => {
    const isStructureReminder = (message) => message.role === 'system' &&
        message.content.includes('your final answer must start with a short summary sentence or short paragraph');
    // Both shell tools succeed silently and echo the command they received.
    const echoCommand = async (input) => {
        const { command } = input;
        return { ok: true, output: { command, exitCode: 0, stdout: '', stderr: '' } };
    };
    const capturedTurns = [];
    const fakeLlm = {
        chat: async (messages) => {
            const turn = capturedTurns.push(structuredClone(messages));
            // Scan newest-first for the structured-answer reminder.
            const structuredReminder = messages.slice().reverse().find(isStructureReminder);
            switch (turn) {
                case 1:
                    return {
                        toolCalls: [{ id: 'call-1', tool: 'run_shell_mutate', input: { command: 'eslint --fix src/example.ts' } }],
                    };
                case 2:
                    return { content: 'The workspace-changing command is complete.' };
                case 3:
                    return {
                        toolCalls: [{ id: 'call-2', tool: 'run_shell_inspect', input: { command: 'git diff --stat' } }],
                    };
                case 4:
                    return {
                        toolCalls: [{ id: 'call-3', tool: 'run_shell_mutate', input: { command: 'yarn test' } }],
                    };
                default:
                    break;
            }
            if (structuredReminder) {
                return {
                    content: 'Applied the fix and verified the repo state.\n- Changed: fixed src/example.ts via eslint --fix src/example.ts.\n- Verified: git diff --stat => exit 0, no stdout/stderr output; yarn test => exit 0, no stdout/stderr output.\n- Remaining uncertainty: none.',
                };
            }
            // Has the three labels, but the Verified line names no concrete commands.
            return {
                content: 'Applied the fix and checked it.\n- Changed: fixed src/example.ts.\n- Verified: reviewed the repo and tests passed.\n- Remaining uncertainty: none.',
            };
        },
    };
    const mutateTool = {
        name: 'run_shell_mutate',
        description: 'Runs a bounded workspace mutation or verification command',
        requiresApproval: true,
        parameters: { type: 'object', properties: {} },
        execute: echoCommand,
    };
    const inspectTool = {
        name: 'run_shell_inspect',
        description: 'Runs a read-only shell inspection command',
        parameters: { type: 'object', properties: {} },
        execute: echoCommand,
    };
    const result = await runAgent({
        goal: 'Apply the fix and tell me it worked.',
        llm: fakeLlm,
        tools: [mutateTool, inspectTool],
        maxSteps: 8,
        logger: silentLogger,
        approveToolCall: async () => ({ approved: true }),
    });
    expect(result.outcome).toBe('done');
    // Expected summary: the model's first sentence followed by bullets naming the executed commands.
    const expectedSummary = 'Applied the fix and checked it.\n\n- Changed: eslint --fix src/example.ts\n- Verified: git diff --stat => exit 0, no stdout/stderr output; yarn test => exit 0, no stdout/stderr output\n- Remaining uncertainty: none';
    expect(result.summary).toBe(expectedSummary);
});
931
it('returns an interrupted outcome when the host requests a stop between steps', async () => {
    // Flipped by the first model call; read by the shouldStop callback passed to runAgent.
    let stopRequested = false;
    const fakeLlm = {
        chat: async () => {
            stopRequested = true;
            return {
                toolCalls: [{ id: 'call-1', tool: 'list_files', input: { path: '.' } }],
            };
        },
    };
    const listFilesTool = {
        name: 'list_files',
        description: 'Lists files in a directory',
        parameters: { type: 'object', properties: {} },
        execute: async () => ({ ok: true, output: 'README.md\nsrc/' }),
    };
    const result = await runAgent({
        goal: 'Inspect this repo and then stop.',
        llm: fakeLlm,
        tools: [listFilesTool],
        maxSteps: 3,
        logger: silentLogger,
        shouldStop: () => stopRequested,
    });
    expect(result.outcome).toBe('interrupted');
    expect(result.summary).toBe('Run interrupted by host request');
    // The last trace event must record the interrupted finish.
    const finalEvent = result.trace.at(-1);
    expect(finalEvent).toMatchObject({
        type: 'run.finished',
        outcome: 'interrupted',
    });
});
964
it('treats bounded file operations as workspace-changing mutate commands that require review and verification follow-up', async () => {
    const isHostRequirement = (message) => message.role === 'system' &&
        message.content.includes('Host requirement: before giving a final answer');
    const capturedTurns = [];
    const fakeLlm = {
        chat: async (messages) => {
            const turn = capturedTurns.push(structuredClone(messages));
            // Scan newest-first for the host requirement message.
            const hostReminder = messages.slice().reverse().find(isHostRequirement);
            if (turn === 1) {
                return {
                    toolCalls: [{ id: 'call-1', tool: 'run_shell_mutate', input: { command: 'mv docs/old.md docs/new.md' } }],
                };
            }
            if (hostReminder) {
                // Claims review and verification without having run any follow-up tool.
                return {
                    content: 'Moved the file and completed the required follow-up checks.\n- Changed: moved docs/old.md to docs/new.md.\n- Verified: reviewed git diff --stat and yarn test passed.\n- Remaining uncertainty: none.',
                };
            }
            return { content: 'I moved the file.' };
        },
    };
    const mutateTool = {
        name: 'run_shell_mutate',
        description: 'Runs a bounded workspace mutation or verification command',
        requiresApproval: true,
        parameters: { type: 'object', properties: {} },
        execute: async (input) => {
            const { command } = input;
            return { ok: true, output: { command, exitCode: 0, stdout: '', stderr: '' } };
        },
    };
    const result = await runAgent({
        goal: 'Move the file and tell me the result.',
        llm: fakeLlm,
        tools: [mutateTool],
        maxSteps: 3,
        logger: silentLogger,
        approveToolCall: async () => ({ approved: true }),
    });
    // The model never runs review or verification, and the run is expected to end on max_steps.
    expect(result.outcome).toBe('max_steps');
    const expectedRequirement = {
        role: 'system',
        content: 'Host requirement: before giving a final answer after a workspace-changing mutate command, you must inspect the resulting repo state with concrete git review evidence such as git status --short or git diff --stat and run a verification command such as yarn test, yarn build, yarn lint, vitest, or tsc. After doing that, then provide the final answer.',
    };
    expect(capturedTurns[2]).toContainEqual(expectedRequirement);
});
1008
it('asks for the missing git-native review command when verification already ran after a change', async () => {
    // Script: mutate, then verify with yarn test, then answer without any git review.
    const capturedTurns = [];
    const fakeLlm = {
        chat: async (messages) => {
            const stage = capturedTurns.push(structuredClone(messages));
            switch (stage) {
                case 1:
                    return {
                        toolCalls: [{ id: 'call-1', tool: 'run_shell_mutate', input: { command: 'eslint --fix src/example.ts' } }],
                    };
                case 2:
                    return {
                        toolCalls: [{ id: 'call-2', tool: 'run_shell_mutate', input: { command: 'yarn test' } }],
                    };
                default:
                    return {
                        content: 'The change is done and verified.',
                    };
            }
        },
    };
    const mutateTool = {
        name: 'run_shell_mutate',
        description: 'Runs a bounded workspace mutation or verification command',
        requiresApproval: true,
        parameters: { type: 'object', properties: {} },
        execute: async (input) => {
            const { command } = input;
            return { ok: true, output: { command, exitCode: 0, stdout: '', stderr: '' } };
        },
    };
    const result = await runAgent({
        goal: 'Apply the fix and report back.',
        llm: fakeLlm,
        tools: [mutateTool],
        maxSteps: 4,
        logger: silentLogger,
        approveToolCall: async () => ({ approved: true }),
    });
    expect(result.outcome).toBe('max_steps');
    // The fourth model call must ask only for git review and note the captured yarn test run.
    const reviewOnlyRequirement = 'Host requirement: before giving a final answer after a workspace-changing mutate command, you must inspect the resulting repo state with concrete git review evidence such as git status --short or git diff --stat and note: verification already captured: yarn test. Additional verification is not required unless the repo state changed again.';
    const expectedMessages = [
        {
            role: 'system',
            content: expect.stringContaining(reviewOnlyRequirement),
        },
    ];
    expect(capturedTurns[3]).toEqual(expect.arrayContaining(expectedMessages));
});
1056
it('asks for the missing git-native review command while noting existing verification evidence', async () => {
    // Both shell tools succeed silently and echo the command they received.
    const echoCommand = async (input) => {
        const { command } = input;
        return { ok: true, output: { command, exitCode: 0, stdout: '', stderr: '' } };
    };
    const capturedTurns = [];
    const fakeLlm = {
        chat: async (messages) => {
            // Script: mutate, verify, premature answer, then git status on every later turn.
            const stage = capturedTurns.push(structuredClone(messages));
            switch (stage) {
                case 1:
                    return {
                        toolCalls: [{ id: 'call-1', tool: 'run_shell_mutate', input: { command: 'eslint --fix src/example.ts' } }],
                    };
                case 2:
                    return {
                        toolCalls: [{ id: 'call-2', tool: 'run_shell_mutate', input: { command: 'yarn test' } }],
                    };
                case 3:
                    return {
                        content: 'The change is ready to be reported.',
                    };
                default:
                    return {
                        toolCalls: [{ id: 'call-3', tool: 'run_shell_inspect', input: { command: 'git status --short' } }],
                    };
            }
        },
    };
    const mutateTool = {
        name: 'run_shell_mutate',
        description: 'Runs a bounded workspace mutation or verification command',
        requiresApproval: true,
        parameters: { type: 'object', properties: {} },
        execute: echoCommand,
    };
    const inspectTool = {
        name: 'run_shell_inspect',
        description: 'Runs a read-only shell inspection command',
        parameters: { type: 'object', properties: {} },
        execute: echoCommand,
    };
    const result = await runAgent({
        goal: 'Apply the fix and report back.',
        llm: fakeLlm,
        tools: [mutateTool, inspectTool],
        maxSteps: 5,
        logger: silentLogger,
        approveToolCall: async () => ({ approved: true }),
    });
    expect(result.outcome).toBe('max_steps');
    // The fourth model call must ask only for git review and note the captured yarn test run.
    const reviewOnlyRequirement = 'Host requirement: before giving a final answer after a workspace-changing mutate command, you must inspect the resulting repo state with concrete git review evidence such as git status --short or git diff --stat and note: verification already captured: yarn test. Additional verification is not required unless the repo state changed again.';
    const expectedMessages = [
        {
            role: 'system',
            content: expect.stringContaining(reviewOnlyRequirement),
        },
    ];
    expect(capturedTurns[3]).toEqual(expect.arrayContaining(expectedMessages));
});
1118
it('sends immediate review reminders after a workspace change', async () => {
    // One deep-copied snapshot of the message list per chat() invocation.
    const seenMessages = [];
    let chatCalls = 0;
    // Both fake tools report a clean, output-free run of whatever command they receive.
    const succeed = (command) => ({ ok: true, output: { command, exitCode: 0, stdout: '', stderr: '' } });
    // Script: mutate (eslint --fix) -> plain-text answer -> git status on every later call.
    const fakeLlm = {
        async chat(messages) {
            chatCalls += 1;
            seenMessages.push(structuredClone(messages));
            switch (chatCalls) {
                case 1:
                    return {
                        toolCalls: [
                            { id: 'call-1', tool: 'run_shell_mutate', input: { command: 'eslint --fix src/example.ts' } },
                        ],
                    };
                case 2:
                    return { content: 'I am ready to continue.' };
                default:
                    return {
                        toolCalls: [
                            { id: 'call-2', tool: 'run_shell_inspect', input: { command: 'git status --short' } },
                        ],
                    };
            }
        },
    };
    const mutateTool = {
        name: 'run_shell_mutate',
        description: 'Runs a bounded workspace mutation or verification command',
        requiresApproval: true,
        parameters: { type: 'object', properties: {} },
        async execute(input) {
            return succeed(input.command);
        },
    };
    const inspectTool = {
        name: 'run_shell_inspect',
        description: 'Runs a read-only shell inspection command',
        parameters: { type: 'object', properties: {} },
        async execute(input) {
            return succeed(input.command);
        },
    };
    const result = await runAgent({
        goal: 'Apply the fix and report back.',
        llm: fakeLlm,
        tools: [mutateTool, inspectTool],
        maxSteps: 4,
        logger: silentLogger,
        approveToolCall: async () => {
            return { approved: true };
        },
    });
    expect(result.outcome).toBe('max_steps');
    // The messages passed to the third chat() call must carry the review reminder
    // as a system message.
    const isReviewReminder = (message) => message.role === 'system' &&
        message.content.includes('Host reminder: you ran a workspace-changing command; inspect the resulting repo state now');
    const messagesForThirdCall = seenMessages[2];
    const immediateReminder = messagesForThirdCall.find(isReviewReminder);
    expect(immediateReminder).toBeDefined();
});
1171
it('sends immediate verification reminders if the review already happened but verification is still pending', async () => {
    // One deep-copied snapshot of the message list per chat() invocation.
    const seenMessages = [];
    let chatCalls = 0;
    // Shape of a single-tool-call reply from the scripted LLM.
    const requestTool = (id, tool, command) => ({ toolCalls: [{ id, tool, input: { command } }] });
    // Both fake tools report a clean, output-free run of whatever command they receive.
    const succeed = (command) => ({ ok: true, output: { command, exitCode: 0, stdout: '', stderr: '' } });
    // Script: mutate (eslint --fix) -> inspect (git diff --stat) -> plain-text answer
    // -> mutate (yarn test) on every later call.
    const fakeLlm = {
        async chat(messages) {
            chatCalls += 1;
            seenMessages.push(structuredClone(messages));
            switch (chatCalls) {
                case 1:
                    return requestTool('call-1', 'run_shell_mutate', 'eslint --fix src/example.ts');
                case 2:
                    return requestTool('call-2', 'run_shell_inspect', 'git diff --stat');
                case 3:
                    return { content: 'Ready to summarize.' };
                default:
                    return requestTool('call-3', 'run_shell_mutate', 'yarn test');
            }
        },
    };
    const mutateTool = {
        name: 'run_shell_mutate',
        description: 'Runs a bounded workspace mutation or verification command',
        requiresApproval: true,
        parameters: { type: 'object', properties: {} },
        async execute(input) {
            return succeed(input.command);
        },
    };
    const inspectTool = {
        name: 'run_shell_inspect',
        description: 'Runs a read-only shell inspection command',
        parameters: { type: 'object', properties: {} },
        async execute(input) {
            return succeed(input.command);
        },
    };
    const result = await runAgent({
        goal: 'Apply the fix and summarize.',
        llm: fakeLlm,
        tools: [mutateTool, inspectTool],
        maxSteps: 5,
        logger: silentLogger,
        approveToolCall: async () => {
            return { approved: true };
        },
    });
    expect(result.outcome).toBe('max_steps');
    // The messages passed to the fourth chat() call must carry the verification
    // reminder as a system message.
    const isVerificationReminder = (message) => message.role === 'system' &&
        message.content.includes('Host reminder: you ran a workspace-changing command; run a verification command such as yarn test or yarn build before continuing.');
    const messagesForFourthCall = seenMessages[3];
    const immediateReminder = messagesForFourthCall.find(isVerificationReminder);
    expect(immediateReminder).toBeDefined();
});
1229
it('allows a final answer even when a recorded plan still has unfinished items', async () => {
    let chatCalls = 0;
    // First reply from the scripted LLM: record a plan that still has one
    // in-progress and one pending step.
    const recordPlanReply = () => ({
        toolCalls: [{
                id: 'call-1',
                tool: 'update_plan',
                input: {
                    explanation: 'Tracking the implementation steps.',
                    plan: [
                        { step: 'Inspect current implementation', status: 'completed' },
                        { step: 'Implement the next bounded change', status: 'in_progress' },
                        { step: 'Verify with tests', status: 'pending' },
                    ],
                },
            }],
    });
    // Script: record the plan on the first call, then answer in plain text.
    const fakeLlm = {
        async chat() {
            chatCalls += 1;
            return chatCalls === 1 ? recordPlanReply() : { content: 'The work is done.' };
        },
    };
    // Fake plan tool: succeeds and echoes its input back as the output.
    const updatePlanTool = {
        name: 'update_plan',
        description: 'Records a short working plan.',
        parameters: { type: 'object', properties: {} },
        async execute(input) {
            return { ok: true, output: input };
        },
    };
    const result = await runAgent({
        goal: 'Implement the next step.',
        llm: fakeLlm,
        tools: [updatePlanTool],
        maxSteps: 2,
        logger: silentLogger,
    });
    expect(result.outcome).toBe('done');
    expect(result.summary).toBe('The work is done.');
    // No system message about the unfinished plan may appear anywhere in the transcript.
    const isUnfinishedPlanNotice = (message) => message.role === 'system' &&
        message.content.includes('you recorded a plan and it still has unfinished items');
    const sawUnfinishedPlanNotice = result.transcript.some(isUnfinishedPlanNotice);
    expect(sawUnfinishedPlanNotice).toBe(false);
});
1275
+ });
1276
+ //# sourceMappingURL=run-agent.test.js.map