@amodalai/runtime 0.1.26 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (445) hide show
  1. package/dist/src/__fixtures__/README.md +88 -0
  2. package/dist/src/__fixtures__/e2e.test.js +211 -0
  3. package/dist/src/__fixtures__/e2e.test.js.map +1 -0
  4. package/dist/src/__fixtures__/smoke-agent/amodal.json +11 -0
  5. package/dist/src/__fixtures__/smoke-agent/automations/delivery-callback-test.json +9 -0
  6. package/dist/src/__fixtures__/smoke-agent/automations/test-auto.md +5 -0
  7. package/dist/src/__fixtures__/smoke-agent/connections/mock-api/access.json +11 -0
  8. package/dist/src/__fixtures__/smoke-agent/connections/mock-api/spec.json +4 -0
  9. package/dist/src/__fixtures__/smoke-agent/connections/mock-api/surface.md +9 -0
  10. package/dist/src/__fixtures__/smoke-agent/connections/mock-mcp/access.json +3 -0
  11. package/dist/src/__fixtures__/smoke-agent/connections/mock-mcp/spec.json +8 -0
  12. package/dist/src/__fixtures__/smoke-agent/evals/basic-eval.md +12 -0
  13. package/dist/src/__fixtures__/smoke-agent/knowledge/test-knowledge.md +3 -0
  14. package/dist/src/__fixtures__/smoke-agent/skills/test-skill/SKILL.md +11 -0
  15. package/dist/src/__fixtures__/smoke-agent/stores/test-items.json +11 -0
  16. package/dist/src/__fixtures__/smoke-agent/tools/echo_tool/handler.d.ts +18 -0
  17. package/dist/src/__fixtures__/smoke-agent/tools/echo_tool/handler.js +22 -0
  18. package/dist/src/__fixtures__/smoke-agent/tools/echo_tool/handler.js.map +1 -0
  19. package/dist/src/__fixtures__/smoke-agent/tools/echo_tool/tool.json +17 -0
  20. package/dist/src/__fixtures__/smoke.test.js +1404 -0
  21. package/dist/src/__fixtures__/smoke.test.js.map +1 -0
  22. package/dist/src/__fixtures__/test-env.d.ts +27 -0
  23. package/dist/src/__fixtures__/test-env.js +64 -0
  24. package/dist/src/__fixtures__/test-env.js.map +1 -0
  25. package/dist/src/__fixtures__/test-helpers.d.ts +30 -0
  26. package/dist/src/__fixtures__/test-helpers.js +120 -0
  27. package/dist/src/__fixtures__/test-helpers.js.map +1 -0
  28. package/dist/src/__tests__/test-providers.d.ts +40 -0
  29. package/dist/src/__tests__/test-providers.js +61 -0
  30. package/dist/src/__tests__/test-providers.js.map +1 -0
  31. package/dist/src/agent/agent-types.d.ts +22 -0
  32. package/dist/src/agent/agent-types.js.map +1 -1
  33. package/dist/src/agent/automation-bridge.d.ts +9 -0
  34. package/dist/src/agent/automation-bridge.js +26 -0
  35. package/dist/src/agent/automation-bridge.js.map +1 -1
  36. package/dist/src/agent/automation-bridge.test.js +63 -0
  37. package/dist/src/agent/automation-bridge.test.js.map +1 -1
  38. package/dist/src/agent/local-server.d.ts +1 -8
  39. package/dist/src/agent/local-server.js +398 -163
  40. package/dist/src/agent/local-server.js.map +1 -1
  41. package/dist/src/agent/local-server.test.js +14 -8
  42. package/dist/src/agent/local-server.test.js.map +1 -1
  43. package/dist/src/agent/loop-types.d.ts +254 -0
  44. package/dist/src/agent/loop-types.js +24 -0
  45. package/dist/src/agent/loop-types.js.map +1 -0
  46. package/dist/src/agent/loop.d.ts +31 -0
  47. package/dist/src/agent/loop.js +152 -0
  48. package/dist/src/agent/loop.js.map +1 -0
  49. package/dist/src/agent/loop.test.js +1594 -0
  50. package/dist/src/agent/loop.test.js.map +1 -0
  51. package/dist/src/agent/mcp-config.d.ts +28 -0
  52. package/dist/src/agent/mcp-config.js +57 -0
  53. package/dist/src/agent/mcp-config.js.map +1 -0
  54. package/dist/src/agent/page-builder.js +6 -1
  55. package/dist/src/agent/page-builder.js.map +1 -1
  56. package/dist/src/agent/proactive/delivery-router.d.ts +68 -0
  57. package/dist/src/agent/proactive/delivery-router.js +337 -0
  58. package/dist/src/agent/proactive/delivery-router.js.map +1 -0
  59. package/dist/src/agent/{stores-e2e.test.d.ts → proactive/delivery-router.test.d.ts} +1 -1
  60. package/dist/src/agent/proactive/delivery-router.test.js +455 -0
  61. package/dist/src/agent/proactive/delivery-router.test.js.map +1 -0
  62. package/dist/src/agent/proactive/proactive-runner.d.ts +46 -8
  63. package/dist/src/agent/proactive/proactive-runner.js +67 -37
  64. package/dist/src/agent/proactive/proactive-runner.js.map +1 -1
  65. package/dist/src/agent/proactive/proactive-runner.test.d.ts +1 -1
  66. package/dist/src/agent/proactive/proactive-runner.test.js +73 -87
  67. package/dist/src/agent/proactive/proactive-runner.test.js.map +1 -1
  68. package/dist/src/agent/routes/admin-chat-abort.test.d.ts +6 -0
  69. package/dist/src/agent/routes/admin-chat-abort.test.js +206 -0
  70. package/dist/src/agent/routes/admin-chat-abort.test.js.map +1 -0
  71. package/dist/src/agent/routes/admin-chat.d.ts +15 -3
  72. package/dist/src/agent/routes/admin-chat.js +61 -18
  73. package/dist/src/agent/routes/admin-chat.js.map +1 -1
  74. package/dist/src/agent/routes/automations.js +5 -6
  75. package/dist/src/agent/routes/automations.js.map +1 -1
  76. package/dist/src/agent/routes/evals.d.ts +3 -2
  77. package/dist/src/agent/routes/evals.js +25 -12
  78. package/dist/src/agent/routes/evals.js.map +1 -1
  79. package/dist/src/agent/routes/files.js +7 -9
  80. package/dist/src/agent/routes/files.js.map +1 -1
  81. package/dist/src/agent/routes/inspect.d.ts +6 -2
  82. package/dist/src/agent/routes/inspect.js +31 -17
  83. package/dist/src/agent/routes/inspect.js.map +1 -1
  84. package/dist/src/agent/routes/inspect.test.js +18 -42
  85. package/dist/src/agent/routes/inspect.test.js.map +1 -1
  86. package/dist/src/agent/routes/stores.js +9 -12
  87. package/dist/src/agent/routes/stores.js.map +1 -1
  88. package/dist/src/agent/routes/task.d.ts +15 -3
  89. package/dist/src/agent/routes/task.js +16 -7
  90. package/dist/src/agent/routes/task.js.map +1 -1
  91. package/dist/src/agent/routes/task.test.d.ts +1 -1
  92. package/dist/src/agent/routes/task.test.js +68 -53
  93. package/dist/src/agent/routes/task.test.js.map +1 -1
  94. package/dist/src/agent/routes/webhooks.js +12 -3
  95. package/dist/src/agent/routes/webhooks.js.map +1 -1
  96. package/dist/src/agent/snapshot-server.d.ts +2 -22
  97. package/dist/src/agent/snapshot-server.js +48 -27
  98. package/dist/src/agent/snapshot-server.js.map +1 -1
  99. package/dist/src/agent/states/compacting.d.ts +14 -0
  100. package/dist/src/agent/states/compacting.js +260 -0
  101. package/dist/src/agent/states/compacting.js.map +1 -0
  102. package/dist/src/agent/states/confirming.d.ts +10 -0
  103. package/dist/src/agent/states/confirming.js +79 -0
  104. package/dist/src/agent/states/confirming.js.map +1 -0
  105. package/dist/src/agent/states/dispatching.d.ts +18 -0
  106. package/dist/src/agent/states/dispatching.js +285 -0
  107. package/dist/src/agent/states/dispatching.js.map +1 -0
  108. package/dist/src/agent/states/executing.d.ts +21 -0
  109. package/dist/src/agent/states/executing.js +452 -0
  110. package/dist/src/agent/states/executing.js.map +1 -0
  111. package/dist/src/agent/states/streaming.d.ts +10 -0
  112. package/dist/src/agent/states/streaming.js +169 -0
  113. package/dist/src/agent/states/streaming.js.map +1 -0
  114. package/dist/src/agent/states/thinking.d.ts +13 -0
  115. package/dist/src/agent/states/thinking.js +450 -0
  116. package/dist/src/agent/states/thinking.js.map +1 -0
  117. package/dist/src/agent/token-estimate.d.ts +31 -0
  118. package/dist/src/agent/token-estimate.js +34 -0
  119. package/dist/src/agent/token-estimate.js.map +1 -0
  120. package/dist/src/agent/token-estimate.test.d.ts +6 -0
  121. package/dist/src/agent/token-estimate.test.js +44 -0
  122. package/dist/src/agent/token-estimate.test.js.map +1 -0
  123. package/dist/src/agent/tool-executor-local.js +9 -18
  124. package/dist/src/agent/tool-executor-local.js.map +1 -1
  125. package/dist/src/agent/tool-executor-local.test.js +3 -5
  126. package/dist/src/agent/tool-executor-local.test.js.map +1 -1
  127. package/dist/src/api/create-agent.d.ts +15 -0
  128. package/dist/src/api/create-agent.js +134 -0
  129. package/dist/src/api/create-agent.js.map +1 -0
  130. package/dist/src/api/types.d.ts +66 -0
  131. package/dist/src/api/types.js +7 -0
  132. package/dist/src/api/types.js.map +1 -0
  133. package/dist/src/context/compiler.d.ts +13 -0
  134. package/dist/src/context/compiler.js +358 -0
  135. package/dist/src/context/compiler.js.map +1 -0
  136. package/dist/src/context/compiler.test.d.ts +6 -0
  137. package/dist/src/context/compiler.test.js +532 -0
  138. package/dist/src/context/compiler.test.js.map +1 -0
  139. package/dist/src/context/types.d.ts +110 -0
  140. package/dist/src/context/types.js +7 -0
  141. package/dist/src/context/types.js.map +1 -0
  142. package/dist/src/env-ref.d.ts +13 -0
  143. package/dist/src/env-ref.js +31 -0
  144. package/dist/src/env-ref.js.map +1 -0
  145. package/dist/src/env-ref.test.d.ts +6 -0
  146. package/dist/src/env-ref.test.js +34 -0
  147. package/dist/src/env-ref.test.js.map +1 -0
  148. package/dist/src/errors.d.ts +15 -0
  149. package/dist/src/errors.js +22 -0
  150. package/dist/src/errors.js.map +1 -1
  151. package/dist/src/errors.test.js +2 -2
  152. package/dist/src/errors.test.js.map +1 -1
  153. package/dist/src/events/event-bus.d.ts +54 -0
  154. package/dist/src/events/event-bus.js +84 -0
  155. package/dist/src/events/event-bus.js.map +1 -0
  156. package/dist/src/events/event-bus.test.d.ts +6 -0
  157. package/dist/src/events/event-bus.test.js +112 -0
  158. package/dist/src/events/event-bus.test.js.map +1 -0
  159. package/dist/src/events/events-route.d.ts +36 -0
  160. package/dist/src/events/events-route.js +80 -0
  161. package/dist/src/events/events-route.js.map +1 -0
  162. package/dist/src/events/events-route.test.d.ts +6 -0
  163. package/dist/src/events/events-route.test.js +134 -0
  164. package/dist/src/events/events-route.test.js.map +1 -0
  165. package/dist/src/events/store-event-wrapper.d.ts +19 -0
  166. package/dist/src/events/store-event-wrapper.js +57 -0
  167. package/dist/src/events/store-event-wrapper.js.map +1 -0
  168. package/dist/src/events/store-event-wrapper.test.d.ts +6 -0
  169. package/dist/src/events/store-event-wrapper.test.js +91 -0
  170. package/dist/src/events/store-event-wrapper.test.js.map +1 -0
  171. package/dist/src/index.d.ts +33 -6
  172. package/dist/src/index.js +35 -21
  173. package/dist/src/index.js.map +1 -1
  174. package/dist/src/middleware/auth.d.ts +0 -2
  175. package/dist/src/middleware/auth.js.map +1 -1
  176. package/dist/src/providers/create-provider.d.ts +23 -0
  177. package/dist/src/providers/create-provider.js +185 -0
  178. package/dist/src/providers/create-provider.js.map +1 -0
  179. package/dist/src/providers/create-provider.test.d.ts +6 -0
  180. package/dist/src/providers/create-provider.test.js +95 -0
  181. package/dist/src/providers/create-provider.test.js.map +1 -0
  182. package/dist/src/providers/failover.d.ts +38 -0
  183. package/dist/src/providers/failover.js +147 -0
  184. package/dist/src/providers/failover.js.map +1 -0
  185. package/dist/src/providers/failover.test.d.ts +6 -0
  186. package/dist/src/providers/failover.test.js +169 -0
  187. package/dist/src/providers/failover.test.js.map +1 -0
  188. package/dist/src/providers/search-provider.d.ts +64 -0
  189. package/dist/src/providers/search-provider.js +174 -0
  190. package/dist/src/providers/search-provider.js.map +1 -0
  191. package/dist/src/providers/types.d.ts +118 -0
  192. package/dist/src/providers/types.js +7 -0
  193. package/dist/src/providers/types.js.map +1 -0
  194. package/dist/src/routes/ai-stream.d.ts +28 -10
  195. package/dist/src/routes/ai-stream.js +85 -41
  196. package/dist/src/routes/ai-stream.js.map +1 -1
  197. package/dist/src/routes/chat-new.test.d.ts +6 -0
  198. package/dist/src/routes/chat-new.test.js +107 -0
  199. package/dist/src/routes/chat-new.test.js.map +1 -0
  200. package/dist/src/routes/chat-stream-new.test.d.ts +6 -0
  201. package/dist/src/routes/chat-stream-new.test.js +135 -0
  202. package/dist/src/routes/chat-stream-new.test.js.map +1 -0
  203. package/dist/src/routes/chat-stream.d.ts +20 -4
  204. package/dist/src/routes/chat-stream.js +49 -29
  205. package/dist/src/routes/chat-stream.js.map +1 -1
  206. package/dist/src/routes/chat.d.ts +19 -4
  207. package/dist/src/routes/chat.js +62 -23
  208. package/dist/src/routes/chat.js.map +1 -1
  209. package/dist/src/routes/health.d.ts +3 -2
  210. package/dist/src/routes/health.js.map +1 -1
  211. package/dist/src/routes/route-helpers.d.ts +50 -0
  212. package/dist/src/routes/route-helpers.js +80 -0
  213. package/dist/src/routes/route-helpers.js.map +1 -0
  214. package/dist/src/routes/session-resolver.d.ts +77 -0
  215. package/dist/src/routes/session-resolver.js +109 -0
  216. package/dist/src/routes/session-resolver.js.map +1 -0
  217. package/dist/src/routes/session-resolver.test.d.ts +6 -0
  218. package/dist/src/routes/session-resolver.test.js +207 -0
  219. package/dist/src/routes/session-resolver.test.js.map +1 -0
  220. package/dist/src/routes/webhooks.d.ts +3 -1
  221. package/dist/src/routes/webhooks.js +12 -4
  222. package/dist/src/routes/webhooks.js.map +1 -1
  223. package/dist/src/security/permission-checker.d.ts +80 -0
  224. package/dist/src/security/permission-checker.js +75 -0
  225. package/dist/src/security/permission-checker.js.map +1 -0
  226. package/dist/src/security/permission-checker.test.d.ts +6 -0
  227. package/dist/src/security/permission-checker.test.js +208 -0
  228. package/dist/src/security/permission-checker.test.js.map +1 -0
  229. package/dist/src/server.d.ts +18 -11
  230. package/dist/src/server.js +46 -46
  231. package/dist/src/server.js.map +1 -1
  232. package/dist/src/server.test.d.ts +1 -1
  233. package/dist/src/server.test.js +6 -144
  234. package/dist/src/server.test.js.map +1 -1
  235. package/dist/src/session/drizzle-session-store.d.ts +56 -0
  236. package/dist/src/session/drizzle-session-store.js +203 -0
  237. package/dist/src/session/drizzle-session-store.js.map +1 -0
  238. package/dist/src/session/manager.d.ts +101 -0
  239. package/dist/src/session/manager.js +394 -0
  240. package/dist/src/session/manager.js.map +1 -0
  241. package/dist/src/session/manager.test.d.ts +6 -0
  242. package/dist/src/session/manager.test.js +309 -0
  243. package/dist/src/session/manager.test.js.map +1 -0
  244. package/dist/src/session/pglite-session-store.d.ts +23 -0
  245. package/dist/src/session/pglite-session-store.js +70 -0
  246. package/dist/src/session/pglite-session-store.js.map +1 -0
  247. package/dist/src/session/postgres-session-store.d.ts +44 -0
  248. package/dist/src/session/postgres-session-store.js +138 -0
  249. package/dist/src/session/postgres-session-store.js.map +1 -0
  250. package/dist/src/session/session-builder.d.ts +69 -0
  251. package/dist/src/session/session-builder.js +384 -0
  252. package/dist/src/session/session-builder.js.map +1 -0
  253. package/dist/src/session/session-builder.test.d.ts +6 -0
  254. package/dist/src/session/session-builder.test.js +350 -0
  255. package/dist/src/session/session-builder.test.js.map +1 -0
  256. package/dist/src/session/session-store-selector.d.ts +49 -0
  257. package/dist/src/session/session-store-selector.js +60 -0
  258. package/dist/src/session/session-store-selector.js.map +1 -0
  259. package/dist/src/session/session-store-selector.test.d.ts +6 -0
  260. package/dist/src/session/session-store-selector.test.js +79 -0
  261. package/dist/src/session/session-store-selector.test.js.map +1 -0
  262. package/dist/src/session/store.d.ts +171 -0
  263. package/dist/src/session/store.js +155 -0
  264. package/dist/src/session/store.js.map +1 -0
  265. package/dist/src/session/store.test.d.ts +6 -0
  266. package/dist/src/session/store.test.js +423 -0
  267. package/dist/src/session/store.test.js.map +1 -0
  268. package/dist/src/session/stream-hooks.d.ts +39 -0
  269. package/dist/src/session/stream-hooks.js +7 -0
  270. package/dist/src/session/stream-hooks.js.map +1 -0
  271. package/dist/src/session/tool-context-factory.d.ts +61 -0
  272. package/dist/src/session/tool-context-factory.js +189 -0
  273. package/dist/src/session/tool-context-factory.js.map +1 -0
  274. package/dist/src/session/tool-context-factory.test.d.ts +6 -0
  275. package/dist/src/session/tool-context-factory.test.js +284 -0
  276. package/dist/src/session/tool-context-factory.test.js.map +1 -0
  277. package/dist/src/session/types.d.ts +195 -0
  278. package/dist/src/session/types.js +7 -0
  279. package/dist/src/session/types.js.map +1 -0
  280. package/dist/src/stores/drizzle-store-backend.d.ts +49 -0
  281. package/dist/src/stores/drizzle-store-backend.js +306 -0
  282. package/dist/src/stores/drizzle-store-backend.js.map +1 -0
  283. package/dist/src/stores/drizzle-store-backend.test.d.ts +6 -0
  284. package/dist/src/stores/drizzle-store-backend.test.js +215 -0
  285. package/dist/src/stores/drizzle-store-backend.test.js.map +1 -0
  286. package/dist/src/stores/index.d.ts +4 -0
  287. package/dist/src/stores/index.js +2 -0
  288. package/dist/src/stores/index.js.map +1 -1
  289. package/dist/src/stores/pglite-store-backend.d.ts +16 -19
  290. package/dist/src/stores/pglite-store-backend.js +85 -239
  291. package/dist/src/stores/pglite-store-backend.js.map +1 -1
  292. package/dist/src/stores/postgres-store-backend.d.ts +30 -0
  293. package/dist/src/stores/postgres-store-backend.js +100 -0
  294. package/dist/src/stores/postgres-store-backend.js.map +1 -0
  295. package/dist/src/stores/schema.d.ts +457 -0
  296. package/dist/src/stores/schema.js +59 -0
  297. package/dist/src/stores/schema.js.map +1 -0
  298. package/dist/src/tools/admin-file-tools.d.ts +42 -0
  299. package/dist/src/tools/admin-file-tools.js +714 -0
  300. package/dist/src/tools/admin-file-tools.js.map +1 -0
  301. package/dist/src/tools/admin-file-tools.test.d.ts +6 -0
  302. package/dist/src/tools/admin-file-tools.test.js +521 -0
  303. package/dist/src/tools/admin-file-tools.test.js.map +1 -0
  304. package/dist/src/tools/custom-tool-adapter.d.ts +41 -0
  305. package/dist/src/tools/custom-tool-adapter.js +190 -0
  306. package/dist/src/tools/custom-tool-adapter.js.map +1 -0
  307. package/dist/src/tools/custom-tool-adapter.test.d.ts +6 -0
  308. package/dist/src/tools/custom-tool-adapter.test.js +243 -0
  309. package/dist/src/tools/custom-tool-adapter.test.js.map +1 -0
  310. package/dist/src/tools/dispatch-tool.d.ts +52 -0
  311. package/dist/src/tools/dispatch-tool.js +71 -0
  312. package/dist/src/tools/dispatch-tool.js.map +1 -0
  313. package/dist/src/tools/dispatch-tool.test.d.ts +6 -0
  314. package/dist/src/tools/dispatch-tool.test.js +75 -0
  315. package/dist/src/tools/dispatch-tool.test.js.map +1 -0
  316. package/dist/src/tools/fetch-url-tool.d.ts +23 -0
  317. package/dist/src/tools/fetch-url-tool.js +333 -0
  318. package/dist/src/tools/fetch-url-tool.js.map +1 -0
  319. package/dist/src/tools/fetch-url-tool.test.d.ts +6 -0
  320. package/dist/src/tools/fetch-url-tool.test.js +228 -0
  321. package/dist/src/tools/fetch-url-tool.test.js.map +1 -0
  322. package/dist/src/tools/mcp-tool-adapter.d.ts +18 -0
  323. package/dist/src/tools/mcp-tool-adapter.js +135 -0
  324. package/dist/src/tools/mcp-tool-adapter.js.map +1 -0
  325. package/dist/src/tools/mcp-tool-adapter.test.d.ts +6 -0
  326. package/dist/src/tools/mcp-tool-adapter.test.js +226 -0
  327. package/dist/src/tools/mcp-tool-adapter.test.js.map +1 -0
  328. package/dist/src/tools/registry.d.ts +25 -0
  329. package/dist/src/tools/registry.js +72 -0
  330. package/dist/src/tools/registry.js.map +1 -0
  331. package/dist/src/tools/registry.test.d.ts +6 -0
  332. package/dist/src/tools/registry.test.js +120 -0
  333. package/dist/src/tools/registry.test.js.map +1 -0
  334. package/dist/src/tools/request-tool.d.ts +42 -0
  335. package/dist/src/tools/request-tool.js +190 -0
  336. package/dist/src/tools/request-tool.js.map +1 -0
  337. package/dist/src/tools/request-tool.test.d.ts +6 -0
  338. package/dist/src/tools/request-tool.test.js +253 -0
  339. package/dist/src/tools/request-tool.test.js.map +1 -0
  340. package/dist/src/tools/store-tools.d.ts +29 -0
  341. package/dist/src/tools/store-tools.js +224 -0
  342. package/dist/src/tools/store-tools.js.map +1 -0
  343. package/dist/src/tools/store-tools.test.d.ts +6 -0
  344. package/dist/src/tools/store-tools.test.js +215 -0
  345. package/dist/src/tools/store-tools.test.js.map +1 -0
  346. package/dist/src/tools/types.d.ts +129 -0
  347. package/dist/src/tools/types.js +7 -0
  348. package/dist/src/tools/types.js.map +1 -0
  349. package/dist/src/tools/web-search-tool.d.ts +31 -0
  350. package/dist/src/tools/web-search-tool.js +170 -0
  351. package/dist/src/tools/web-search-tool.js.map +1 -0
  352. package/dist/src/tools/web-search-tool.test.d.ts +6 -0
  353. package/dist/src/tools/web-search-tool.test.js +153 -0
  354. package/dist/src/tools/web-search-tool.test.js.map +1 -0
  355. package/dist/src/tools/web-tools-shared.d.ts +21 -0
  356. package/dist/src/tools/web-tools-shared.js +32 -0
  357. package/dist/src/tools/web-tools-shared.js.map +1 -0
  358. package/dist/src/types.d.ts +40 -12
  359. package/dist/src/types.js +16 -2
  360. package/dist/src/types.js.map +1 -1
  361. package/dist/tsconfig.tsbuildinfo +1 -1
  362. package/package.json +27 -4
  363. package/dist/src/__tests__/sse-contract.test.js +0 -464
  364. package/dist/src/__tests__/sse-contract.test.js.map +0 -1
  365. package/dist/src/__tests__/tools.test.js +0 -583
  366. package/dist/src/__tests__/tools.test.js.map +0 -1
  367. package/dist/src/agent/agent-runner.d.ts +0 -33
  368. package/dist/src/agent/agent-runner.js +0 -1040
  369. package/dist/src/agent/agent-runner.js.map +0 -1
  370. package/dist/src/agent/custom-tools-e2e.test.d.ts +0 -6
  371. package/dist/src/agent/custom-tools-e2e.test.js +0 -566
  372. package/dist/src/agent/custom-tools-e2e.test.js.map +0 -1
  373. package/dist/src/agent/request-helper.d.ts +0 -16
  374. package/dist/src/agent/request-helper.js +0 -96
  375. package/dist/src/agent/request-helper.js.map +0 -1
  376. package/dist/src/agent/session-store.d.ts +0 -62
  377. package/dist/src/agent/session-store.js +0 -151
  378. package/dist/src/agent/session-store.js.map +0 -1
  379. package/dist/src/agent/stores-e2e.test.js +0 -433
  380. package/dist/src/agent/stores-e2e.test.js.map +0 -1
  381. package/dist/src/agent/tool-context-builder.d.ts +0 -11
  382. package/dist/src/agent/tool-context-builder.js +0 -102
  383. package/dist/src/agent/tool-context-builder.js.map +0 -1
  384. package/dist/src/agent/tool-context-builder.test.d.ts +0 -6
  385. package/dist/src/agent/tool-context-builder.test.js +0 -152
  386. package/dist/src/agent/tool-context-builder.test.js.map +0 -1
  387. package/dist/src/agent/write-repo-file.test.js +0 -270
  388. package/dist/src/agent/write-repo-file.test.js.map +0 -1
  389. package/dist/src/cron/heartbeat-runner.d.ts +0 -21
  390. package/dist/src/cron/heartbeat-runner.js +0 -79
  391. package/dist/src/cron/heartbeat-runner.js.map +0 -1
  392. package/dist/src/cron/heartbeat-runner.test.d.ts +0 -6
  393. package/dist/src/cron/heartbeat-runner.test.js +0 -120
  394. package/dist/src/cron/heartbeat-runner.test.js.map +0 -1
  395. package/dist/src/cron/heartbeat-scheduler.d.ts +0 -26
  396. package/dist/src/cron/heartbeat-scheduler.js +0 -55
  397. package/dist/src/cron/heartbeat-scheduler.js.map +0 -1
  398. package/dist/src/cron/heartbeat-scheduler.test.d.ts +0 -6
  399. package/dist/src/cron/heartbeat-scheduler.test.js +0 -61
  400. package/dist/src/cron/heartbeat-scheduler.test.js.map +0 -1
  401. package/dist/src/routes/ai-stream.test.d.ts +0 -6
  402. package/dist/src/routes/ai-stream.test.js +0 -586
  403. package/dist/src/routes/ai-stream.test.js.map +0 -1
  404. package/dist/src/routes/ask-user-response.d.ts +0 -30
  405. package/dist/src/routes/ask-user-response.js +0 -61
  406. package/dist/src/routes/ask-user-response.js.map +0 -1
  407. package/dist/src/routes/ask-user-response.test.d.ts +0 -6
  408. package/dist/src/routes/ask-user-response.test.js +0 -88
  409. package/dist/src/routes/ask-user-response.test.js.map +0 -1
  410. package/dist/src/routes/chat-stream.test.d.ts +0 -6
  411. package/dist/src/routes/chat-stream.test.js +0 -155
  412. package/dist/src/routes/chat-stream.test.js.map +0 -1
  413. package/dist/src/routes/chat.test.d.ts +0 -6
  414. package/dist/src/routes/chat.test.js +0 -99
  415. package/dist/src/routes/chat.test.js.map +0 -1
  416. package/dist/src/routes/widget-actions.d.ts +0 -49
  417. package/dist/src/routes/widget-actions.js +0 -78
  418. package/dist/src/routes/widget-actions.js.map +0 -1
  419. package/dist/src/session/admin-file-tools.d.ts +0 -136
  420. package/dist/src/session/admin-file-tools.js +0 -240
  421. package/dist/src/session/admin-file-tools.js.map +0 -1
  422. package/dist/src/session/custom-tool-adapter.d.ts +0 -74
  423. package/dist/src/session/custom-tool-adapter.js +0 -180
  424. package/dist/src/session/custom-tool-adapter.js.map +0 -1
  425. package/dist/src/session/history-converter.d.ts +0 -21
  426. package/dist/src/session/history-converter.js +0 -59
  427. package/dist/src/session/history-converter.js.map +0 -1
  428. package/dist/src/session/history-converter.test.d.ts +0 -6
  429. package/dist/src/session/history-converter.test.js +0 -130
  430. package/dist/src/session/history-converter.test.js.map +0 -1
  431. package/dist/src/session/session-manager.d.ts +0 -219
  432. package/dist/src/session/session-manager.js +0 -915
  433. package/dist/src/session/session-manager.js.map +0 -1
  434. package/dist/src/session/session-manager.test.d.ts +0 -6
  435. package/dist/src/session/session-manager.test.js +0 -455
  436. package/dist/src/session/session-manager.test.js.map +0 -1
  437. package/dist/src/session/session-runner.d.ts +0 -45
  438. package/dist/src/session/session-runner.js +0 -719
  439. package/dist/src/session/session-runner.js.map +0 -1
  440. package/dist/src/session/session-runner.test.d.ts +0 -6
  441. package/dist/src/session/session-runner.test.js +0 -834
  442. package/dist/src/session/session-runner.test.js.map +0 -1
  443. /package/dist/src/{__tests__/sse-contract.test.d.ts → __fixtures__/e2e.test.d.ts} +0 -0
  444. /package/dist/src/{__tests__/tools.test.d.ts → __fixtures__/smoke.test.d.ts} +0 -0
  445. /package/dist/src/agent/{write-repo-file.test.d.ts → loop.test.d.ts} +0 -0
@@ -0,0 +1,1594 @@
1
+ /**
2
+ * @license
3
+ * Copyright 2026 Amodal Labs, Inc.
4
+ * SPDX-License-Identifier: MIT
5
+ */
6
+ /**
7
+ * Agent Loop Tests
8
+ *
9
+ * Tests the state machine core:
10
+ * 1. Unit: each state handler produces expected transitions
11
+ * 2. Integration: runAgent() full conversation flow
12
+ * 3. Abort handling: clean shutdown on signal abort
13
+ * 4. Turn budget: max_turns enforcement
14
+ * 5. SSE event ordering (init first, done last, done always has usage)
15
+ */
16
+ import { describe, it, expect, vi } from 'vitest';
17
+ import { z } from 'zod';
18
+ import { SSEEventType } from '../types.js';
19
+ import { runAgent, transition } from './loop.js';
20
+ import { DEFAULT_LOOP_CONFIG } from './loop-types.js';
21
+ // ---------------------------------------------------------------------------
22
+ // Mock helpers
23
+ // ---------------------------------------------------------------------------
24
/**
 * Build a stub logger for tests.
 *
 * Every level method (`trace` … `fatal`) is an independent vitest spy, so a
 * test can assert on what was logged. `child` is stubbed with
 * `mockReturnThis()`, so calling it as `logger.child(...)` hands back the
 * same stub (and therefore the same spies) instead of a new logger.
 *
 * @returns {object} A logger-shaped object made entirely of `vi.fn()` spies.
 */
function makeMockLogger() {
    return {
        trace: vi.fn(),
        debug: vi.fn(),
        info: vi.fn(),
        warn: vi.fn(),
        error: vi.fn(),
        fatal: vi.fn(),
        child: vi.fn().mockReturnThis(),
    };
}
35
/**
 * Build a token-usage record with every counter zeroed, then apply overrides.
 *
 * @param {object} [overrides] - Fields to set or add on top of the zeroed
 *   defaults. May be omitted: spreading `undefined` into an object literal
 *   contributes no properties.
 * @returns {{inputTokens: number, outputTokens: number, totalTokens: number}}
 *   A fresh object on every call; `overrides` keys win over the defaults.
 */
function makeUsage(overrides) {
    return {
        inputTokens: 0,
        outputTokens: 0,
        totalTokens: 0,
        ...overrides,
    };
}
43
/**
 * Build a stub tool definition for registry/loop tests.
 *
 * Defaults: description `'Test tool'`, an empty `parameters` object, a
 * non-read-only tool in the `'custom'` category, and an `execute` spy that
 * resolves to `{ output: 'tool result' }`.
 *
 * @param {object} [overrides] - Top-level fields that replace the defaults
 *   (shallow: passing `metadata` replaces the whole default metadata object).
 * @returns {object} A new tool-definition stub.
 */
function makeMockToolDef(overrides) {
    return {
        description: 'Test tool',
        parameters: {},
        execute: vi.fn().mockResolvedValue({ output: 'tool result' }),
        readOnly: false,
        metadata: { category: 'custom' },
        ...overrides,
    };
}
53
/**
 * Build a stub tool registry backed by a plain name → tool-definition object.
 *
 * - `get`, `getTools` and `names` are spies that read from `tools`.
 * - `register` is a bare spy: it records calls but does not add to `tools`.
 * - `subset` always returns an empty object.
 * - `size` is computed once, when the stub is created; it does not track
 *   later mutation of `tools`.
 *
 * @param {object} [tools={}] - Map of tool name to tool definition.
 * @returns {object} A registry-shaped stub.
 */
function makeMockRegistry(tools = {}) {
    return {
        register: vi.fn(),
        get: vi.fn((name) => tools[name]),
        getTools: vi.fn(() => tools),
        names: vi.fn(() => Object.keys(tools)),
        subset: vi.fn().mockReturnValue({}),
        size: Object.keys(tools).length,
    };
}
63
/**
 * Create a mock StreamTextResult that yields the given stream events.
 *
 * Both async generators are started when this function is called, so each
 * of `textStream` and `fullStream` can be consumed only once per result.
 *
 * @param {Iterable<object>} events - Events replayed, in order, by `fullStream`.
 * @param {string} [text='Hello from the model'] - The single chunk yielded by
 *   `textStream`, and the value the `text` promise resolves to.
 * @returns {object} An object with `textStream`, `fullStream`, a `usage`
 *   promise fixed at 100 input / 50 output / 150 total tokens, and a `text`
 *   promise.
 */
function makeMockStream(events, text = 'Hello from the model') {
    return {
        textStream: (async function* () {
            yield text;
        })(),
        fullStream: (async function* () {
            for (const event of events) {
                yield event;
            }
        })(),
        usage: Promise.resolve(makeUsage({ inputTokens: 100, outputTokens: 50, totalTokens: 150 })),
        text: Promise.resolve(text),
    };
}
80
/**
 * Build a fully stubbed agent-loop context for tests.
 *
 * Defaults worth knowing when reading a test:
 * - `provider.streamText` returns a fresh mock stream on every call containing
 *   one `text-delta` ('Hello') followed by a `finish` event.
 * - `permissionChecker.check` allows everything.
 * - `signal` comes from an AbortController whose handle is discarded, so the
 *   default signal is never aborted; pass your own `signal` to test aborts.
 * - `waitForConfirmation` resolves to `true`.
 * - `buildToolContext` returns a stub tool context with its own, separate
 *   (also never-aborted) signal.
 * - `turnCount` starts at 0 with `maxTurns` of 10.
 *
 * @param {object} [overrides] - Top-level fields that replace the defaults.
 *   The merge is shallow: overriding `provider` replaces the whole provider.
 * @returns {object} A new context object; nothing is shared between calls
 *   except the values copied out of `DEFAULT_LOOP_CONFIG`.
 */
function makeMockContext(overrides) {
    const logger = makeMockLogger();
    return {
        provider: {
            model: 'test-model',
            provider: 'test',
            languageModel: {},
            streamText: vi.fn(() => makeMockStream([
                { type: 'text-delta', textDelta: 'Hello' },
                { type: 'finish', usage: makeUsage({ inputTokens: 100, outputTokens: 50, totalTokens: 150 }) },
            ])),
            generateText: vi.fn(),
        },
        toolRegistry: makeMockRegistry(),
        permissionChecker: {
            check: vi.fn().mockReturnValue({ allowed: true }),
        },
        logger,
        signal: new AbortController().signal,
        sessionId: 'test-session',
        user: { roles: ['user'] },
        systemPrompt: 'You are a helpful assistant.',
        messages: [],
        usage: makeUsage(),
        turnCount: 0,
        maxTurns: 10,
        maxContextTokens: 200_000,
        // Copy so a test that tweaks config cannot leak into the shared default.
        config: { ...DEFAULT_LOOP_CONFIG },
        compactionFailures: 0,
        preExecutionCache: new Map(),
        confirmedCallIds: new Set(),
        disabledToolsUntilTurn: new Map(),
        waitForConfirmation: vi.fn().mockResolvedValue(true),
        buildToolContext: vi.fn().mockReturnValue({
            request: vi.fn(),
            store: vi.fn(),
            env: vi.fn(),
            log: vi.fn(),
            user: { roles: [] },
            signal: new AbortController().signal,
            sessionId: 'test-session',
        }),
        ...overrides,
    };
}
125
+ // ---------------------------------------------------------------------------
126
+ // 1. Unit: transition dispatcher + exhaustive switch
127
+ // ---------------------------------------------------------------------------
128
// Exercises the top-level `transition` dispatcher with the default mock
// context: one case for an active state and one for the terminal state.
describe('transition', () => {
    it('dispatches thinking state to handleThinking', async () => {
        const ctx = makeMockContext();
        const state = { type: 'thinking', messages: [] };
        const result = await transition(state, ctx);
        // Should transition to streaming
        expect(result.next.type).toBe('streaming');
        // The context started at turnCount 0, so one thinking step makes it 1.
        expect(ctx.turnCount).toBe(1);
    });
    it('dispatches done state as pass-through', async () => {
        const ctx = makeMockContext();
        const state = { type: 'done', usage: makeUsage(), reason: 'model_stop' };
        const result = await transition(state, ctx);
        // `toBe` checks identity: the very same state object must come back.
        expect(result.next).toBe(state);
        expect(result.effects).toEqual([]);
    });
});
145
+ // ---------------------------------------------------------------------------
146
+ // 2. State handler unit tests
147
+ // ---------------------------------------------------------------------------
148
/**
 * Covers the `thinking` state as driven through `transition`: the provider
 * call, loop detection and escalation, clearing of old tool results, and the
 * repeated-call warning.
 */
describe('handleThinking (via transition)', () => {
  /** Fresh `thinking` state; a new empty history is created when none is given. */
  const thinkingState = (messages = []) => ({ type: 'thinking', messages });

  /** Options object the mocked `provider.streamText` received on call `callIndex`. */
  const streamTextArgs = (ctx, callIndex = 0) =>
    vi.mocked(ctx.provider.streamText).mock.calls[callIndex][0];

  /**
   * Builds `count` assistant tool-call / tool-result pairs for a single tool.
   * Call ids are `c0`, `c1`, ...; `inputFor(i)` supplies the i-th call's input
   * and every result carries `resultText`.
   */
  const callHistory = (count, toolName, inputFor, resultText) =>
    Array.from({ length: count }, (_, i) => [
      {
        role: 'assistant',
        content: [{ type: 'tool-call', toolCallId: `c${i}`, toolName, input: inputFor(i) }],
      },
      {
        role: 'tool',
        content: [{ type: 'tool-result', toolCallId: `c${i}`, toolName, output: { type: 'text', value: resultText } }],
      },
    ]).flat();

  /** Builds `count` tool-result-only messages; `valueFor(i)` supplies each output text. */
  const toolResults = (count, toolName, valueFor) =>
    Array.from({ length: count }, (_, i) => ({
      role: 'tool',
      content: [{
        type: 'tool-result',
        toolCallId: `c${i}`,
        toolName,
        output: { type: 'text', value: valueFor(i) },
      }],
    }));

  /**
   * Stringified `output.value` of a tool message's first part, or `undefined`
   * when the message is not a tool message whose first part has an object
   * `output` containing `value`.
   */
  const firstOutputText = (msg) => {
    if (msg.role !== 'tool' || !Array.isArray(msg.content)) {
      return undefined;
    }
    const part = msg.content[0];
    const hasValue = 'output' in part && part.output && typeof part.output === 'object' && 'value' in part.output;
    return hasValue ? String(part.output.value) : undefined;
  };

  it('increments turn count and starts streaming', async () => {
    const ctx = makeMockContext();
    const { next } = await transition(thinkingState(), ctx);
    expect(next.type).toBe('streaming');
    expect(ctx.turnCount).toBe(1);
    expect(ctx.provider.streamText).toHaveBeenCalledWith(expect.objectContaining({
      system: 'You are a helpful assistant.',
      abortSignal: ctx.signal,
    }));
  });

  it('passes tool schemas without execute functions to provider', async () => {
    const searchDef = makeMockToolDef({ description: 'Search repos' });
    const ctx = makeMockContext({ toolRegistry: makeMockRegistry({ search: searchDef }) });
    await transition(thinkingState(), ctx);
    const { search: schema } = streamTextArgs(ctx).tools;
    expect(schema).toBeDefined();
    // The provider gets `inputSchema` (not `parameters`) and never an `execute`.
    expect(schema).toHaveProperty('inputSchema');
    expect(schema).not.toHaveProperty('execute');
  });

  it('detects tool call loops and forces done(loop_detected)', async () => {
    // Eight identical calls to the same tool.
    const history = callHistory(8, 'stuck_tool', () => ({ q: 'same' }), 'error');
    const ctx = makeMockContext();
    const { next, effects } = await transition(thinkingState(history), ctx);
    expect(next.type).toBe('done');
    expect(next.reason).toBe('loop_detected');
    const errors = effects.filter((fx) => fx.type === SSEEventType.Error);
    expect(errors).toHaveLength(1);
  });

  it('detects loops with similar (not identical) parameters', async () => {
    // Only `retry_count` varies between calls. It is deliberately not a
    // pagination key (offset/limit/page/cursor), which count as iteration.
    const history = callHistory(8, 'search_api', (i) => ({ query: 'test', retry_count: i }), 'no results');
    const ctx = makeMockContext();
    const { next } = await transition(thinkingState(history), ctx);
    // Same tool, same keys, >50% identical values → detected as loop
    expect(next.type).toBe('done');
    expect(next.reason).toBe('loop_detected');
  });

  it('does NOT detect pagination as a loop (offset/limit/page variants)', async () => {
    // Walking one file in chunks: same tool and path, a different offset each
    // time. This is legitimate iteration and must not trip the detector.
    const history = callHistory(
      8,
      'read_repo_file',
      (i) => ({ path: 'knowledge/big.md', offset: 1 + i * 2000, limit: 2000 }),
      'chunk',
    );
    const ctx = makeMockContext();
    const { next } = await transition(thinkingState(history), ctx);
    expect(next.type).not.toBe('done');
  });

  it('replaces old tool results with summarizer output when hook is set', async () => {
    // 20 tool results so that clearing triggers (threshold=15 by default).
    const history = toolResults(20, 'search_api', (i) => `result body ${i} with lots of content`);
    const summarizer = vi.fn().mockResolvedValue('found 3 matching records');
    const ctx = makeMockContext({ summarizeToolResult: summarizer });
    await transition(thinkingState(history), ctx);
    // threshold=15, keepRecent=5 → everything but the last 5 is summarized.
    expect(summarizer).toHaveBeenCalled();
    expect(summarizer).toHaveBeenCalledTimes(15);
    // The summary must reach the provider inside the first (cleared) message.
    const clearedText = firstOutputText(streamTextArgs(ctx).messages[0]);
    if (clearedText !== undefined) {
      expect(clearedText).toContain('found 3 matching records');
      expect(clearedText).toContain('search_api');
    }
  });

  it('falls back to static marker when summarizer throws', async () => {
    const history = toolResults(20, 'flaky_tool', (i) => `body ${i}`);
    const summarizer = vi.fn().mockRejectedValue(new Error('haiku unavailable'));
    const ctx = makeMockContext({ summarizeToolResult: summarizer });
    await transition(thinkingState(history), ctx);
    // The hook ran and rejected; the turn still proceeds with the static marker.
    expect(summarizer).toHaveBeenCalled();
    const clearedText = firstOutputText(streamTextArgs(ctx).messages[0]);
    if (clearedText !== undefined) {
      expect(clearedText).toContain('Tool result cleared');
    }
    // The failure is reported through the logger.
    expect(ctx.logger.warn).toHaveBeenCalledWith('tool_result_summarization_failed', expect.objectContaining({ tool: 'flaky_tool' }));
  });

  it('skips already-cleared messages (idempotent)', async () => {
    // Cleared messages keep their original toolCallId (otherwise providers
    // reject "orphaned" assistant tool-calls), so they are recognised by the
    // output-value prefix: "[Tool result cleared..." or "[Summary of ...".
    // The first 15 are pre-cleared; the last 5 are fresh.
    const history = toolResults(20, 'search_api', (i) => (
      i < 15 ? '[Tool result cleared to save context space]' : `body ${i}`
    ));
    const summarizer = vi.fn().mockResolvedValue('summary');
    const ctx = makeMockContext({ summarizeToolResult: summarizer });
    await transition(thinkingState(history), ctx);
    // Clearing kicks in (20 > 15), but every candidate is already cleared.
    expect(summarizer).not.toHaveBeenCalled();
  });

  it('escalates at loopEscalationThreshold: stronger warn + removes looping tool', async () => {
    // Five calls: enough for escalation (default=5), short of hard-stop (default=8).
    const history = callHistory(5, 'stuck_api', () => ({ q: 'same' }), 'no progress');
    // The registry holds the looping tool plus one unrelated tool.
    const loopingDef = makeMockToolDef({ description: 'Stuck tool' });
    const bystanderDef = makeMockToolDef({ description: 'Other tool' });
    const ctx = makeMockContext({
      toolRegistry: makeMockRegistry({ stuck_api: loopingDef, other_tool: bystanderDef }),
    });
    const { next } = await transition(thinkingState(history), ctx);
    // Still streaming, i.e. no hard stop.
    expect(next.type).toBe('streaming');
    expect(ctx.logger.warn).toHaveBeenCalledWith('agent_loop_escalation', expect.objectContaining({ tool: 'stuck_api', count: 5 }));
    // This turn's tool set must omit the looping tool only.
    const { tools: offered, messages: sent } = streamTextArgs(ctx);
    expect(offered['stuck_api']).toBeUndefined();
    expect(offered['other_tool']).toBeDefined();
    // The escalation notice is the trailing user message.
    const trailing = sent[sent.length - 1];
    expect(trailing.role).toBe('user');
    if (typeof trailing.content === 'string') {
      expect(trailing.content).toContain('temporarily disabled');
      expect(trailing.content).toContain('stuck_api');
    }
    // The tool is recorded in the cooldown map, not merely filtered once, so
    // later turns skip it too.
    expect(ctx.disabledToolsUntilTurn.has('stuck_api')).toBe(true);
  });

  it('escalation cooldown keeps tool disabled across subsequent turns', async () => {
    // Scenario: escalation fired at turn 5 with the default cooldown of 3, so
    // the tool stays out for turns 5-7 and comes back at turn 8.
    const ctx = makeMockContext({
      toolRegistry: makeMockRegistry({
        stuck_api: makeMockToolDef({ description: 'Stuck tool' }),
        other_tool: makeMockToolDef({ description: 'Other tool' }),
      }),
    });
    ctx.turnCount = 4; // next turn will be 5
    ctx.disabledToolsUntilTurn.set('stuck_api', 8); // disable until turn 8
    // Turns 5, 6 and 7: the looping tool is withheld every time.
    for (let call = 0; call < 3; call += 1) {
      await transition(thinkingState(), ctx);
      const offered = streamTextArgs(ctx, call).tools;
      expect(offered['stuck_api']).toBeUndefined();
      if (call === 0) {
        expect(offered['other_tool']).toBeDefined();
      }
    }
    // Turn 8: the cooldown has expired and the tool is offered again.
    await transition(thinkingState(), ctx);
    expect(streamTextArgs(ctx, 3).tools['stuck_api']).toBeDefined();
    // The expired entry is removed from the map.
    expect(ctx.disabledToolsUntilTurn.has('stuck_api')).toBe(false);
  });

  it('injects warning when tool called 3+ times', async () => {
    const history = callHistory(3, 'flaky_api', () => ({}), 'fail');
    const ctx = makeMockContext();
    const { next } = await transition(thinkingState(history), ctx);
    // The turn keeps streaming; the warning travels in the outgoing messages.
    expect(next.type).toBe('streaming');
    const sent = streamTextArgs(ctx).messages;
    const trailing = sent[sent.length - 1];
    expect(trailing.role).toBe('user');
    if (typeof trailing.content === 'string') {
      expect(trailing.content).toContain('flaky_api');
      expect(trailing.content).toContain('3 times');
    }
  });
});
432
/**
 * Covers the `streaming` state as driven through `transition`: text-only and
 * tool-call outcomes, usage tracking, stream errors, and pre-execution of
 * read-only tools.
 */
describe('handleStreaming (via transition)', () => {
  /** Wraps a mock stream in a `streaming` state with no pending tool calls. */
  const streamingState = (stream) => ({ type: 'streaming', stream, pendingToolCalls: [] });

  /** Effects whose `type` equals `kind`. */
  const effectsOfType = (effects, kind) => effects.filter((fx) => fx.type === kind);

  it('text-only response transitions to done(model_stop)', async () => {
    const stream = makeMockStream([
      { type: 'text-delta', textDelta: 'Hello!' },
      { type: 'finish', usage: makeUsage({ inputTokens: 50, outputTokens: 20, totalTokens: 70 }) },
    ]);
    const ctx = makeMockContext();
    const { next, effects } = await transition(streamingState(stream), ctx);
    expect(next.type).toBe('done');
    expect(next.reason).toBe('model_stop');
    // At least one text_delta event must have been emitted.
    expect(effectsOfType(effects, SSEEventType.TextDelta).length).toBeGreaterThan(0);
  });

  it('tool call response transitions to executing', async () => {
    const stream = makeMockStream([
      { type: 'text-delta', textDelta: 'Let me search.' },
      {
        type: 'tool-call',
        toolCallId: 'call-1',
        toolName: 'search',
        args: { query: 'test' },
      },
      { type: 'finish', usage: makeUsage({ inputTokens: 50, outputTokens: 20, totalTokens: 70 }) },
    ], 'Let me search.');
    const ctx = makeMockContext();
    const { next } = await transition(streamingState(stream), ctx);
    expect(next.type).toBe('executing');
    const { current } = next;
    expect(current.toolCallId).toBe('call-1');
    expect(current.toolName).toBe('search');
    expect(current.args).toEqual({ query: 'test' });
  });

  it('tracks token usage from finish events', async () => {
    const stream = makeMockStream([
      { type: 'finish', usage: makeUsage({ inputTokens: 100, outputTokens: 50, totalTokens: 150 }) },
    ]);
    const ctx = makeMockContext();
    await transition(streamingState(stream), ctx);
    const { inputTokens, outputTokens } = ctx.usage;
    expect(inputTokens).toBe(100);
    expect(outputTokens).toBe(50);
  });

  it('stream error transitions to done(error)', async () => {
    const stream = makeMockStream([
      { type: 'error', error: new Error('Provider failed') },
    ]);
    const ctx = makeMockContext();
    const { next, effects } = await transition(streamingState(stream), ctx);
    expect(next.type).toBe('done');
    expect(next.reason).toBe('error');
    // Exactly one error SSE event accompanies the failure.
    expect(effectsOfType(effects, SSEEventType.Error)).toHaveLength(1);
  });

  it('pre-executes read-only tools during streaming', async () => {
    const lookupDef = makeMockToolDef({
      readOnly: true,
      execute: vi.fn().mockResolvedValue('cached result'),
    });
    const registry = makeMockRegistry({ lookup: lookupDef });
    const stream = makeMockStream([
      { type: 'tool-call', toolCallId: 'call-ro', toolName: 'lookup', args: { id: '1' } },
      { type: 'finish', usage: makeUsage() },
    ], '');
    const ctx = makeMockContext({ toolRegistry: registry });
    await transition(streamingState(stream), ctx);
    // The call id must now have an entry in the pre-execution cache.
    expect(ctx.preExecutionCache.has('call-ro')).toBe(true);
  });

  it('logs pre-execution errors on abort instead of swallowing silently', async () => {
    const crashingDef = makeMockToolDef({
      readOnly: true,
      execute: vi.fn().mockRejectedValue(new Error('tool crashed')),
    });
    const registry = makeMockRegistry({ broken_lookup: crashingDef });
    const stream = makeMockStream([
      { type: 'tool-call', toolCallId: 'call-fail', toolName: 'broken_lookup', args: {} },
      { type: 'finish', usage: makeUsage() },
    ], '');
    const ctx = makeMockContext({ toolRegistry: registry });
    await transition(streamingState(stream), ctx);
    // The cached promise exists and rejects with the tool's error.
    const pending = ctx.preExecutionCache.get('call-fail');
    expect(pending).toBeDefined();
    await expect(pending).rejects.toThrow('tool crashed');
    // The suppression handler reports the error via the debug logger.
    expect(ctx.logger.debug).toHaveBeenCalledWith('preexec_suppressed', expect.objectContaining({
      tool: 'broken_lookup',
      error: 'tool crashed',
    }));
  });
});
536
+ describe('handleExecuting (via transition)', () => {
537
// Single call with an empty queue: the tool runs once and control returns to `thinking`.
it('executes a tool and transitions to thinking when queue empty', async () => {
  const searchDef = makeMockToolDef({
    execute: vi.fn().mockResolvedValue({ repos: ['amodal'] }),
  });
  const ctx = makeMockContext({ toolRegistry: makeMockRegistry({ search: searchDef }) });
  const pendingCall = { toolCallId: 'call-1', toolName: 'search', args: { q: 'test' } };
  const { next, effects } = await transition(
    { type: 'executing', queue: [], current: pendingCall, results: [] },
    ctx,
  );
  expect(next.type).toBe('thinking');
  expect(searchDef.execute).toHaveBeenCalledWith({ q: 'test' }, expect.objectContaining({ sessionId: 'test-session' }));
  // Exactly one tool_call_start and one tool_call_result SSE event.
  const countOf = (kind) => effects.filter((fx) => fx.type === kind).length;
  expect(countOf(SSEEventType.ToolCallStart)).toBe(1);
  expect(countOf(SSEEventType.ToolCallResult)).toBe(1);
});
558
// First pass over a confirmation-gated tool: go to `confirming`, do not execute.
it('routes requiresConfirmation tools through CONFIRMING on first pass', async () => {
  const guardedDef = makeMockToolDef({
    execute: vi.fn().mockResolvedValue({ deleted: true }),
    requiresConfirmation: true,
  });
  const ctx = makeMockContext({ toolRegistry: makeMockRegistry({ delete_repo: guardedDef }) });
  const pendingCall = { toolCallId: 'call-danger', toolName: 'delete_repo', args: { name: 'foo' } };
  const { next, effects } = await transition(
    { type: 'executing', queue: [], current: pendingCall, results: [] },
    ctx,
  );
  expect(next.type).toBe('confirming');
  expect(guardedDef.execute).not.toHaveBeenCalled();
  // One ConfirmationRequired SSE event is emitted.
  const confirmations = effects.filter((fx) => fx.type === SSEEventType.ConfirmationRequired);
  expect(confirmations).toHaveLength(1);
});
578
// A call id already present in `confirmedCallIds` must execute rather than re-confirm.
it('executes requiresConfirmation tools after approval (no re-confirm loop)', async () => {
  const guardedDef = makeMockToolDef({
    execute: vi.fn().mockResolvedValue({ deleted: true }),
    requiresConfirmation: true,
  });
  const ctx = makeMockContext({ toolRegistry: makeMockRegistry({ delete_repo: guardedDef }) });
  // Stand-in for CONFIRMING having approved this call earlier.
  ctx.confirmedCallIds.add('call-approved');
  const pendingCall = { toolCallId: 'call-approved', toolName: 'delete_repo', args: { name: 'foo' } };
  const { next } = await transition(
    { type: 'executing', queue: [], current: pendingCall, results: [] },
    ctx,
  );
  // Executed this time; not routed back to CONFIRMING.
  expect(next.type).toBe('thinking');
  expect(guardedDef.execute).toHaveBeenCalledTimes(1);
});
598
// Regression test for a latent infinite loop: a connection tool whose ACL gate
// answers requiresConfirmation=true used to bounce back to CONFIRMING on every
// pass, because the permission checker has no notion of "already approved".
// `confirmedCallIds` is what breaks the cycle.
it('connection tool does NOT re-prompt after confirmedCallIds marks the call', async () => {
  const githubDef = makeMockToolDef({
    execute: vi.fn().mockResolvedValue({ ok: true }),
    metadata: { category: 'connection', connection: 'github' },
  });
  const registry = makeMockRegistry({ request: githubDef });
  const alwaysAskChecker = {
    check: vi.fn().mockReturnValue({
      allowed: true,
      requiresConfirmation: true,
      reason: 'Write to github requires confirmation',
    }),
  };
  const ctx = makeMockContext({ toolRegistry: registry, permissionChecker: alwaysAskChecker });
  // Stand-in for CONFIRMING having approved this call earlier.
  ctx.confirmedCallIds.add('call-gh-write');
  const pendingCall = {
    toolCallId: 'call-gh-write',
    toolName: 'request',
    args: { method: 'POST', endpoint: '/repos/foo', intent: 'confirmed_write' },
  };
  const { next, effects } = await transition(
    { type: 'executing', queue: [], current: pendingCall, results: [] },
    ctx,
  );
  // The tool runs instead of being routed back to CONFIRMING.
  expect(next.type).toBe('thinking');
  expect(githubDef.execute).toHaveBeenCalledTimes(1);
  // And no ConfirmationRequired SSE event is emitted.
  const confirmations = effects.filter((fx) => fx.type === SSEEventType.ConfirmationRequired);
  expect(confirmations).toHaveLength(0);
});
638
// With a tiny token budget and a large tool result, the next state is `compacting`.
it('transitions to compacting when context exceeds threshold', async () => {
  const bulkyDef = makeMockToolDef({
    // A large result to inflate the context.
    execute: vi.fn().mockResolvedValue('x'.repeat(10_000)),
  });
  const ctx = makeMockContext({
    toolRegistry: makeMockRegistry({ big_tool: bulkyDef }),
    maxContextTokens: 1000, // deliberately tiny budget
    config: { ...DEFAULT_LOOP_CONFIG, compactThreshold: 0.7 },
  });
  // Seed the history with 20 user messages of 200 characters each.
  ctx.messages = new Array(20).fill(undefined).map(() => ({
    role: 'user',
    content: 'x'.repeat(200),
  }));
  const pendingCall = { toolCallId: 'call-1', toolName: 'big_tool', args: {} };
  const { next } = await transition(
    { type: 'executing', queue: [], current: pendingCall, results: [] },
    ctx,
  );
  expect(next.type).toBe('compacting');
  expect(next.estimatedTokens).toBeGreaterThan(0);
  expect(ctx.logger.info).toHaveBeenCalledWith('context_compaction_triggered', expect.objectContaining({
    session: 'test-session',
  }));
});
669
// A non-empty queue: after the current call, the head of the queue becomes current.
it('continues to next tool when queue has more items', async () => {
  const sharedDef = makeMockToolDef();
  const ctx = makeMockContext({
    toolRegistry: makeMockRegistry({ tool_a: sharedDef, tool_b: sharedDef }),
  });
  const firstCall = { toolCallId: 'call-1', toolName: 'tool_a', args: {} };
  const secondCall = { toolCallId: 'call-2', toolName: 'tool_b', args: {} };
  const { next } = await transition(
    { type: 'executing', queue: [secondCall], current: firstCall, results: [] },
    ctx,
  );
  expect(next.type).toBe('executing');
  expect(next.current.toolCallId).toBe('call-2');
  expect(next.queue).toEqual([]);
});
686
// A call to a tool missing from the registry must not derail the loop.
it('returns error result for unknown tool', async () => {
  const ctx = makeMockContext();
  const pendingCall = { toolCallId: 'call-1', toolName: 'nonexistent', args: {} };
  const { next } = await transition(
    { type: 'executing', queue: [], current: pendingCall, results: [] },
    ctx,
  );
  // Back to thinking, so the agent gets a chance to recover.
  expect(next.type).toBe('thinking');
  // Something (the error tool result) was appended to the history.
  expect(ctx.messages.length).toBeGreaterThan(0);
});
700
// A rejecting `execute` is reported and logged; the loop continues in `thinking`.
it('handles tool execution error as continue site', async () => {
  const rejectingDef = makeMockToolDef({
    execute: vi.fn().mockRejectedValue(new Error('API rate limit')),
  });
  const ctx = makeMockContext({ toolRegistry: makeMockRegistry({ api_call: rejectingDef }) });
  const pendingCall = { toolCallId: 'call-1', toolName: 'api_call', args: {} };
  const { next, effects } = await transition(
    { type: 'executing', queue: [], current: pendingCall, results: [] },
    ctx,
  );
  // No crash: the model gets another turn to recover.
  expect(next.type).toBe('thinking');
  // One tool_call_result event is emitted for the failed call.
  const toolResults = effects.filter((fx) => fx.type === SSEEventType.ToolCallResult);
  expect(toolResults).toHaveLength(1);
  // The error is recorded by the logger.
  expect(ctx.logger.error).toHaveBeenCalledWith('tool_execution_error', expect.objectContaining({
    tool: 'api_call',
  }));
});
723
// Arguments that violate the tool's zod schema are rejected before `execute` runs.
it('rejects hallucinated args that fail schema validation', async () => {
  const { z } = await import('zod');
  const schema = z.object({ query: z.string(), limit: z.number().int().positive() });
  const strictDef = makeMockToolDef({ parameters: schema });
  const ctx = makeMockContext({ toolRegistry: makeMockRegistry({ search: strictDef }) });
  // `query` has the wrong type and `limit` is negative.
  const pendingCall = { toolCallId: 'call-1', toolName: 'search', args: { query: 123, limit: -5 } };
  const { next } = await transition(
    { type: 'executing', queue: [], current: pendingCall, results: [] },
    ctx,
  );
  // Recovery path: back to thinking, with a warning logged.
  expect(next.type).toBe('thinking');
  expect(ctx.logger.warn).toHaveBeenCalledWith('tool_args_invalid', expect.objectContaining({
    tool: 'search',
  }));
  // The tool itself is never invoked.
  expect(strictDef.execute).not.toHaveBeenCalled();
});
745
// The tool_call_start event must show `api_key` redacted and leave `query` intact.
it('sanitizes sensitive parameters in SSE events', async () => {
  const authDef = makeMockToolDef();
  const ctx = makeMockContext({ toolRegistry: makeMockRegistry({ auth_tool: authDef }) });
  const pendingCall = {
    toolCallId: 'call-1',
    toolName: 'auth_tool',
    args: { api_key: 'sk-secret123', query: 'hello' },
  };
  const { effects } = await transition(
    { type: 'executing', queue: [], current: pendingCall, results: [] },
    ctx,
  );
  const startEvent = effects.find((fx) => fx.type === SSEEventType.ToolCallStart);
  expect(startEvent).toBeDefined();
  // The parameter checks only apply when the event carries `parameters`.
  if (!startEvent || !('parameters' in startEvent)) {
    return;
  }
  const { parameters } = startEvent;
  expect(parameters['api_key']).toBe('[REDACTED]');
  expect(parameters['query']).toBe('hello');
});
768
// A 25,000-character result must be shortened before it enters the history.
it('snips oversized tool results keeping head and tail', async () => {
  const largeOutput = 'A'.repeat(25_000); // Exceeds 20K default maxResultSize
  const bigDef = makeMockToolDef({
    execute: vi.fn().mockResolvedValue(largeOutput),
  });
  const ctx = makeMockContext({ toolRegistry: makeMockRegistry({ big_api: bigDef }) });
  const pendingCall = { toolCallId: 'call-1', toolName: 'big_api', args: {} };
  await transition({ type: 'executing', queue: [], current: pendingCall, results: [] }, ctx);
  // The newest history entry is the tool message holding the snipped text.
  const newest = ctx.messages[ctx.messages.length - 1];
  expect(newest.role).toBe('tool');
  if (Array.isArray(newest.content) && 'output' in newest.content[0]) {
    const { output } = newest.content[0];
    let text = '';
    if (typeof output === 'object' && output !== null && 'value' in output) {
      text = String(output.value);
    }
    expect(text).toContain('snipped');
    expect(text.length).toBeLessThan(largeOutput.length);
  }
  expect(ctx.logger.info).toHaveBeenCalledWith('tool_result_snipped', expect.objectContaining({
    tool: 'big_api',
    originalSize: 25_000,
  }));
});
798
// A result already cached under the call id must be reused, not recomputed.
it('uses pre-execution cache for read-only tools', async () => {
  const readDef = makeMockToolDef({ readOnly: true });
  const ctx = makeMockContext({ toolRegistry: makeMockRegistry({ read_data: readDef }) });
  // Stand-in for what the streaming phase would have stored.
  ctx.preExecutionCache.set('call-cached', Promise.resolve({ data: 'cached' }));
  const pendingCall = { toolCallId: 'call-cached', toolName: 'read_data', args: {} };
  await transition({ type: 'executing', queue: [], current: pendingCall, results: [] }, ctx);
  // The tool's own execute is never called when the cache is hit.
  expect(readDef.execute).not.toHaveBeenCalled();
});
814
+ // -------------------------------------------------------------------------
815
+ // Parallel tool calls: batch contiguous read-only calls
816
+ // -------------------------------------------------------------------------
817
// Two contiguous read-only calls must be executed as one concurrent batch.
//
// Concurrency is proven by counting how many `execute` calls are in flight at
// the same moment, not by measuring wall-clock time. Timers only guarantee a
// minimum delay, so an elapsed-time bound can fail spuriously on a loaded
// machine, whereas the peak in-flight count is exact: 2 when batched in
// parallel, 1 when run sequentially.
it('batches contiguous read-only calls and runs them concurrently', async () => {
  let inFlight = 0;
  let peakInFlight = 0;
  // Each mock resolves to 'ok' after 50 ms and records the overlap while pending.
  const makeTrackedExecute = () => vi.fn(async () => {
    inFlight += 1;
    peakInFlight = Math.max(peakInFlight, inFlight);
    try {
      await new Promise((resolve) => setTimeout(resolve, 50));
      return 'ok';
    } finally {
      inFlight -= 1;
    }
  });
  const readA = makeMockToolDef({
    readOnly: true,
    execute: makeTrackedExecute(),
  });
  const readB = makeMockToolDef({
    readOnly: true,
    execute: makeTrackedExecute(),
  });
  const registry = makeMockRegistry({ read_a: readA, read_b: readB });
  const ctx = makeMockContext({ toolRegistry: registry });
  const state = {
    type: 'executing',
    queue: [{ toolCallId: 'call-b', toolName: 'read_b', args: {} }],
    current: { toolCallId: 'call-a', toolName: 'read_a', args: {} },
    results: [],
  };
  const result = await transition(state, ctx);
  // Both executed
  expect(readA.execute).toHaveBeenCalledTimes(1);
  expect(readB.execute).toHaveBeenCalledTimes(1);
  // Parallel: both calls were pending at the same time, and none is left running
  expect(peakInFlight).toBe(2);
  expect(inFlight).toBe(0);
  // Batch drained the queue and transitioned to thinking in one step
  expect(result.next.type).toBe('thinking');
  // Both results appended to messages
  const toolMessages = ctx.messages.filter((m) => m.role === 'tool');
  expect(toolMessages).toHaveLength(2);
  // Per-call SSE events emitted (2 start + 2 result)
  const starts = result.effects.filter((e) => e.type === SSEEventType.ToolCallStart);
  const results = result.effects.filter((e) => e.type === SSEEventType.ToolCallResult);
  expect(starts).toHaveLength(2);
  expect(results).toHaveLength(2);
});
856
it('stops batching at the first write (non-readOnly) tool', async () => {
    // Queue shape: read, read, WRITE, read. Only the leading run of reads
    // may be batched; the write and everything after it stay queued.
    const reader = makeMockToolDef({ readOnly: true, execute: vi.fn().mockResolvedValue('r') });
    const writer = makeMockToolDef({ readOnly: false, execute: vi.fn().mockResolvedValue('w') });
    const ctx = makeMockContext({
        toolRegistry: makeMockRegistry({ read: reader, write: writer }),
    });
    const call = (toolCallId, toolName) => ({ toolCallId, toolName, args: {} });
    const { next } = await transition({
        type: 'executing',
        queue: [
            call('call-read-2', 'read'),
            call('call-write', 'write'),
            call('call-read-3', 'read'),
        ],
        current: call('call-read-1', 'read'),
        results: [],
    }, ctx);
    // The two leading reads ran as one batch; the write ended the batch.
    expect(reader.execute).toHaveBeenCalledTimes(2);
    expect(writer.execute).not.toHaveBeenCalled();
    // The write becomes the next sequential call, with the last read behind it.
    expect(next.type).toBe('executing');
    if (next.type === 'executing') {
        expect(next.current.toolCallId).toBe('call-write');
        expect(next.queue).toHaveLength(1);
        expect(next.queue[0].toolCallId).toBe('call-read-3');
    }
});
883
it('does not batch when the current call is a write', async () => {
    // A write at the head of the queue runs alone, even if reads follow it.
    const writer = makeMockToolDef({ readOnly: false, execute: vi.fn().mockResolvedValue('w') });
    const reader = makeMockToolDef({ readOnly: true, execute: vi.fn().mockResolvedValue('r') });
    const ctx = makeMockContext({
        toolRegistry: makeMockRegistry({ write: writer, read: reader }),
    });
    const { next } = await transition({
        type: 'executing',
        queue: [{ toolCallId: 'call-read', toolName: 'read', args: {} }],
        current: { toolCallId: 'call-write', toolName: 'write', args: {} },
        results: [],
    }, ctx);
    // Only the write executed; the read waits for the following transition.
    expect(writer.execute).toHaveBeenCalledTimes(1);
    expect(reader.execute).not.toHaveBeenCalled();
    expect(next.type).toBe('executing');
    if (next.type === 'executing') {
        expect(next.current.toolCallId).toBe('call-read');
    }
});
903
it('does not batch connection tools (any call could transition to CONFIRMING)', async () => {
    // Read-only, but tagged as a connection tool: each call is expected to
    // be processed on its own rather than folded into a batch.
    const connectionTool = makeMockToolDef({
        readOnly: true,
        execute: vi.fn().mockResolvedValue('x'),
        metadata: { category: 'connection', connection: 'github' },
    });
    const ctx = makeMockContext({
        toolRegistry: makeMockRegistry({ request: connectionTool }),
    });
    const getRequest = (toolCallId, endpoint) => ({
        toolCallId,
        toolName: 'request',
        args: { method: 'GET', endpoint },
    });
    const { next } = await transition({
        type: 'executing',
        queue: [getRequest('call-2', '/x')],
        current: getRequest('call-1', '/y'),
        results: [],
    }, ctx);
    // Only the first ran; the second stays queued for its own ACL check
    expect(connectionTool.execute).toHaveBeenCalledTimes(1);
    expect(next.type).toBe('executing');
});
922
it('does not batch read-only tools that require confirmation', async () => {
    // readOnly alone is not enough to batch: a tool flagged with
    // requiresConfirmation must be routed through the confirming state first.
    const gated = makeMockToolDef({
        readOnly: true,
        requiresConfirmation: true,
        execute: vi.fn().mockResolvedValue('x'),
    });
    const ctx = makeMockContext({ toolRegistry: makeMockRegistry({ gated }) });
    const gatedCall = (toolCallId) => ({ toolCallId, toolName: 'gated', args: {} });
    const { next } = await transition({
        type: 'executing',
        queue: [gatedCall('call-2')],
        current: gatedCall('call-1'),
        results: [],
    }, ctx);
    // Routed to confirmation; nothing has executed yet.
    expect(next.type).toBe('confirming');
    expect(gated.execute).not.toHaveBeenCalled();
});
941
it('a failure in one batched call does not block the others', async () => {
    // Batch of three read-only calls where the middle one rejects.
    const good = makeMockToolDef({ readOnly: true, execute: vi.fn().mockResolvedValue('ok') });
    const bad = makeMockToolDef({
        readOnly: true,
        execute: vi.fn().mockRejectedValue(new Error('boom')),
    });
    const ctx = makeMockContext({ toolRegistry: makeMockRegistry({ good, bad }) });
    const call = (toolCallId, toolName) => ({ toolCallId, toolName, args: {} });
    const { next, effects } = await transition({
        type: 'executing',
        queue: [call('call-bad', 'bad'), call('call-good-2', 'good')],
        current: call('call-good-1', 'good'),
        results: [],
    }, ctx);
    // Every call was attempted despite the rejection in the middle.
    expect(good.execute).toHaveBeenCalledTimes(2);
    expect(bad.execute).toHaveBeenCalledTimes(1);
    expect(next.type).toBe('thinking');
    // Each call, including the failed one, produced a tool message.
    const toolMessages = ctx.messages.filter((message) => message.role === 'tool');
    expect(toolMessages).toHaveLength(3);
    // The failure is reported as exactly one error-status result event.
    const isErrorResult = (effect) => effect.type === SSEEventType.ToolCallResult && effect.status === 'error';
    expect(effects.filter(isErrorResult)).toHaveLength(1);
});
969
+ });
970
// Covers the `confirming` state (approval / denial of a gated tool call) and,
// as in the original suite, the interception of `dispatch_task` calls from
// the `executing` state. Everything is driven through transition().
describe('handleConfirming (via transition)', () => {
    // `confirming` state for a delete_item call that is awaiting user approval.
    const pendingDelete = (toolCallId) => ({
        type: 'confirming',
        call: { toolCallId, toolName: 'delete_item', args: { id: '123' } },
        remainingQueue: [],
    });
    // Context whose confirmation prompt resolves to the given answer.
    const contextAnswering = (approved) => makeMockContext({
        waitForConfirmation: vi.fn().mockResolvedValue(approved),
    });
    // Minimal dispatch_task tool definition; `execute` is a spy that the
    // tests expect to stay untouched.
    const makeDispatchTool = (description) => ({
        description,
        parameters: z.object({ agent_name: z.string(), tools: z.array(z.string()), prompt: z.string() }),
        execute: vi.fn(),
        readOnly: false,
        metadata: { category: 'system' },
    });
    // `executing` state whose current (and only) call is a dispatch_task.
    const dispatchState = (toolCallId, args) => ({
        type: 'executing',
        queue: [],
        current: { toolCallId, toolName: 'dispatch_task', args },
        results: [],
    });
    it('approved confirmation resumes executing', async () => {
        const ctx = contextAnswering(true);
        const { next } = await transition(pendingDelete('call-1'), ctx);
        expect(next.type).toBe('executing');
        if (next.type === 'executing') {
            expect(next.current.toolCallId).toBe('call-1');
        }
    });
    it('denied confirmation transitions to thinking with denial message', async () => {
        const ctx = contextAnswering(false);
        const { next } = await transition(pendingDelete('call-1'), ctx);
        expect(next.type).toBe('thinking');
        // A denial message should have been appended
        expect(ctx.messages.length).toBeGreaterThan(0);
    });
    it('approved confirmation marks the callId in ctx.confirmedCallIds', async () => {
        const ctx = contextAnswering(true);
        await transition(pendingDelete('call-XYZ'), ctx);
        expect(ctx.confirmedCallIds.has('call-XYZ')).toBe(true);
    });
    it('denied confirmation does NOT mark callId as confirmed', async () => {
        const ctx = contextAnswering(false);
        await transition(pendingDelete('call-DENIED'), ctx);
        expect(ctx.confirmedCallIds.has('call-DENIED')).toBe(false);
    });
    it('intercepts dispatch_task and transitions to DISPATCHING', async () => {
        const dispatchTool = makeDispatchTool('Dispatch sub-task');
        const ctx = makeMockContext({
            toolRegistry: makeMockRegistry({ dispatch_task: dispatchTool }),
        });
        const result = await transition(dispatchState('tc-dispatch', { agent_name: 'fetcher', tools: ['request'], prompt: 'Fetch data' }), ctx);
        // Should transition to DISPATCHING, not execute the tool
        expect(result.next.type).toBe('dispatching');
        if (result.next.type === 'dispatching') {
            expect(result.next.task.agentName).toBe('fetcher');
            expect(result.next.task.toolSubset).toEqual(['request']);
            expect(result.next.task.prompt).toBe('Fetch data');
            expect(result.next.toolCallId).toBe('tc-dispatch');
        }
        // Should emit ToolCallStart but NOT call execute
        const startEvents = result.effects.filter((e) => e.type === SSEEventType.ToolCallStart);
        expect(startEvents.length).toBe(1);
        expect(dispatchTool.execute).not.toHaveBeenCalled();
    });
    it('strips dispatch_task from child tool subset', async () => {
        const dispatchTool = makeDispatchTool('Dispatch');
        const ctx = makeMockContext({
            toolRegistry: makeMockRegistry({ dispatch_task: dispatchTool }),
        });
        const result = await transition(dispatchState('tc-d', { agent_name: 'child', tools: ['request', 'dispatch_task', 'query_store'], prompt: 'Go' }), ctx);
        // Assert the state type first: without this the subset checks below
        // are skipped, and the test would pass, whenever the transition ends
        // up anywhere other than `dispatching`.
        expect(result.next.type).toBe('dispatching');
        if (result.next.type === 'dispatching') {
            expect(result.next.task.toolSubset).toEqual(['request', 'query_store']);
            expect(result.next.task.toolSubset).not.toContain('dispatch_task');
        }
    });
});
1085
// Covers the `compacting` state: successful summarization, the too-short
// no-op path, the failure circuit breaker and the failure counter.
describe('handleCompacting (via transition)', () => {
    // Alternating user/assistant messages, `turns` pairs long. The two
    // callbacks supply the message text for a given turn index.
    const buildConversation = (turns, userText, assistantText) => Array.from({ length: turns }, (_unused, turn) => [
        { role: 'user', content: userText(turn) },
        { role: 'assistant', content: assistantText(turn) },
    ]).flat();
    // Ten-turn conversation with short "Q<n>" / "A<n>" contents.
    const tenShortTurns = () => buildConversation(10, (turn) => `Q${turn}`, (turn) => `A${turn}`);
    // Test provider whose only meaningful member is the supplied generateText mock.
    const providerWith = (generateText) => ({
        model: 'test-model',
        provider: 'test',
        languageModel: {},
        streamText: vi.fn(),
        generateText,
    });
    // Wraps a message list into a `compacting` state.
    const compacting = (messages, estimatedTokens) => ({ type: 'compacting', messages, estimatedTokens });
    it('summarizes old messages and keeps recent turns', async () => {
        // Build a conversation with enough turns to compact
        const messages = buildConversation(10, (turn) => `Question ${turn}`, (turn) => `Answer ${turn}`);
        const ctx = makeMockContext({
            provider: providerWith(vi.fn().mockResolvedValue({
                text: '## Summary\nThis is a compacted summary.',
                toolCalls: [],
                usage: makeUsage({ inputTokens: 200, outputTokens: 100, totalTokens: 300 }),
                finishReason: 'stop',
            })),
        });
        const { next, effects } = await transition(compacting(messages, 5000), ctx);
        expect(next.type).toBe('thinking');
        if (next.type === 'thinking') {
            // Should have fewer messages (summary + recent turns)
            expect(next.messages.length).toBeLessThan(messages.length);
            // First message should be the compaction summary (user role, not system,
            // because Anthropic rejects system messages after user/assistant turns)
            const [summaryMessage] = next.messages;
            expect(summaryMessage.role).toBe('user');
            const summaryText = summaryMessage.content;
            expect(typeof summaryText === 'string' && summaryText.includes('Conversation Summary')).toBe(true);
        }
        // Exactly one compaction_start and one compaction_end SSE event.
        const countOf = (eventType) => effects.filter((effect) => effect.type === eventType).length;
        expect(countOf(SSEEventType.CompactionStart)).toBe(1);
        expect(countOf(SSEEventType.CompactionEnd)).toBe(1);
        // Token usage should be tracked
        expect(ctx.usage.inputTokens).toBe(200);
        expect(ctx.usage.outputTokens).toBe(100);
    });
    it('skips compaction when too few messages to split', async () => {
        const messages = [
            { role: 'user', content: 'Hello' },
            { role: 'assistant', content: 'Hi!' },
        ];
        const ctx = makeMockContext();
        const { next } = await transition(compacting(messages, 1000), ctx);
        expect(next.type).toBe('thinking');
        if (next.type === 'thinking') {
            // Same array instance comes back: nothing was compacted.
            expect(next.messages).toBe(messages);
        }
    });
    it('circuit breaker skips compaction after repeated failures', async () => {
        const messages = tenShortTurns();
        // Failure count starts at 3, the value this test treats as the threshold.
        const ctx = makeMockContext({ compactionFailures: 3 });
        const { next } = await transition(compacting(messages, 5000), ctx);
        expect(next.type).toBe('thinking');
        if (next.type === 'thinking') {
            expect(next.messages).toBe(messages); // Unchanged
        }
        expect(ctx.logger.warn).toHaveBeenCalledWith('compaction_circuit_breaker', expect.objectContaining({
            failures: 3,
        }));
    });
    it('increments failure counter on generateText error and continues', async () => {
        const messages = tenShortTurns();
        const ctx = makeMockContext({
            provider: providerWith(vi.fn().mockRejectedValue(new Error('Provider rate limited'))),
        });
        const { next } = await transition(compacting(messages, 5000), ctx);
        // The loop carries on with the original, uncompacted messages.
        expect(next.type).toBe('thinking');
        if (next.type === 'thinking') {
            expect(next.messages).toBe(messages); // Unchanged
        }
        expect(ctx.compactionFailures).toBe(1);
        expect(ctx.logger.error).toHaveBeenCalledWith('compaction_failed', expect.objectContaining({
            error: 'Summarization failed: Provider rate limited',
        }));
    });
    it('resets failure counter on successful compaction', async () => {
        const messages = tenShortTurns();
        const ctx = makeMockContext({
            compactionFailures: 2,
            provider: providerWith(vi.fn().mockResolvedValue({
                text: 'Summary of conversation.',
                toolCalls: [],
                usage: makeUsage({ inputTokens: 100, outputTokens: 50, totalTokens: 150 }),
                finishReason: 'stop',
            })),
        });
        await transition(compacting(messages, 5000), ctx);
        expect(ctx.compactionFailures).toBe(0);
    });
});
1208
// Covers the `dispatching` state: running a child agent, relaying its events
// to the parent, resuming the parent's queue, error recovery and token-budget
// inheritance. Everything is driven through transition().
describe('handleDispatching (via transition)', () => {
    // `dispatching` state for a child agent that is given no tools.
    const dispatching = (agentName, prompt, toolCallId, queue = []) => ({
        type: 'dispatching',
        task: { agentName, toolSubset: [], prompt },
        toolCallId,
        queue,
        results: [],
    });
    // All SubagentEvent effects of a transition result.
    const subagentEventsOf = (result) => result.effects.filter((e) => e.type === SSEEventType.SubagentEvent);
    it('runs child agent and returns text result to parent', async () => {
        const ctx = makeMockContext();
        const result = await transition(dispatching('research-agent', 'Find info', 'tc-dispatch-1'), ctx);
        // Child completes → parent goes to THINKING (no more queue items)
        expect(result.next.type).toBe('thinking');
        // Should have SubagentEvent effects (thought + complete)
        expect(subagentEventsOf(result).length).toBeGreaterThanOrEqual(1);
        // Should have a ToolCallResult for the dispatch_task call
        const toolResult = result.effects.find((e) => e.type === SSEEventType.ToolCallResult);
        expect(toolResult).toBeDefined();
        // Child usage merged into parent
        expect(ctx.usage.inputTokens).toBeGreaterThan(0);
    });
    it('emits SubagentEvent with correct parent_tool_id', async () => {
        const ctx = makeMockContext();
        const result = await transition(dispatching('profiler', 'Profile entity', 'tc-abc'), ctx);
        const subagentEvents = subagentEventsOf(result);
        // Guard against a vacuous pass: with no events the loop below would
        // assert nothing. The previous test establishes that a successful
        // dispatch emits at least one SubagentEvent.
        expect(subagentEvents.length).toBeGreaterThanOrEqual(1);
        for (const event of subagentEvents) {
            expect(event.parent_tool_id).toBe('tc-abc');
            expect(event.agent_name).toBe('profiler');
        }
    });
    it('continues execution queue after dispatch completes', async () => {
        const ctx = makeMockContext();
        const result = await transition(dispatching('fetcher', 'Fetch data', 'tc-dispatch', [{ toolCallId: 'tc-next', toolName: 'request', args: {} }]), ctx);
        // Should transition to executing the next tool in queue
        expect(result.next.type).toBe('executing');
    });
    it('handles child agent error gracefully', async () => {
        // Provider that throws on streamText
        const failingProvider = {
            model: 'test-model',
            provider: 'test',
            languageModel: {},
            streamText: vi.fn(() => { throw new Error('Provider crashed'); }),
            generateText: vi.fn(),
        };
        const ctx = makeMockContext({ provider: failingProvider });
        const result = await transition(dispatching('broken-agent', 'Do something', 'tc-fail'), ctx);
        // Should NOT crash — transitions to thinking so parent can recover
        expect(result.next.type).toBe('thinking');
        // Should emit SubagentEvent with error
        const errorEvents = result.effects.filter((e) => e.type === SSEEventType.SubagentEvent && 'event_type' in e && e.event_type === 'error');
        expect(errorEvents.length).toBeGreaterThanOrEqual(1);
        // Should emit ToolCallResult with error status
        const toolResult = result.effects.find((e) => e.type === SSEEventType.ToolCallResult);
        expect(toolResult).toBeDefined();
        if (toolResult && toolResult.type === SSEEventType.ToolCallResult) {
            expect(toolResult.status).toBe('error');
        }
        // Should log the error
        expect(ctx.logger.error).toHaveBeenCalledWith('dispatch_child_error', expect.objectContaining({
            agent: 'broken-agent',
        }));
    });
    it('propagates parent remaining token budget to child', async () => {
        // Parent has 100 budget, 90 already used → child should get 10.
        // Child's first mock turn yields 150 tokens, which exceeds the child's
        // 10-token budget; the child's outer loop catches it on the next check
        // and stops with budget_exceeded. Parent usage reflects the child's
        // consumed tokens once the child merges back.
        const parentCtx = makeMockContext({
            maxSessionTokens: 100,
            usage: { inputTokens: 60, outputTokens: 30, totalTokens: 90 },
        });
        const result = await transition(dispatching('starved-child', 'Do lots of work', 'tc-starved'), parentCtx);
        // Parent should resume (doesn't crash on child budget exhaustion)
        expect(result.next.type).toBe('thinking');
        // Child should have stopped early — its merged-back token usage should
        // not massively exceed the original budget, because the check fires on
        // the next outer loop iteration after the first 150-token turn.
        // (It won't be zero — the first turn runs fully and merges — but it
        // won't compound across many turns.)
        expect(parentCtx.usage.totalTokens).toBeLessThanOrEqual(90 + 150 + 150);
    });
    it('child inherits unlimited budget when parent has no cap', async () => {
        // No maxSessionTokens on parent → child should also have no cap (undefined).
        // The child runs through normal termination (model_stop), not budget.
        const parentCtx = makeMockContext({
            // maxSessionTokens intentionally omitted
            usage: { inputTokens: 500, outputTokens: 500, totalTokens: 1000 },
        });
        const result = await transition(dispatching('unbounded-child', 'Go', 'tc-unbounded'), parentCtx);
        expect(result.next.type).toBe('thinking');
        // No agent_loop_budget_exceeded log should have fired for the child
        expect(parentCtx.logger.warn).not.toHaveBeenCalledWith('agent_loop_budget_exceeded', expect.anything());
    });
});
1336
+ // ---------------------------------------------------------------------------
1337
+ // 3. Integration: runAgent() full flow
1338
+ // ---------------------------------------------------------------------------
1339
// End-to-end checks of the runAgent() async generator: event ordering,
// termination conditions (abort, max turns, token budget) and logging.
describe('runAgent', () => {
    // Drains runAgent() for a single user message and returns every event
    // it yielded, in order.
    const collectEvents = async (ctx, content) => {
        const events = [];
        for await (const event of runAgent({
            messages: [{ role: 'user', content }],
            context: ctx,
        })) {
            events.push(event);
        }
        return events;
    };
    // Provider that requests `loop_tool` on every turn, so the loop never
    // stops on its own. Tool-call IDs come from a counter rather than
    // Date.now(): turns served by mocks routinely finish within the same
    // millisecond, which made consecutive calls share one ID.
    const makeLoopingProvider = (usagePerTurn) => {
        let callSeq = 0;
        return {
            model: 'test-model',
            provider: 'test',
            languageModel: {},
            streamText: vi.fn(() => {
                callSeq += 1;
                return makeMockStream([
                    { type: 'tool-call', toolCallId: `c-${callSeq}`, toolName: 'loop_tool', args: {} },
                    { type: 'finish', usage: makeUsage(usagePerTurn) },
                ], '');
            }),
            generateText: vi.fn(),
        };
    };
    it('text-only conversation: init → text_delta → done with usage', async () => {
        const ctx = makeMockContext();
        const events = await collectEvents(ctx, 'Hello');
        // Init is first
        expect(events[0].type).toBe(SSEEventType.Init);
        // Done is last
        const doneEvent = events[events.length - 1];
        expect(doneEvent.type).toBe(SSEEventType.Done);
        // Done always has usage (G2)
        expect(doneEvent.usage).toBeDefined();
        expect(doneEvent.usage?.input_tokens).toBeGreaterThanOrEqual(0);
        expect(doneEvent.usage?.output_tokens).toBeGreaterThanOrEqual(0);
    });
    it('tool call conversation: init → text → tool_start → tool_result → done', async () => {
        const searchTool = makeMockToolDef({
            execute: vi.fn().mockResolvedValue({ results: ['found'] }),
        });
        const registry = makeMockRegistry({ search: searchTool });
        const provider = {
            model: 'test-model',
            provider: 'test',
            languageModel: {},
            streamText: vi.fn()
                // First call: model requests a tool
                .mockImplementationOnce(() => makeMockStream([
                    { type: 'text-delta', textDelta: 'Searching...' },
                    { type: 'tool-call', toolCallId: 'c1', toolName: 'search', args: { q: 'test' } },
                    { type: 'finish', usage: makeUsage({ inputTokens: 50, outputTokens: 20, totalTokens: 70 }) },
                ], 'Searching...'))
                // Every later call: model responds with text
                .mockImplementation(() => makeMockStream([
                    { type: 'text-delta', textDelta: 'Found results.' },
                    { type: 'finish', usage: makeUsage({ inputTokens: 80, outputTokens: 30, totalTokens: 110 }) },
                ], 'Found results.')),
            generateText: vi.fn(),
        };
        const ctx = makeMockContext({ provider, toolRegistry: registry });
        const events = await collectEvents(ctx, 'Search for test');
        // Should have init, text deltas, tool events, more text deltas, done
        const types = events.map((e) => e.type);
        expect(types[0]).toBe(SSEEventType.Init);
        expect(types[types.length - 1]).toBe(SSEEventType.Done);
        expect(types).toContain(SSEEventType.ToolCallStart);
        expect(types).toContain(SSEEventType.ToolCallResult);
        // tool_call_start must come before tool_call_result
        const startIdx = types.indexOf(SSEEventType.ToolCallStart);
        const resultIdx = types.indexOf(SSEEventType.ToolCallResult);
        expect(startIdx).toBeLessThan(resultIdx);
        // Provider should have been called twice (thinking → tool → thinking → done)
        expect(provider.streamText).toHaveBeenCalledTimes(2);
    });
    it('abort signal terminates the loop with user_abort', async () => {
        const abortController = new AbortController();
        // Provider that aborts as soon as the first stream is requested
        const provider = {
            model: 'test-model',
            provider: 'test',
            languageModel: {},
            streamText: vi.fn(() => {
                // Abort after stream starts
                abortController.abort();
                return makeMockStream([
                    { type: 'text-delta', textDelta: 'Starting...' },
                    { type: 'finish', usage: makeUsage() },
                ]);
            }),
            generateText: vi.fn(),
        };
        const ctx = makeMockContext({ provider, signal: abortController.signal });
        const events = await collectEvents(ctx, 'Hello');
        // Should end with done event
        const doneEvent = events[events.length - 1];
        expect(doneEvent.type).toBe(SSEEventType.Done);
        // Usage should be present even on abort
        expect(doneEvent.usage).toBeDefined();
    });
    it('max turns terminates the loop', async () => {
        const tool = makeMockToolDef();
        const registry = makeMockRegistry({ loop_tool: tool });
        // Provider always requests a tool call (infinite loop)
        const provider = makeLoopingProvider({ inputTokens: 10, outputTokens: 5, totalTokens: 15 });
        const ctx = makeMockContext({
            provider,
            toolRegistry: registry,
            maxTurns: 3,
        });
        const events = await collectEvents(ctx, 'Loop forever');
        // Should have terminated
        const doneEvent = events[events.length - 1];
        expect(doneEvent.type).toBe(SSEEventType.Done);
        // Turn count should not exceed maxTurns
        expect(ctx.turnCount).toBeLessThanOrEqual(3);
    });
    it('token budget terminates the loop with reason=budget_exceeded', async () => {
        const tool = makeMockToolDef();
        const registry = makeMockRegistry({ loop_tool: tool });
        // Each turn yields 15 tokens (10 in + 5 out). With maxSessionTokens=30, the loop
        // should stop after the 2nd turn pushes cumulative usage past the cap.
        const provider = makeLoopingProvider({ inputTokens: 10, outputTokens: 5, totalTokens: 15 });
        const ctx = makeMockContext({
            provider,
            toolRegistry: registry,
            maxTurns: 100,
            maxSessionTokens: 30,
        });
        const events = await collectEvents(ctx, 'Burn tokens');
        // Should have terminated on budget, not max_turns
        const doneEvent = events[events.length - 1];
        expect(doneEvent.type).toBe(SSEEventType.Done);
        expect(ctx.turnCount).toBeLessThan(100);
        expect(ctx.usage.totalTokens).toBeGreaterThanOrEqual(30);
    });
    it('undefined maxSessionTokens means no budget cap', async () => {
        // Same infinite-tool-call provider as max_turns test, but with no
        // maxSessionTokens set and a small maxTurns to bound the test. Verifies that
        // undefined budget doesn't accidentally trip the check.
        const tool = makeMockToolDef();
        const registry = makeMockRegistry({ loop_tool: tool });
        const provider = makeLoopingProvider({ inputTokens: 100, outputTokens: 100, totalTokens: 200 });
        const ctx = makeMockContext({
            provider,
            toolRegistry: registry,
            maxTurns: 2,
            // maxSessionTokens intentionally omitted
        });
        const events = await collectEvents(ctx, 'Run');
        // Should stop on max_turns, budget check should not interfere
        const doneEvent = events[events.length - 1];
        expect(doneEvent.type).toBe(SSEEventType.Done);
        expect(ctx.usage.totalTokens).toBeGreaterThan(0);
        // No budget-exceeded log should have been emitted. This was only
        // stated in a comment before; it is now actually asserted.
        expect(ctx.logger.warn).not.toHaveBeenCalledWith('agent_loop_budget_exceeded', expect.anything());
    });
    it('done event always includes usage regardless of reason (G2)', async () => {
        // Abort immediately
        const abortController = new AbortController();
        abortController.abort();
        const ctx = makeMockContext({
            signal: abortController.signal,
            usage: makeUsage({ inputTokens: 42, outputTokens: 13, totalTokens: 55 }),
        });
        const events = await collectEvents(ctx, 'Test');
        const doneEvent = events[events.length - 1];
        expect(doneEvent.type).toBe(SSEEventType.Done);
        expect(doneEvent.usage).toBeDefined();
        expect(doneEvent.usage?.input_tokens).toBe(42);
        expect(doneEvent.usage?.output_tokens).toBe(13);
    });
    it('init event has session_id', async () => {
        const ctx = makeMockContext({ sessionId: 'sess-abc' });
        const [initEvent] = await collectEvents(ctx, 'Hi');
        expect(initEvent.type).toBe(SSEEventType.Init);
        expect(initEvent.session_id).toBe('sess-abc');
    });
    it('logs agent_loop_start and agent_loop_done', async () => {
        const ctx = makeMockContext();
        await collectEvents(ctx, 'Hello');
        expect(ctx.logger.info).toHaveBeenCalledWith('agent_loop_start', expect.objectContaining({
            session: 'test-session',
        }));
        expect(ctx.logger.info).toHaveBeenCalledWith('agent_loop_done', expect.objectContaining({
            session: 'test-session',
            reason: expect.any(String),
        }));
    });
});
1594
+ //# sourceMappingURL=loop.test.js.map