@amodalai/runtime 0.1.26 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (347)
  1. package/dist/src/__fixtures__/README.md +84 -0
  2. package/dist/src/__fixtures__/smoke-agent/amodal.json +11 -0
  3. package/dist/src/__fixtures__/smoke-agent/automations/test-auto.md +5 -0
  4. package/dist/src/__fixtures__/smoke-agent/connections/mock-api/access.json +11 -0
  5. package/dist/src/__fixtures__/smoke-agent/connections/mock-api/spec.json +4 -0
  6. package/dist/src/__fixtures__/smoke-agent/connections/mock-api/surface.md +9 -0
  7. package/dist/src/__fixtures__/smoke-agent/connections/mock-mcp/access.json +3 -0
  8. package/dist/src/__fixtures__/smoke-agent/connections/mock-mcp/spec.json +8 -0
  9. package/dist/src/__fixtures__/smoke-agent/evals/basic-eval.md +12 -0
  10. package/dist/src/__fixtures__/smoke-agent/knowledge/test-knowledge.md +3 -0
  11. package/dist/src/__fixtures__/smoke-agent/skills/test-skill/SKILL.md +11 -0
  12. package/dist/src/__fixtures__/smoke-agent/stores/test-items.json +11 -0
  13. package/dist/src/__fixtures__/smoke-agent/tools/echo_tool/handler.d.ts +18 -0
  14. package/dist/src/__fixtures__/smoke-agent/tools/echo_tool/handler.js +22 -0
  15. package/dist/src/__fixtures__/smoke-agent/tools/echo_tool/handler.js.map +1 -0
  16. package/dist/src/__fixtures__/smoke-agent/tools/echo_tool/tool.json +17 -0
  17. package/dist/src/__fixtures__/smoke.test.js +718 -0
  18. package/dist/src/__fixtures__/smoke.test.js.map +1 -0
  19. package/dist/src/__tests__/test-providers.d.ts +40 -0
  20. package/dist/src/__tests__/test-providers.js +61 -0
  21. package/dist/src/__tests__/test-providers.js.map +1 -0
  22. package/dist/src/agent/local-server.d.ts +3 -3
  23. package/dist/src/agent/local-server.js +213 -122
  24. package/dist/src/agent/local-server.js.map +1 -1
  25. package/dist/src/agent/loop-types.d.ts +175 -0
  26. package/dist/src/agent/loop-types.js +20 -0
  27. package/dist/src/agent/loop-types.js.map +1 -0
  28. package/dist/src/agent/loop.d.ts +31 -0
  29. package/dist/src/agent/loop.js +139 -0
  30. package/dist/src/agent/loop.js.map +1 -0
  31. package/dist/src/agent/loop.test.js +1030 -0
  32. package/dist/src/agent/loop.test.js.map +1 -0
  33. package/dist/src/agent/mcp-config.d.ts +28 -0
  34. package/dist/src/agent/mcp-config.js +57 -0
  35. package/dist/src/agent/mcp-config.js.map +1 -0
  36. package/dist/src/agent/page-builder.js +6 -1
  37. package/dist/src/agent/page-builder.js.map +1 -1
  38. package/dist/src/agent/proactive/proactive-runner.d.ts +24 -8
  39. package/dist/src/agent/proactive/proactive-runner.js +30 -32
  40. package/dist/src/agent/proactive/proactive-runner.js.map +1 -1
  41. package/dist/src/agent/proactive/proactive-runner.test.d.ts +1 -1
  42. package/dist/src/agent/proactive/proactive-runner.test.js +75 -87
  43. package/dist/src/agent/proactive/proactive-runner.test.js.map +1 -1
  44. package/dist/src/agent/routes/admin-chat.d.ts +15 -3
  45. package/dist/src/agent/routes/admin-chat.js +63 -18
  46. package/dist/src/agent/routes/admin-chat.js.map +1 -1
  47. package/dist/src/agent/routes/automations.js +5 -6
  48. package/dist/src/agent/routes/automations.js.map +1 -1
  49. package/dist/src/agent/routes/evals.d.ts +3 -2
  50. package/dist/src/agent/routes/evals.js +25 -12
  51. package/dist/src/agent/routes/evals.js.map +1 -1
  52. package/dist/src/agent/routes/files.js +7 -9
  53. package/dist/src/agent/routes/files.js.map +1 -1
  54. package/dist/src/agent/routes/inspect.d.ts +6 -2
  55. package/dist/src/agent/routes/inspect.js +31 -17
  56. package/dist/src/agent/routes/inspect.js.map +1 -1
  57. package/dist/src/agent/routes/inspect.test.js +18 -42
  58. package/dist/src/agent/routes/inspect.test.js.map +1 -1
  59. package/dist/src/agent/routes/stores.js +9 -12
  60. package/dist/src/agent/routes/stores.js.map +1 -1
  61. package/dist/src/agent/routes/task.d.ts +15 -3
  62. package/dist/src/agent/routes/task.js +16 -7
  63. package/dist/src/agent/routes/task.js.map +1 -1
  64. package/dist/src/agent/routes/task.test.d.ts +1 -1
  65. package/dist/src/agent/routes/task.test.js +70 -53
  66. package/dist/src/agent/routes/task.test.js.map +1 -1
  67. package/dist/src/agent/routes/webhooks.js +12 -3
  68. package/dist/src/agent/routes/webhooks.js.map +1 -1
  69. package/dist/src/agent/session-store.d.ts +11 -2
  70. package/dist/src/agent/session-store.js +1 -1
  71. package/dist/src/agent/session-store.js.map +1 -1
  72. package/dist/src/agent/snapshot-server.d.ts +2 -22
  73. package/dist/src/agent/snapshot-server.js +50 -27
  74. package/dist/src/agent/snapshot-server.js.map +1 -1
  75. package/dist/src/agent/states/compacting.d.ts +14 -0
  76. package/dist/src/agent/states/compacting.js +258 -0
  77. package/dist/src/agent/states/compacting.js.map +1 -0
  78. package/dist/src/agent/states/confirming.d.ts +10 -0
  79. package/dist/src/agent/states/confirming.js +76 -0
  80. package/dist/src/agent/states/confirming.js.map +1 -0
  81. package/dist/src/agent/states/dispatching.d.ts +18 -0
  82. package/dist/src/agent/states/dispatching.js +241 -0
  83. package/dist/src/agent/states/dispatching.js.map +1 -0
  84. package/dist/src/agent/states/executing.d.ts +21 -0
  85. package/dist/src/agent/states/executing.js +308 -0
  86. package/dist/src/agent/states/executing.js.map +1 -0
  87. package/dist/src/agent/states/streaming.d.ts +10 -0
  88. package/dist/src/agent/states/streaming.js +155 -0
  89. package/dist/src/agent/states/streaming.js.map +1 -0
  90. package/dist/src/agent/states/thinking.d.ts +13 -0
  91. package/dist/src/agent/states/thinking.js +233 -0
  92. package/dist/src/agent/states/thinking.js.map +1 -0
  93. package/dist/src/agent/token-estimate.d.ts +17 -0
  94. package/dist/src/agent/token-estimate.js +13 -0
  95. package/dist/src/agent/token-estimate.js.map +1 -0
  96. package/dist/src/agent/tool-executor-local.js +9 -18
  97. package/dist/src/agent/tool-executor-local.js.map +1 -1
  98. package/dist/src/agent/tool-executor-local.test.js +3 -5
  99. package/dist/src/agent/tool-executor-local.test.js.map +1 -1
  100. package/dist/src/api/create-agent.d.ts +15 -0
  101. package/dist/src/api/create-agent.js +137 -0
  102. package/dist/src/api/create-agent.js.map +1 -0
  103. package/dist/src/api/types.d.ts +68 -0
  104. package/dist/src/api/types.js +7 -0
  105. package/dist/src/api/types.js.map +1 -0
  106. package/dist/src/context/compiler.d.ts +13 -0
  107. package/dist/src/context/compiler.js +358 -0
  108. package/dist/src/context/compiler.js.map +1 -0
  109. package/dist/src/context/compiler.test.js +532 -0
  110. package/dist/src/context/compiler.test.js.map +1 -0
  111. package/dist/src/context/types.d.ts +110 -0
  112. package/dist/src/context/types.js +7 -0
  113. package/dist/src/context/types.js.map +1 -0
  114. package/dist/src/index.d.ts +33 -6
  115. package/dist/src/index.js +35 -21
  116. package/dist/src/index.js.map +1 -1
  117. package/dist/src/providers/create-provider.d.ts +23 -0
  118. package/dist/src/providers/create-provider.js +185 -0
  119. package/dist/src/providers/create-provider.js.map +1 -0
  120. package/dist/src/{agent/stores-e2e.test.d.ts → providers/create-provider.test.d.ts} +1 -1
  121. package/dist/src/providers/create-provider.test.js +95 -0
  122. package/dist/src/providers/create-provider.test.js.map +1 -0
  123. package/dist/src/providers/failover.d.ts +38 -0
  124. package/dist/src/providers/failover.js +147 -0
  125. package/dist/src/providers/failover.js.map +1 -0
  126. package/dist/src/providers/failover.test.d.ts +6 -0
  127. package/dist/src/providers/failover.test.js +169 -0
  128. package/dist/src/providers/failover.test.js.map +1 -0
  129. package/dist/src/providers/types.d.ts +110 -0
  130. package/dist/src/providers/types.js +7 -0
  131. package/dist/src/providers/types.js.map +1 -0
  132. package/dist/src/routes/ai-stream.d.ts +13 -10
  133. package/dist/src/routes/ai-stream.js +76 -41
  134. package/dist/src/routes/ai-stream.js.map +1 -1
  135. package/dist/src/routes/chat-new.test.d.ts +6 -0
  136. package/dist/src/routes/chat-new.test.js +107 -0
  137. package/dist/src/routes/chat-new.test.js.map +1 -0
  138. package/dist/src/routes/chat-stream-new.test.d.ts +6 -0
  139. package/dist/src/routes/chat-stream-new.test.js +135 -0
  140. package/dist/src/routes/chat-stream-new.test.js.map +1 -0
  141. package/dist/src/routes/chat-stream.d.ts +14 -4
  142. package/dist/src/routes/chat-stream.js +47 -29
  143. package/dist/src/routes/chat-stream.js.map +1 -1
  144. package/dist/src/routes/chat.d.ts +13 -4
  145. package/dist/src/routes/chat.js +60 -23
  146. package/dist/src/routes/chat.js.map +1 -1
  147. package/dist/src/routes/health.d.ts +3 -2
  148. package/dist/src/routes/health.js.map +1 -1
  149. package/dist/src/routes/route-helpers.d.ts +50 -0
  150. package/dist/src/routes/route-helpers.js +80 -0
  151. package/dist/src/routes/route-helpers.js.map +1 -0
  152. package/dist/src/routes/session-resolver.d.ts +72 -0
  153. package/dist/src/routes/session-resolver.js +123 -0
  154. package/dist/src/routes/session-resolver.js.map +1 -0
  155. package/dist/src/routes/session-resolver.test.d.ts +6 -0
  156. package/dist/src/routes/session-resolver.test.js +206 -0
  157. package/dist/src/routes/session-resolver.test.js.map +1 -0
  158. package/dist/src/routes/webhooks.d.ts +3 -1
  159. package/dist/src/routes/webhooks.js +12 -4
  160. package/dist/src/routes/webhooks.js.map +1 -1
  161. package/dist/src/security/permission-checker.d.ts +80 -0
  162. package/dist/src/security/permission-checker.js +75 -0
  163. package/dist/src/security/permission-checker.js.map +1 -0
  164. package/dist/src/security/permission-checker.test.d.ts +6 -0
  165. package/dist/src/security/permission-checker.test.js +208 -0
  166. package/dist/src/security/permission-checker.test.js.map +1 -0
  167. package/dist/src/server.d.ts +12 -11
  168. package/dist/src/server.js +44 -46
  169. package/dist/src/server.js.map +1 -1
  170. package/dist/src/server.test.d.ts +1 -1
  171. package/dist/src/server.test.js +6 -144
  172. package/dist/src/server.test.js.map +1 -1
  173. package/dist/src/session/manager.d.ts +98 -0
  174. package/dist/src/session/manager.js +364 -0
  175. package/dist/src/session/manager.js.map +1 -0
  176. package/dist/src/session/manager.test.d.ts +6 -0
  177. package/dist/src/session/manager.test.js +315 -0
  178. package/dist/src/session/manager.test.js.map +1 -0
  179. package/dist/src/session/session-builder.d.ts +71 -0
  180. package/dist/src/session/session-builder.js +364 -0
  181. package/dist/src/session/session-builder.js.map +1 -0
  182. package/dist/src/session/session-builder.test.d.ts +6 -0
  183. package/dist/src/session/session-builder.test.js +352 -0
  184. package/dist/src/session/session-builder.test.js.map +1 -0
  185. package/dist/src/session/store.d.ts +57 -0
  186. package/dist/src/session/store.js +167 -0
  187. package/dist/src/session/store.js.map +1 -0
  188. package/dist/src/session/store.test.d.ts +6 -0
  189. package/dist/src/session/store.test.js +145 -0
  190. package/dist/src/session/store.test.js.map +1 -0
  191. package/dist/src/session/stream-hooks.d.ts +39 -0
  192. package/dist/src/session/stream-hooks.js +7 -0
  193. package/dist/src/session/stream-hooks.js.map +1 -0
  194. package/dist/src/session/tool-context-factory.d.ts +60 -0
  195. package/dist/src/session/tool-context-factory.js +190 -0
  196. package/dist/src/session/tool-context-factory.js.map +1 -0
  197. package/dist/src/session/tool-context-factory.test.d.ts +6 -0
  198. package/dist/src/session/tool-context-factory.test.js +287 -0
  199. package/dist/src/session/tool-context-factory.test.js.map +1 -0
  200. package/dist/src/session/types.d.ts +188 -0
  201. package/dist/src/session/types.js +7 -0
  202. package/dist/src/session/types.js.map +1 -0
  203. package/dist/src/stores/drizzle-store-backend.d.ts +49 -0
  204. package/dist/src/stores/drizzle-store-backend.js +306 -0
  205. package/dist/src/stores/drizzle-store-backend.js.map +1 -0
  206. package/dist/src/stores/drizzle-store-backend.test.d.ts +6 -0
  207. package/dist/src/stores/drizzle-store-backend.test.js +215 -0
  208. package/dist/src/stores/drizzle-store-backend.test.js.map +1 -0
  209. package/dist/src/stores/index.d.ts +4 -0
  210. package/dist/src/stores/index.js +2 -0
  211. package/dist/src/stores/index.js.map +1 -1
  212. package/dist/src/stores/pglite-store-backend.d.ts +16 -19
  213. package/dist/src/stores/pglite-store-backend.js +85 -239
  214. package/dist/src/stores/pglite-store-backend.js.map +1 -1
  215. package/dist/src/stores/postgres-store-backend.d.ts +30 -0
  216. package/dist/src/stores/postgres-store-backend.js +100 -0
  217. package/dist/src/stores/postgres-store-backend.js.map +1 -0
  218. package/dist/src/stores/schema.d.ts +491 -0
  219. package/dist/src/stores/schema.js +57 -0
  220. package/dist/src/stores/schema.js.map +1 -0
  221. package/dist/src/tools/admin-file-tools.d.ts +13 -0
  222. package/dist/src/tools/admin-file-tools.js +200 -0
  223. package/dist/src/tools/admin-file-tools.js.map +1 -0
  224. package/dist/src/tools/admin-file-tools.test.d.ts +6 -0
  225. package/dist/src/tools/admin-file-tools.test.js +152 -0
  226. package/dist/src/tools/admin-file-tools.test.js.map +1 -0
  227. package/dist/src/tools/custom-tool-adapter.d.ts +41 -0
  228. package/dist/src/tools/custom-tool-adapter.js +190 -0
  229. package/dist/src/tools/custom-tool-adapter.js.map +1 -0
  230. package/dist/src/tools/custom-tool-adapter.test.d.ts +6 -0
  231. package/dist/src/tools/custom-tool-adapter.test.js +244 -0
  232. package/dist/src/tools/custom-tool-adapter.test.js.map +1 -0
  233. package/dist/src/tools/dispatch-tool.d.ts +52 -0
  234. package/dist/src/tools/dispatch-tool.js +71 -0
  235. package/dist/src/tools/dispatch-tool.js.map +1 -0
  236. package/dist/src/tools/dispatch-tool.test.d.ts +6 -0
  237. package/dist/src/tools/dispatch-tool.test.js +75 -0
  238. package/dist/src/tools/dispatch-tool.test.js.map +1 -0
  239. package/dist/src/tools/mcp-tool-adapter.d.ts +18 -0
  240. package/dist/src/tools/mcp-tool-adapter.js +135 -0
  241. package/dist/src/tools/mcp-tool-adapter.js.map +1 -0
  242. package/dist/src/tools/mcp-tool-adapter.test.d.ts +6 -0
  243. package/dist/src/tools/mcp-tool-adapter.test.js +227 -0
  244. package/dist/src/tools/mcp-tool-adapter.test.js.map +1 -0
  245. package/dist/src/tools/registry.d.ts +25 -0
  246. package/dist/src/tools/registry.js +72 -0
  247. package/dist/src/tools/registry.js.map +1 -0
  248. package/dist/src/tools/registry.test.d.ts +6 -0
  249. package/dist/src/tools/registry.test.js +121 -0
  250. package/dist/src/tools/registry.test.js.map +1 -0
  251. package/dist/src/tools/request-tool.d.ts +42 -0
  252. package/dist/src/tools/request-tool.js +190 -0
  253. package/dist/src/tools/request-tool.js.map +1 -0
  254. package/dist/src/tools/request-tool.test.d.ts +6 -0
  255. package/dist/src/tools/request-tool.test.js +254 -0
  256. package/dist/src/tools/request-tool.test.js.map +1 -0
  257. package/dist/src/tools/store-tools.d.ts +29 -0
  258. package/dist/src/tools/store-tools.js +224 -0
  259. package/dist/src/tools/store-tools.js.map +1 -0
  260. package/dist/src/tools/store-tools.test.d.ts +6 -0
  261. package/dist/src/tools/store-tools.test.js +216 -0
  262. package/dist/src/tools/store-tools.test.js.map +1 -0
  263. package/dist/src/tools/types.d.ts +111 -0
  264. package/dist/src/tools/types.js +7 -0
  265. package/dist/src/tools/types.js.map +1 -0
  266. package/dist/src/types.d.ts +20 -12
  267. package/dist/src/types.js +3 -2
  268. package/dist/src/types.js.map +1 -1
  269. package/dist/tsconfig.tsbuildinfo +1 -1
  270. package/package.json +13 -4
  271. package/dist/src/__tests__/sse-contract.test.js +0 -464
  272. package/dist/src/__tests__/sse-contract.test.js.map +0 -1
  273. package/dist/src/__tests__/tools.test.js +0 -583
  274. package/dist/src/__tests__/tools.test.js.map +0 -1
  275. package/dist/src/agent/agent-runner.d.ts +0 -33
  276. package/dist/src/agent/agent-runner.js +0 -1040
  277. package/dist/src/agent/agent-runner.js.map +0 -1
  278. package/dist/src/agent/custom-tools-e2e.test.d.ts +0 -6
  279. package/dist/src/agent/custom-tools-e2e.test.js +0 -566
  280. package/dist/src/agent/custom-tools-e2e.test.js.map +0 -1
  281. package/dist/src/agent/request-helper.d.ts +0 -16
  282. package/dist/src/agent/request-helper.js +0 -96
  283. package/dist/src/agent/request-helper.js.map +0 -1
  284. package/dist/src/agent/stores-e2e.test.js +0 -433
  285. package/dist/src/agent/stores-e2e.test.js.map +0 -1
  286. package/dist/src/agent/tool-context-builder.d.ts +0 -11
  287. package/dist/src/agent/tool-context-builder.js +0 -102
  288. package/dist/src/agent/tool-context-builder.js.map +0 -1
  289. package/dist/src/agent/tool-context-builder.test.d.ts +0 -6
  290. package/dist/src/agent/tool-context-builder.test.js +0 -152
  291. package/dist/src/agent/tool-context-builder.test.js.map +0 -1
  292. package/dist/src/agent/write-repo-file.test.js +0 -270
  293. package/dist/src/agent/write-repo-file.test.js.map +0 -1
  294. package/dist/src/cron/heartbeat-runner.d.ts +0 -21
  295. package/dist/src/cron/heartbeat-runner.js +0 -79
  296. package/dist/src/cron/heartbeat-runner.js.map +0 -1
  297. package/dist/src/cron/heartbeat-runner.test.d.ts +0 -6
  298. package/dist/src/cron/heartbeat-runner.test.js +0 -120
  299. package/dist/src/cron/heartbeat-runner.test.js.map +0 -1
  300. package/dist/src/cron/heartbeat-scheduler.d.ts +0 -26
  301. package/dist/src/cron/heartbeat-scheduler.js +0 -55
  302. package/dist/src/cron/heartbeat-scheduler.js.map +0 -1
  303. package/dist/src/cron/heartbeat-scheduler.test.d.ts +0 -6
  304. package/dist/src/cron/heartbeat-scheduler.test.js +0 -61
  305. package/dist/src/cron/heartbeat-scheduler.test.js.map +0 -1
  306. package/dist/src/routes/ai-stream.test.d.ts +0 -6
  307. package/dist/src/routes/ai-stream.test.js +0 -586
  308. package/dist/src/routes/ai-stream.test.js.map +0 -1
  309. package/dist/src/routes/ask-user-response.d.ts +0 -30
  310. package/dist/src/routes/ask-user-response.js +0 -61
  311. package/dist/src/routes/ask-user-response.js.map +0 -1
  312. package/dist/src/routes/ask-user-response.test.d.ts +0 -6
  313. package/dist/src/routes/ask-user-response.test.js +0 -88
  314. package/dist/src/routes/ask-user-response.test.js.map +0 -1
  315. package/dist/src/routes/chat-stream.test.d.ts +0 -6
  316. package/dist/src/routes/chat-stream.test.js +0 -155
  317. package/dist/src/routes/chat-stream.test.js.map +0 -1
  318. package/dist/src/routes/chat.test.d.ts +0 -6
  319. package/dist/src/routes/chat.test.js +0 -99
  320. package/dist/src/routes/chat.test.js.map +0 -1
  321. package/dist/src/routes/widget-actions.d.ts +0 -49
  322. package/dist/src/routes/widget-actions.js +0 -78
  323. package/dist/src/routes/widget-actions.js.map +0 -1
  324. package/dist/src/session/custom-tool-adapter.d.ts +0 -74
  325. package/dist/src/session/custom-tool-adapter.js +0 -180
  326. package/dist/src/session/custom-tool-adapter.js.map +0 -1
  327. package/dist/src/session/history-converter.d.ts +0 -21
  328. package/dist/src/session/history-converter.js +0 -59
  329. package/dist/src/session/history-converter.js.map +0 -1
  330. package/dist/src/session/history-converter.test.d.ts +0 -6
  331. package/dist/src/session/history-converter.test.js +0 -130
  332. package/dist/src/session/history-converter.test.js.map +0 -1
  333. package/dist/src/session/session-manager.d.ts +0 -219
  334. package/dist/src/session/session-manager.js +0 -915
  335. package/dist/src/session/session-manager.js.map +0 -1
  336. package/dist/src/session/session-manager.test.d.ts +0 -6
  337. package/dist/src/session/session-manager.test.js +0 -455
  338. package/dist/src/session/session-manager.test.js.map +0 -1
  339. package/dist/src/session/session-runner.d.ts +0 -45
  340. package/dist/src/session/session-runner.js +0 -719
  341. package/dist/src/session/session-runner.js.map +0 -1
  342. package/dist/src/session/session-runner.test.d.ts +0 -6
  343. package/dist/src/session/session-runner.test.js +0 -834
  344. package/dist/src/session/session-runner.test.js.map +0 -1
  345. /package/dist/src/{__tests__/sse-contract.test.d.ts → __fixtures__/smoke.test.d.ts} +0 -0
  346. /package/dist/src/{__tests__/tools.test.d.ts → agent/loop.test.d.ts} +0 -0
  347. /package/dist/src/{agent/write-repo-file.test.d.ts → context/compiler.test.d.ts} +0 -0
@@ -0,0 +1,1030 @@
1
+ /**
2
+ * @license
3
+ * Copyright 2026 Amodal Labs, Inc.
4
+ * SPDX-License-Identifier: MIT
5
+ */
6
+ /**
7
+ * Agent Loop Tests
8
+ *
9
+ * Tests the state machine core:
10
+ * 1. Unit: each state handler produces expected transitions
11
+ * 2. Integration: runAgent() full conversation flow
12
+ * 3. Abort handling: clean shutdown on signal abort
13
+ * 4. Turn budget: max_turns enforcement
14
+ * 5. SSE event ordering (init first, done last, done always has usage)
15
+ */
16
+ import { describe, it, expect, vi } from 'vitest';
17
+ import { z } from 'zod';
18
+ import { SSEEventType } from '../types.js';
19
+ import { runAgent, transition } from './loop.js';
20
+ import { DEFAULT_LOOP_CONFIG } from './loop-types.js';
21
+ // ---------------------------------------------------------------------------
22
+ // Mock helpers
23
+ // ---------------------------------------------------------------------------
24
/**
 * Build a stub logger for tests.
 *
 * Every level method (`trace` through `fatal`) is a fresh `vi.fn()` so tests
 * can assert on logging calls. `child` is configured with `mockReturnThis()`,
 * so `logger.child(...)` hands back the same stub object rather than a new one.
 *
 * @returns {object} Logger-shaped object made entirely of vitest mocks.
 */
function makeMockLogger() {
  return {
    trace: vi.fn(),
    debug: vi.fn(),
    info: vi.fn(),
    warn: vi.fn(),
    error: vi.fn(),
    fatal: vi.fn(),
    // Child loggers resolve to the parent stub so assertions stay on one object.
    child: vi.fn().mockReturnThis(),
  };
}
35
/**
 * Build a token-usage record for tests.
 *
 * All three counters start at zero; any property in `overrides` replaces or
 * extends the defaults. Calling with no argument is fine because spreading
 * `undefined` contributes nothing.
 *
 * @param {object} [overrides] Properties to merge over the zeroed defaults.
 * @returns {{inputTokens: number, outputTokens: number, totalTokens: number}}
 *   A new usage object on every call.
 */
function makeUsage(overrides) {
  return {
    inputTokens: 0,
    outputTokens: 0,
    totalTokens: 0,
    ...overrides,
  };
}
43
/**
 * Build a stub tool definition for tests.
 *
 * Defaults describe a non-read-only tool in the `custom` category whose
 * `execute` mock resolves to `{ output: 'tool result' }`. Any property in
 * `overrides` replaces the corresponding default.
 *
 * @param {object} [overrides] Properties to merge over the defaults.
 * @returns {object} Tool definition with a mocked `execute`.
 */
function makeMockToolDef(overrides) {
  return {
    description: 'Test tool',
    parameters: {},
    execute: vi.fn().mockResolvedValue({ output: 'tool result' }),
    readOnly: false,
    metadata: { category: 'custom' },
    ...overrides,
  };
}
53
/**
 * Build a stub tool registry backed by a plain name → tool map.
 *
 * - `get(name)` looks the name up in `tools`.
 * - `getTools()` returns the `tools` object itself (not a copy).
 * - `names()` returns the current keys of `tools`.
 * - `subset()` always returns an empty object.
 * - `register` is a bare mock and does not modify `tools`.
 * - `size` is computed once, when the registry is created.
 *
 * @param {Record<string, object>} [tools={}] Tools exposed by the registry.
 * @returns {object} Registry-shaped object made of vitest mocks.
 */
function makeMockRegistry(tools = {}) {
  return {
    register: vi.fn(),
    get: vi.fn((name) => tools[name]),
    getTools: vi.fn(() => tools),
    names: vi.fn(() => Object.keys(tools)),
    subset: vi.fn().mockReturnValue({}),
    // Snapshot taken at construction time; later changes to `tools` are not reflected.
    size: Object.keys(tools).length,
  };
}
63
/**
 * Create a mock StreamTextResult that yields the given stream events.
 *
 * - `fullStream` yields every entry of `events`, in order.
 * - `textStream` yields `text` once.
 * - `usage` resolves to a fixed record (100 input / 50 output / 150 total tokens).
 * - `text` resolves to `text`.
 *
 * Both streams are async generator objects created eagerly, so each returned
 * mock can be iterated only once; build a new mock for every consumer.
 *
 * @param {Iterable<object>} events Stream parts to replay through `fullStream`.
 * @param {string} [text='Hello from the model'] Final text of the response.
 * @returns {object} Object with `textStream`, `fullStream`, `usage` and `text`.
 */
function makeMockStream(events, text = 'Hello from the model') {
  async function* replayText() {
    yield text;
  }

  async function* replayEvents() {
    for (const event of events) {
      yield event;
    }
  }

  return {
    textStream: replayText(),
    fullStream: replayEvents(),
    usage: Promise.resolve(makeUsage({ inputTokens: 100, outputTokens: 50, totalTokens: 150 })),
    text: Promise.resolve(text),
  };
}
80
/**
 * Build a fully-populated mock agent context for the loop tests.
 *
 * Defaults:
 * - `provider.streamText` returns a fresh mock stream (one text delta followed
 *   by a finish event) on every call; `provider.generateText` is a bare mock.
 * - `toolRegistry` is an empty mock registry.
 * - `permissionChecker.check` always returns `{ allowed: true }`.
 * - `waitForConfirmation` resolves to `true`.
 * - `buildToolContext` returns a stub tool context with its own abort signal.
 * - Counters (`usage`, `turnCount`, `compactionFailures`) start at zero,
 *   `maxTurns` is 10 and `maxContextTokens` is 200,000.
 * - `config` is a shallow copy of `DEFAULT_LOOP_CONFIG`, so tests can change
 *   top-level keys without touching the shared default object.
 *
 * Any property in `overrides` replaces the corresponding default wholesale
 * (the merge is shallow).
 *
 * @param {object} [overrides] Top-level context properties to replace.
 * @returns {object} Mock context accepted by `transition` / `runAgent` in these tests.
 */
function makeMockContext(overrides) {
  const logger = makeMockLogger();
  return {
    provider: {
      model: 'test-model',
      provider: 'test',
      languageModel: {},
      // A new stream per call: the mock stream's generators are single-use.
      streamText: vi.fn(() => makeMockStream([
        { type: 'text-delta', textDelta: 'Hello' },
        { type: 'finish', usage: makeUsage({ inputTokens: 100, outputTokens: 50, totalTokens: 150 }) },
      ])),
      generateText: vi.fn(),
    },
    toolRegistry: makeMockRegistry(),
    permissionChecker: {
      check: vi.fn().mockReturnValue({ allowed: true }),
    },
    logger,
    signal: new AbortController().signal,
    sessionId: 'test-session',
    tenantId: 'test-tenant',
    user: { roles: ['user'] },
    systemPrompt: 'You are a helpful assistant.',
    messages: [],
    usage: makeUsage(),
    turnCount: 0,
    maxTurns: 10,
    maxContextTokens: 200_000,
    config: { ...DEFAULT_LOOP_CONFIG },
    compactionFailures: 0,
    preExecutionCache: new Map(),
    waitForConfirmation: vi.fn().mockResolvedValue(true),
    buildToolContext: vi.fn().mockReturnValue({
      request: vi.fn(),
      store: vi.fn(),
      env: vi.fn(),
      log: vi.fn(),
      user: { roles: [] },
      signal: new AbortController().signal,
      sessionId: 'test-session',
      tenantId: 'test-tenant',
    }),
    ...overrides,
  };
}
125
+ // ---------------------------------------------------------------------------
126
+ // 1. Unit: transition dispatcher + exhaustive switch
127
+ // ---------------------------------------------------------------------------
128
// Dispatcher-level checks: `transition` must route each state type to its
// handler and leave terminal states untouched.
describe('transition', () => {
  it('dispatches thinking state to handleThinking', async () => {
    const ctx = makeMockContext();
    const state = { type: 'thinking', messages: [] };
    const result = await transition(state, ctx);
    // Should transition to streaming
    expect(result.next.type).toBe('streaming');
    // The thinking step counts as one turn on the shared context.
    expect(ctx.turnCount).toBe(1);
  });

  it('dispatches done state as pass-through', async () => {
    const ctx = makeMockContext();
    const state = { type: 'done', usage: makeUsage(), reason: 'model_stop' };
    const result = await transition(state, ctx);
    // Terminal state: same object back, no side effects emitted.
    expect(result.next).toBe(state);
    expect(result.effects).toEqual([]);
  });
});
145
+ // ---------------------------------------------------------------------------
146
+ // 2. State handler unit tests
147
+ // ---------------------------------------------------------------------------
148
// Exercises the thinking handler through `transition`: turn accounting, the
// tool schemas sent to the provider, and repeated-tool-call (loop) handling.
describe('handleThinking (via transition)', () => {
  /**
   * Build a conversation history of `count` assistant tool calls to
   * `toolName`, each immediately followed by its tool result.
   *
   * @param {number} count Number of call/result pairs to generate.
   * @param {string} toolName Tool name used for every call and result.
   * @param {(index: number) => object} inputFor Produces the call input for pair `index`.
   * @param {string} resultText Text value returned by every tool result.
   * @returns {object[]} Alternating assistant / tool messages.
   */
  function buildToolHistory(count, toolName, inputFor, resultText) {
    const messages = [];
    for (let i = 0; i < count; i++) {
      const toolCallId = `c${i}`;
      messages.push({
        role: 'assistant',
        content: [{ type: 'tool-call', toolCallId, toolName, input: inputFor(i) }],
      });
      messages.push({
        role: 'tool',
        content: [{ type: 'tool-result', toolCallId, toolName, output: { type: 'text', value: resultText } }],
      });
    }
    return messages;
  }

  it('increments turn count and starts streaming', async () => {
    const ctx = makeMockContext();
    const state = { type: 'thinking', messages: [] };
    const result = await transition(state, ctx);
    expect(result.next.type).toBe('streaming');
    expect(ctx.turnCount).toBe(1);
    expect(ctx.provider.streamText).toHaveBeenCalledWith(expect.objectContaining({
      system: 'You are a helpful assistant.',
      abortSignal: ctx.signal,
    }));
  });

  it('passes tool schemas without execute functions to provider', async () => {
    const testTool = makeMockToolDef({ description: 'Search repos' });
    const registry = makeMockRegistry({ search: testTool });
    const ctx = makeMockContext({ toolRegistry: registry });
    await transition({ type: 'thinking', messages: [] }, ctx);
    const streamTextCall = vi.mocked(ctx.provider.streamText).mock.calls[0][0];
    const tools = streamTextCall.tools;
    expect(tools['search']).toBeDefined();
    // Should have inputSchema (not parameters) and no execute
    expect(tools['search']).toHaveProperty('inputSchema');
    expect(tools['search']).not.toHaveProperty('execute');
  });

  it('detects tool call loops and forces done(loop_detected)', async () => {
    // Build messages with 8 repeated tool calls for the same tool
    const messages = buildToolHistory(8, 'stuck_tool', () => ({ q: 'same' }), 'error');
    const ctx = makeMockContext();
    const result = await transition({ type: 'thinking', messages }, ctx);
    expect(result.next.type).toBe('done');
    expect(result.next.reason).toBe('loop_detected');
    const errorEvents = result.effects.filter((e) => e.type === SSEEventType.Error);
    expect(errorEvents.length).toBe(1);
  });

  it('detects loops with similar (not identical) parameters', async () => {
    // Build messages where the same tool is called with slightly different params
    const messages = buildToolHistory(8, 'search_api', (i) => ({ query: 'test', page: i }), 'no results');
    const ctx = makeMockContext();
    const result = await transition({ type: 'thinking', messages }, ctx);
    // Same tool, same keys, >50% identical values → detected as loop
    expect(result.next.type).toBe('done');
    expect(result.next.reason).toBe('loop_detected');
  });

  it('injects warning when tool called 3+ times', async () => {
    const messages = buildToolHistory(3, 'flaky_api', () => ({}), 'fail');
    const ctx = makeMockContext();
    const result = await transition({ type: 'thinking', messages }, ctx);
    // Should still stream (not stop), but the messages passed to streamText
    // should include a warning
    expect(result.next.type).toBe('streaming');
    const streamTextCall = vi.mocked(ctx.provider.streamText).mock.calls[0][0];
    const lastMsg = streamTextCall.messages[streamTextCall.messages.length - 1];
    expect(lastMsg.role).toBe('system');
    // NOTE(review): the content checks only run when the warning content is a
    // plain string; a non-string content would pass silently — confirm whether
    // that is intended.
    if (typeof lastMsg.content === 'string') {
      expect(lastMsg.content).toContain('flaky_api');
      expect(lastMsg.content).toContain('3 times');
    }
  });
});
241
describe('handleStreaming (via transition)', () => {
    /** Wraps a mock stream in a `streaming` state that has no pending tool calls. */
    const streamingState = (stream) => ({ type: 'streaming', stream, pendingToolCalls: [] });
    /** Selects the effects of a single SSE event type from a transition result. */
    const effectsOfType = (outcome, eventType) => outcome.effects.filter((effect) => effect.type === eventType);

    it('text-only response transitions to done(model_stop)', async () => {
        const mockStream = makeMockStream([
            { type: 'text-delta', textDelta: 'Hello!' },
            { type: 'finish', usage: makeUsage({ inputTokens: 50, outputTokens: 20, totalTokens: 70 }) },
        ]);
        const context = makeMockContext();
        const outcome = await transition(streamingState(mockStream), context);
        expect(outcome.next.type).toBe('done');
        // The type assertion above has already failed the test if this is not a done state.
        expect(outcome.next.reason).toBe('model_stop');
        // Text deltas from the stream must surface as SSE effects.
        expect(effectsOfType(outcome, SSEEventType.TextDelta).length).toBeGreaterThan(0);
    });

    it('tool call response transitions to executing', async () => {
        const mockStream = makeMockStream([
            { type: 'text-delta', textDelta: 'Let me search.' },
            {
                type: 'tool-call',
                toolCallId: 'call-1',
                toolName: 'search',
                args: { query: 'test' },
            },
            { type: 'finish', usage: makeUsage({ inputTokens: 50, outputTokens: 20, totalTokens: 70 }) },
        ], 'Let me search.');
        const context = makeMockContext();
        const { next } = await transition(streamingState(mockStream), context);
        expect(next.type).toBe('executing');
        // The requested call becomes the `current` entry of the executing state.
        expect(next.current.toolCallId).toBe('call-1');
        expect(next.current.toolName).toBe('search');
        expect(next.current.args).toEqual({ query: 'test' });
    });

    it('tracks token usage from finish events', async () => {
        const mockStream = makeMockStream([
            { type: 'finish', usage: makeUsage({ inputTokens: 100, outputTokens: 50, totalTokens: 150 }) },
        ]);
        const context = makeMockContext();
        await transition(streamingState(mockStream), context);
        // Usage is read back from the context, not from the transition result.
        const { inputTokens, outputTokens } = context.usage;
        expect(inputTokens).toBe(100);
        expect(outputTokens).toBe(50);
    });

    it('stream error transitions to done(error)', async () => {
        const mockStream = makeMockStream([
            { type: 'error', error: new Error('Provider failed') },
        ]);
        const context = makeMockContext();
        const outcome = await transition(streamingState(mockStream), context);
        expect(outcome.next.type).toBe('done');
        expect(outcome.next.reason).toBe('error');
        // Exactly one error SSE event is expected for the single stream error.
        expect(effectsOfType(outcome, SSEEventType.Error).length).toBe(1);
    });

    it('pre-executes read-only tools during streaming', async () => {
        const lookupTool = makeMockToolDef({
            readOnly: true,
            execute: vi.fn().mockResolvedValue('cached result'),
        });
        const toolRegistry = makeMockRegistry({ lookup: lookupTool });
        const mockStream = makeMockStream([
            { type: 'tool-call', toolCallId: 'call-ro', toolName: 'lookup', args: { id: '1' } },
            { type: 'finish', usage: makeUsage() },
        ], '');
        const context = makeMockContext({ toolRegistry });
        await transition(streamingState(mockStream), context);
        // The cache is keyed by tool call id.
        expect(context.preExecutionCache.has('call-ro')).toBe(true);
    });

    it('logs pre-execution errors on abort instead of swallowing silently', async () => {
        const crashingTool = makeMockToolDef({
            readOnly: true,
            execute: vi.fn().mockRejectedValue(new Error('tool crashed')),
        });
        const toolRegistry = makeMockRegistry({ broken_lookup: crashingTool });
        const mockStream = makeMockStream([
            { type: 'tool-call', toolCallId: 'call-fail', toolName: 'broken_lookup', args: {} },
            { type: 'finish', usage: makeUsage() },
        ], '');
        const context = makeMockContext({ toolRegistry });
        await transition(streamingState(mockStream), context);
        // The cached promise still rejects; awaiting it here lets it settle before
        // the logger is inspected.
        const pending = context.preExecutionCache.get('call-fail');
        expect(pending).toBeDefined();
        await expect(pending).rejects.toThrow('tool crashed');
        // The suppression handler must have recorded the failure at debug level.
        expect(context.logger.debug).toHaveBeenCalledWith('preexec_suppressed', expect.objectContaining({
            tool: 'broken_lookup',
            error: 'tool crashed',
        }));
    });
});
345
describe('handleExecuting (via transition)', () => {
    /**
     * Builds an `executing` state for one tool call.
     * `queue` holds the calls still waiting behind `current`; results start empty.
     */
    const executingState = (current, queue = []) => ({ type: 'executing', queue, current, results: [] });

    it('executes a tool and transitions to thinking when queue empty', async () => {
        const searchTool = makeMockToolDef({
            execute: vi.fn().mockResolvedValue({ repos: ['amodal'] }),
        });
        const registry = makeMockRegistry({ search: searchTool });
        const ctx = makeMockContext({ toolRegistry: registry });
        const state = executingState({ toolCallId: 'call-1', toolName: 'search', args: { q: 'test' } });
        const result = await transition(state, ctx);
        expect(result.next.type).toBe('thinking');
        expect(searchTool.execute).toHaveBeenCalledWith({ q: 'test' }, expect.objectContaining({ sessionId: 'test-session' }));
        // Should emit tool_call_start and tool_call_result SSE events
        const startEvents = result.effects.filter((e) => e.type === SSEEventType.ToolCallStart);
        const resultEvents = result.effects.filter((e) => e.type === SSEEventType.ToolCallResult);
        expect(startEvents.length).toBe(1);
        expect(resultEvents.length).toBe(1);
    });

    it('transitions to compacting when context exceeds threshold', async () => {
        const tool = makeMockToolDef({
            // Return a large result to inflate context
            execute: vi.fn().mockResolvedValue('x'.repeat(10_000)),
        });
        const registry = makeMockRegistry({ big_tool: tool });
        const ctx = makeMockContext({
            toolRegistry: registry,
            maxContextTokens: 1000, // Very small budget
            config: { ...DEFAULT_LOOP_CONFIG, compactThreshold: 0.7 },
        });
        // Pre-populate messages to be near the threshold
        ctx.messages = Array.from({ length: 20 }, () => ({
            role: 'user',
            content: 'x'.repeat(200),
        }));
        const state = executingState({ toolCallId: 'call-1', toolName: 'big_tool', args: {} });
        const result = await transition(state, ctx);
        expect(result.next.type).toBe('compacting');
        expect(result.next.estimatedTokens).toBeGreaterThan(0);
        expect(ctx.logger.info).toHaveBeenCalledWith('context_compaction_triggered', expect.objectContaining({
            session: 'test-session',
        }));
    });

    it('continues to next tool when queue has more items', async () => {
        const tool = makeMockToolDef();
        const registry = makeMockRegistry({ tool_a: tool, tool_b: tool });
        const ctx = makeMockContext({ toolRegistry: registry });
        const state = executingState(
            { toolCallId: 'call-1', toolName: 'tool_a', args: {} },
            [{ toolCallId: 'call-2', toolName: 'tool_b', args: {} }],
        );
        const result = await transition(state, ctx);
        expect(result.next.type).toBe('executing');
        // The queued call is promoted to `current`, leaving the queue empty.
        expect(result.next.current.toolCallId).toBe('call-2');
        expect(result.next.queue).toEqual([]);
    });

    it('returns error result for unknown tool', async () => {
        const ctx = makeMockContext();
        const state = executingState({ toolCallId: 'call-1', toolName: 'nonexistent', args: {} });
        const result = await transition(state, ctx);
        // Should transition to thinking (agent can recover)
        expect(result.next.type).toBe('thinking');
        // Messages should contain the error tool result
        expect(ctx.messages.length).toBeGreaterThan(0);
    });

    it('handles tool execution error as continue site', async () => {
        const failingTool = makeMockToolDef({
            execute: vi.fn().mockRejectedValue(new Error('API rate limit')),
        });
        const registry = makeMockRegistry({ api_call: failingTool });
        const ctx = makeMockContext({ toolRegistry: registry });
        const state = executingState({ toolCallId: 'call-1', toolName: 'api_call', args: {} });
        const result = await transition(state, ctx);
        // Should NOT crash — transitions to thinking so model can recover
        expect(result.next.type).toBe('thinking');
        // Should emit tool_call_result with error status
        const resultEvents = result.effects.filter((e) => e.type === SSEEventType.ToolCallResult);
        expect(resultEvents.length).toBe(1);
        // Logger should have recorded the error
        expect(ctx.logger.error).toHaveBeenCalledWith('tool_execution_error', expect.objectContaining({
            tool: 'api_call',
        }));
    });

    it('rejects hallucinated args that fail schema validation', async () => {
        const { z } = await import('zod');
        const strictTool = makeMockToolDef({
            parameters: z.object({ query: z.string(), limit: z.number().int().positive() }),
        });
        const registry = makeMockRegistry({ search: strictTool });
        const ctx = makeMockContext({ toolRegistry: registry });
        // Both fields violate the schema: `query` is not a string, `limit` is not positive.
        const state = executingState({ toolCallId: 'call-1', toolName: 'search', args: { query: 123, limit: -5 } });
        const result = await transition(state, ctx);
        // Should recover — transition to thinking with error message for the model
        expect(result.next.type).toBe('thinking');
        expect(ctx.logger.warn).toHaveBeenCalledWith('tool_args_invalid', expect.objectContaining({
            tool: 'search',
        }));
        // Should NOT have called execute
        expect(strictTool.execute).not.toHaveBeenCalled();
    });

    it('sanitizes sensitive parameters in SSE events', async () => {
        const tool = makeMockToolDef();
        const registry = makeMockRegistry({ auth_tool: tool });
        const ctx = makeMockContext({ toolRegistry: registry });
        const state = executingState({
            toolCallId: 'call-1',
            toolName: 'auth_tool',
            args: { api_key: 'sk-secret123', query: 'hello' },
        });
        const result = await transition(state, ctx);
        const startEvent = result.effects.find((e) => e.type === SSEEventType.ToolCallStart);
        expect(startEvent).toBeDefined();
        // Assert the shape unconditionally: guarding these checks behind an `if`
        // would let the test pass without ever verifying redaction when the
        // start event carries no `parameters`.
        expect(startEvent).toHaveProperty('parameters');
        const params = startEvent.parameters;
        expect(params['api_key']).toBe('[REDACTED]');
        expect(params['query']).toBe('hello');
    });

    it('snips oversized tool results keeping head and tail', async () => {
        const largeOutput = 'A'.repeat(25_000); // Exceeds 20K default maxResultSize
        const tool = makeMockToolDef({
            execute: vi.fn().mockResolvedValue(largeOutput),
        });
        const registry = makeMockRegistry({ big_api: tool });
        const ctx = makeMockContext({ toolRegistry: registry });
        const state = executingState({ toolCallId: 'call-1', toolName: 'big_api', args: {} });
        await transition(state, ctx);
        // The message appended should contain the snipped content
        const lastMsg = ctx.messages[ctx.messages.length - 1];
        expect(lastMsg.role).toBe('tool');
        // NOTE(review): the content checks below only run when the message has the
        // array/`output` shape; the logger assertion after this block is the
        // unconditional proof that snipping happened.
        if (Array.isArray(lastMsg.content) && 'output' in lastMsg.content[0]) {
            const output = lastMsg.content[0].output;
            const value = typeof output === 'object' && output !== null && 'value' in output
                ? String(output.value)
                : '';
            expect(value).toContain('snipped');
            expect(value.length).toBeLessThan(largeOutput.length);
        }
        expect(ctx.logger.info).toHaveBeenCalledWith('tool_result_snipped', expect.objectContaining({
            tool: 'big_api',
            originalSize: 25_000,
        }));
    });

    it('uses pre-execution cache for read-only tools', async () => {
        const readTool = makeMockToolDef({ readOnly: true });
        const registry = makeMockRegistry({ read_data: readTool });
        const ctx = makeMockContext({ toolRegistry: registry });
        // Simulate pre-execution cache from streaming phase
        ctx.preExecutionCache.set('call-cached', Promise.resolve({ data: 'cached' }));
        const state = executingState({ toolCallId: 'call-cached', toolName: 'read_data', args: {} });
        await transition(state, ctx);
        // The cached result should be used — tool.execute should NOT be called again
        expect(readTool.execute).not.toHaveBeenCalled();
    });
});
544
describe('handleConfirming (via transition)', () => {
    /** The destructive call that both confirmation tests ask the user about. */
    const deleteCall = () => ({ toolCallId: 'call-1', toolName: 'delete_item', args: { id: '123' } });
    /**
     * Builds a `dispatch_task` tool definition whose `execute` is a spy, so tests
     * can verify the loop intercepts the call instead of running the tool.
     */
    const makeDispatchTool = (description) => ({
        description,
        parameters: z.object({ agent_name: z.string(), tools: z.array(z.string()), prompt: z.string() }),
        execute: vi.fn(),
        readOnly: false,
        metadata: { category: 'system' },
    });

    it('approved confirmation resumes executing', async () => {
        const ctx = makeMockContext({
            waitForConfirmation: vi.fn().mockResolvedValue(true),
        });
        const state = {
            type: 'confirming',
            call: deleteCall(),
            remainingQueue: [],
        };
        const result = await transition(state, ctx);
        expect(result.next.type).toBe('executing');
        // The confirmed call is the one that resumes execution.
        expect(result.next.current.toolCallId).toBe('call-1');
    });

    it('denied confirmation transitions to thinking with denial message', async () => {
        const ctx = makeMockContext({
            waitForConfirmation: vi.fn().mockResolvedValue(false),
        });
        const state = {
            type: 'confirming',
            call: deleteCall(),
            remainingQueue: [],
        };
        const result = await transition(state, ctx);
        expect(result.next.type).toBe('thinking');
        // A denial message should have been appended
        expect(ctx.messages.length).toBeGreaterThan(0);
    });

    // The two tests below start from an `executing` state (not `confirming`):
    // they cover how a `dispatch_task` call is intercepted before execution.
    it('intercepts dispatch_task and transitions to DISPATCHING', async () => {
        const dispatchTool = makeDispatchTool('Dispatch sub-task');
        const registry = makeMockRegistry({ dispatch_task: dispatchTool });
        const ctx = makeMockContext({ toolRegistry: registry });
        const state = {
            type: 'executing',
            queue: [],
            current: {
                toolCallId: 'tc-dispatch',
                toolName: 'dispatch_task',
                args: { agent_name: 'fetcher', tools: ['request'], prompt: 'Fetch data' },
            },
            results: [],
        };
        const result = await transition(state, ctx);
        // Should transition to DISPATCHING, not execute the tool
        expect(result.next.type).toBe('dispatching');
        // Tool-call args are mapped onto the task: agent_name -> agentName, tools -> toolSubset.
        expect(result.next.task.agentName).toBe('fetcher');
        expect(result.next.task.toolSubset).toEqual(['request']);
        expect(result.next.task.prompt).toBe('Fetch data');
        expect(result.next.toolCallId).toBe('tc-dispatch');
        // Should emit ToolCallStart but NOT call execute
        const startEvents = result.effects.filter((e) => e.type === SSEEventType.ToolCallStart);
        expect(startEvents.length).toBe(1);
        expect(dispatchTool.execute).not.toHaveBeenCalled();
    });

    it('strips dispatch_task from child tool subset', async () => {
        const dispatchTool = makeDispatchTool('Dispatch');
        const registry = makeMockRegistry({ dispatch_task: dispatchTool });
        const ctx = makeMockContext({ toolRegistry: registry });
        const state = {
            type: 'executing',
            queue: [],
            current: {
                toolCallId: 'tc-d',
                toolName: 'dispatch_task',
                args: { agent_name: 'child', tools: ['request', 'dispatch_task', 'query_store'], prompt: 'Go' },
            },
            results: [],
        };
        const result = await transition(state, ctx);
        // Assert the transition first: without this, a result that never reaches
        // `dispatching` would skip every check and the test would pass vacuously.
        expect(result.next.type).toBe('dispatching');
        // The child must not be able to dispatch further sub-tasks itself.
        expect(result.next.task.toolSubset).toEqual(['request', 'query_store']);
        expect(result.next.task.toolSubset).not.toContain('dispatch_task');
    });
});
635
describe('handleCompacting (via transition)', () => {
    /**
     * Produces `turns` alternating user/assistant messages. The two callbacks
     * supply the text for turn `i`, so each test keeps its own wording.
     */
    const buildConversation = (turns, userText, assistantText) => Array.from({ length: turns }, (_, i) => [
        { role: 'user', content: userText(i) },
        { role: 'assistant', content: assistantText(i) },
    ]).flat();
    /** Short-form ten-turn conversation shared by the failure-counter tests. */
    const shortConversation = () => buildConversation(10, (i) => `Q${i}`, (i) => `A${i}`);
    /** Mock provider whose only configurable behaviour is `generateText`. */
    const providerWith = (generateText) => ({
        model: 'test-model',
        provider: 'test',
        languageModel: {},
        streamText: vi.fn(),
        generateText,
    });
    /** Builds the `compacting` state handed to `transition`. */
    const compactingState = (messages, estimatedTokens) => ({ type: 'compacting', messages, estimatedTokens });

    it('summarizes old messages and keeps recent turns', async () => {
        // Ten full turns give the compactor enough history to split.
        const history = buildConversation(10, (i) => `Question ${i}`, (i) => `Answer ${i}`);
        const summarize = vi.fn().mockResolvedValue({
            text: '## Summary\nThis is a compacted summary.',
            toolCalls: [],
            usage: makeUsage({ inputTokens: 200, outputTokens: 100, totalTokens: 300 }),
            finishReason: 'stop',
        });
        const context = makeMockContext({ provider: providerWith(summarize) });
        const outcome = await transition(compactingState(history, 5000), context);
        expect(outcome.next.type).toBe('thinking');
        // Summary plus recent turns must be shorter than the original history.
        const compacted = outcome.next.messages;
        expect(compacted.length).toBeLessThan(history.length);
        // The summary is injected as the leading system message.
        const [summaryMessage] = compacted;
        expect(summaryMessage.role).toBe('system');
        const summaryText = summaryMessage.content;
        expect(typeof summaryText === 'string' && summaryText.includes('Conversation Summary')).toBe(true);
        // One compaction_start and one compaction_end SSE event bracket the work.
        const countOf = (eventType) => outcome.effects.filter((effect) => effect.type === eventType).length;
        expect(countOf(SSEEventType.CompactionStart)).toBe(1);
        expect(countOf(SSEEventType.CompactionEnd)).toBe(1);
        // The summarization call's usage is folded into the context totals.
        expect(context.usage.inputTokens).toBe(200);
        expect(context.usage.outputTokens).toBe(100);
    });

    it('skips compaction when too few messages to split', async () => {
        const history = [
            { role: 'user', content: 'Hello' },
            { role: 'assistant', content: 'Hi!' },
        ];
        const context = makeMockContext();
        const outcome = await transition(compactingState(history, 1000), context);
        expect(outcome.next.type).toBe('thinking');
        // Identity check: the very same array comes back, not a copy.
        expect(outcome.next.messages).toBe(history);
    });

    it('circuit breaker skips compaction after repeated failures', async () => {
        const history = shortConversation();
        // Start with the failure counter already at the breaker threshold.
        const context = makeMockContext({ compactionFailures: 3 });
        const outcome = await transition(compactingState(history, 5000), context);
        expect(outcome.next.type).toBe('thinking');
        expect(outcome.next.messages).toBe(history);
        expect(context.logger.warn).toHaveBeenCalledWith('compaction_circuit_breaker', expect.objectContaining({
            failures: 3,
        }));
    });

    it('increments failure counter on generateText error and continues', async () => {
        const history = shortConversation();
        const failingSummarize = vi.fn().mockRejectedValue(new Error('Provider rate limited'));
        const context = makeMockContext({ provider: providerWith(failingSummarize) });
        const outcome = await transition(compactingState(history, 5000), context);
        // The loop carries on with the untouched history.
        expect(outcome.next.type).toBe('thinking');
        expect(outcome.next.messages).toBe(history);
        expect(context.compactionFailures).toBe(1);
        expect(context.logger.error).toHaveBeenCalledWith('compaction_failed', expect.objectContaining({
            error: 'Summarization failed: Provider rate limited',
        }));
    });

    it('resets failure counter on successful compaction', async () => {
        const history = shortConversation();
        const summarize = vi.fn().mockResolvedValue({
            text: 'Summary of conversation.',
            toolCalls: [],
            usage: makeUsage({ inputTokens: 100, outputTokens: 50, totalTokens: 150 }),
            finishReason: 'stop',
        });
        // Two earlier failures are on record before this successful run.
        const context = makeMockContext({
            compactionFailures: 2,
            provider: providerWith(summarize),
        });
        await transition(compactingState(history, 5000), context);
        expect(context.compactionFailures).toBe(0);
    });
});
757
describe('handleDispatching (via transition)', () => {
    /**
     * Builds a `dispatching` state for a child agent with an empty tool subset.
     * `queue` holds parent tool calls still waiting after the dispatch.
     */
    const dispatchingState = (agentName, prompt, toolCallId, queue = []) => ({
        type: 'dispatching',
        task: { agentName, toolSubset: [], prompt },
        toolCallId,
        queue,
        results: [],
    });

    it('runs child agent and returns text result to parent', async () => {
        const ctx = makeMockContext();
        const result = await transition(dispatchingState('research-agent', 'Find info', 'tc-dispatch-1'), ctx);
        // Child completes → parent goes to THINKING (no more queue items)
        expect(result.next.type).toBe('thinking');
        // Should have SubagentEvent effects (thought + complete)
        const subagentEvents = result.effects.filter((e) => e.type === SSEEventType.SubagentEvent);
        expect(subagentEvents.length).toBeGreaterThanOrEqual(1);
        // Should have a ToolCallResult for the dispatch_task call
        const toolResult = result.effects.find((e) => e.type === SSEEventType.ToolCallResult);
        expect(toolResult).toBeDefined();
        // Child usage merged into parent
        expect(ctx.usage.inputTokens).toBeGreaterThan(0);
    });

    it('emits SubagentEvent with correct parent_tool_id', async () => {
        const ctx = makeMockContext();
        const result = await transition(dispatchingState('profiler', 'Profile entity', 'tc-abc'), ctx);
        const subagentEvents = result.effects.filter((e) => e.type === SSEEventType.SubagentEvent);
        // Require at least one event: the per-event checks below live in a loop
        // and would otherwise pass vacuously on an empty list.
        expect(subagentEvents.length).toBeGreaterThanOrEqual(1);
        for (const event of subagentEvents) {
            expect(event.parent_tool_id).toBe('tc-abc');
            expect(event.agent_name).toBe('profiler');
        }
    });

    it('continues execution queue after dispatch completes', async () => {
        const ctx = makeMockContext();
        const pendingCalls = [{ toolCallId: 'tc-next', toolName: 'request', args: {} }];
        const result = await transition(dispatchingState('fetcher', 'Fetch data', 'tc-dispatch', pendingCalls), ctx);
        // Should transition to executing the next tool in queue
        expect(result.next.type).toBe('executing');
    });

    it('handles child agent error gracefully', async () => {
        // Provider that throws on streamText
        const failingProvider = {
            model: 'test-model',
            provider: 'test',
            languageModel: {},
            streamText: vi.fn(() => { throw new Error('Provider crashed'); }),
            generateText: vi.fn(),
        };
        const ctx = makeMockContext({ provider: failingProvider });
        const result = await transition(dispatchingState('broken-agent', 'Do something', 'tc-fail'), ctx);
        // Should NOT crash — transitions to thinking so parent can recover
        expect(result.next.type).toBe('thinking');
        // Should emit SubagentEvent with error
        const errorEvents = result.effects.filter((e) => e.type === SSEEventType.SubagentEvent && 'event_type' in e && e.event_type === 'error');
        expect(errorEvents.length).toBeGreaterThanOrEqual(1);
        // Should emit ToolCallResult with error status
        const toolResult = result.effects.find((e) => e.type === SSEEventType.ToolCallResult);
        expect(toolResult).toBeDefined();
        expect(toolResult.status).toBe('error');
        // Should log the error
        expect(ctx.logger.error).toHaveBeenCalledWith('dispatch_child_error', expect.objectContaining({
            agent: 'broken-agent',
        }));
    });
});
841
+ // ---------------------------------------------------------------------------
842
+ // 3. Integration: runAgent() full flow
843
+ // ---------------------------------------------------------------------------
844
describe('runAgent', () => {
    /**
     * Drains the async iterator returned by `runAgent` and returns every
     * emitted event in emission order.
     */
    const collectEvents = async (options) => {
        const events = [];
        for await (const event of runAgent(options)) {
            events.push(event);
        }
        return events;
    };
    /** Builds `runAgent` options holding a single user message. */
    const userTurn = (content, context) => ({ messages: [{ role: 'user', content }], context });

    it('text-only conversation: init → text_delta → done with usage', async () => {
        const ctx = makeMockContext();
        const events = await collectEvents(userTurn('Hello', ctx));
        // Init is first
        expect(events[0].type).toBe(SSEEventType.Init);
        // Done is last
        const lastEvent = events[events.length - 1];
        expect(lastEvent.type).toBe(SSEEventType.Done);
        // Done always has usage (G2)
        const doneEvent = lastEvent;
        expect(doneEvent.usage).toBeDefined();
        expect(doneEvent.usage?.input_tokens).toBeGreaterThanOrEqual(0);
        expect(doneEvent.usage?.output_tokens).toBeGreaterThanOrEqual(0);
    });

    it('tool call conversation: init → text → tool_start → tool_result → done', async () => {
        const searchTool = makeMockToolDef({
            execute: vi.fn().mockResolvedValue({ results: ['found'] }),
        });
        const registry = makeMockRegistry({ search: searchTool });
        let callCount = 0;
        const provider = {
            model: 'test-model',
            provider: 'test',
            languageModel: {},
            streamText: vi.fn(() => {
                callCount++;
                if (callCount === 1) {
                    // First call: model requests a tool
                    return makeMockStream([
                        { type: 'text-delta', textDelta: 'Searching...' },
                        { type: 'tool-call', toolCallId: 'c1', toolName: 'search', args: { q: 'test' } },
                        { type: 'finish', usage: makeUsage({ inputTokens: 50, outputTokens: 20, totalTokens: 70 }) },
                    ], 'Searching...');
                }
                // Second call: model responds with text
                return makeMockStream([
                    { type: 'text-delta', textDelta: 'Found results.' },
                    { type: 'finish', usage: makeUsage({ inputTokens: 80, outputTokens: 30, totalTokens: 110 }) },
                ], 'Found results.');
            }),
            generateText: vi.fn(),
        };
        const ctx = makeMockContext({ provider, toolRegistry: registry });
        const events = await collectEvents(userTurn('Search for test', ctx));
        // Should have init, text deltas, tool events, more text deltas, done
        const types = events.map((e) => e.type);
        expect(types[0]).toBe(SSEEventType.Init);
        expect(types[types.length - 1]).toBe(SSEEventType.Done);
        expect(types).toContain(SSEEventType.ToolCallStart);
        expect(types).toContain(SSEEventType.ToolCallResult);
        // tool_call_start must come before tool_call_result
        const startIdx = types.indexOf(SSEEventType.ToolCallStart);
        const resultIdx = types.indexOf(SSEEventType.ToolCallResult);
        expect(startIdx).toBeLessThan(resultIdx);
        // Provider should have been called twice (thinking → tool → thinking → done)
        expect(provider.streamText).toHaveBeenCalledTimes(2);
    });

    it('abort signal terminates the loop with user_abort', async () => {
        const abortController = new AbortController();
        // Provider that aborts after first call
        const provider = {
            model: 'test-model',
            provider: 'test',
            languageModel: {},
            streamText: vi.fn(() => {
                // Abort after stream starts
                abortController.abort();
                return makeMockStream([
                    { type: 'text-delta', textDelta: 'Starting...' },
                    { type: 'finish', usage: makeUsage() },
                ]);
            }),
            generateText: vi.fn(),
        };
        const ctx = makeMockContext({ provider, signal: abortController.signal });
        const events = await collectEvents(userTurn('Hello', ctx));
        // Should end with done event
        const doneEvent = events[events.length - 1];
        expect(doneEvent.type).toBe(SSEEventType.Done);
        // Usage should be present even on abort
        expect(doneEvent.usage).toBeDefined();
        // NOTE(review): the title promises `user_abort`, but no termination reason
        // is asserted here — confirm where the reason is exposed and pin it.
    });

    it('max turns terminates the loop', async () => {
        const tool = makeMockToolDef();
        const registry = makeMockRegistry({ loop_tool: tool });
        // A counter gives every tool call a distinct, reproducible id; a
        // timestamp-based id can repeat when two turns land in the same millisecond.
        let nextCallId = 0;
        // Provider always requests a tool call (infinite loop)
        const provider = {
            model: 'test-model',
            provider: 'test',
            languageModel: {},
            streamText: vi.fn(() => {
                nextCallId += 1;
                return makeMockStream([
                    { type: 'tool-call', toolCallId: `c-${nextCallId}`, toolName: 'loop_tool', args: {} },
                    { type: 'finish', usage: makeUsage({ inputTokens: 10, outputTokens: 5, totalTokens: 15 }) },
                ], '');
            }),
            generateText: vi.fn(),
        };
        const ctx = makeMockContext({
            provider,
            toolRegistry: registry,
            maxTurns: 3,
        });
        const events = await collectEvents(userTurn('Loop forever', ctx));
        // Should have terminated
        const doneEvent = events[events.length - 1];
        expect(doneEvent.type).toBe(SSEEventType.Done);
        // Turn count should not exceed maxTurns
        expect(ctx.turnCount).toBeLessThanOrEqual(3);
    });

    it('done event always includes usage regardless of reason (G2)', async () => {
        // Abort immediately
        const abortController = new AbortController();
        abortController.abort();
        // Seed the context with usage so the done event has known totals to report.
        const ctx = makeMockContext({
            signal: abortController.signal,
            usage: makeUsage({ inputTokens: 42, outputTokens: 13, totalTokens: 55 }),
        });
        const events = await collectEvents(userTurn('Test', ctx));
        const doneEvent = events[events.length - 1];
        expect(doneEvent.type).toBe(SSEEventType.Done);
        expect(doneEvent.usage).toBeDefined();
        expect(doneEvent.usage?.input_tokens).toBe(42);
        expect(doneEvent.usage?.output_tokens).toBe(13);
    });

    it('init event has session_id', async () => {
        const ctx = makeMockContext({ sessionId: 'sess-abc' });
        const events = await collectEvents(userTurn('Hi', ctx));
        const initEvent = events[0];
        expect(initEvent.type).toBe(SSEEventType.Init);
        expect(initEvent.session_id).toBe('sess-abc');
    });

    it('logs agent_loop_start and agent_loop_done', async () => {
        const ctx = makeMockContext();
        // The events are not inspected here; the run is drained only so both log
        // lines have been written before the logger is checked.
        await collectEvents(userTurn('Hello', ctx));
        expect(ctx.logger.info).toHaveBeenCalledWith('agent_loop_start', expect.objectContaining({
            session: 'test-session',
            tenant: 'test-tenant',
        }));
        expect(ctx.logger.info).toHaveBeenCalledWith('agent_loop_done', expect.objectContaining({
            session: 'test-session',
            reason: expect.any(String),
        }));
    });
});
1030
+ //# sourceMappingURL=loop.test.js.map