@vellumai/assistant 0.8.7 → 0.8.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (387) hide show
  1. package/Dockerfile +20 -4
  2. package/docker-entrypoint.sh +4 -2
  3. package/docker-init-apt-root.sh +3 -1
  4. package/docker-kata-apt-env.sh +3 -1
  5. package/docker-kata-runtime-family.sh +12 -0
  6. package/docs/architecture/memory.md +1 -1
  7. package/docs/plugins.md +75 -79
  8. package/examples/plugins/echo/README.md +6 -12
  9. package/examples/plugins/echo/register.ts +0 -41
  10. package/node_modules/@vellumai/skill-host-contracts/src/server-message.ts +3 -3
  11. package/openapi.yaml +3381 -348
  12. package/package.json +1 -1
  13. package/scripts/generate-openapi.ts +68 -41
  14. package/src/__tests__/agent-loop-exit-reason.test.ts +34 -39
  15. package/src/__tests__/agent-loop-provider-error-recording.test.ts +1 -1
  16. package/src/__tests__/agent-loop.test.ts +37 -87
  17. package/src/__tests__/agent-wake-disk-pressure-callsite.test.ts +2 -0
  18. package/src/__tests__/annotate-activity-metadata.test.ts +262 -0
  19. package/src/__tests__/annotate-risk-options.test.ts +2 -3
  20. package/src/__tests__/anthropic-provider.test.ts +95 -2
  21. package/src/__tests__/assistant-event-hub.test.ts +25 -0
  22. package/src/__tests__/assistant-events-sse-shed.test.ts +8 -0
  23. package/src/__tests__/{conversation-stream-state.test.ts → assistant-stream-state.test.ts} +252 -91
  24. package/src/__tests__/auth-fallback-events-store.test.ts +116 -0
  25. package/src/__tests__/background-workers-disk-pressure.test.ts +6 -0
  26. package/src/__tests__/btw-routes.test.ts +62 -3
  27. package/src/__tests__/build-persisted-content.test.ts +184 -0
  28. package/src/__tests__/catalog-files.test.ts +1 -1
  29. package/src/__tests__/clawhub-files.test.ts +1 -1
  30. package/src/__tests__/compaction-pipeline.test.ts +1 -1
  31. package/src/__tests__/compaction.benchmark.test.ts +0 -30
  32. package/src/__tests__/config-watcher.test.ts +1 -1
  33. package/src/__tests__/conversation-abort-tool-results.test.ts +57 -19
  34. package/src/__tests__/conversation-agent-loop-disk-pressure.test.ts +6 -2
  35. package/src/__tests__/conversation-agent-loop-inference-profile.test.ts +10 -4
  36. package/src/__tests__/conversation-agent-loop-overflow.test.ts +313 -1136
  37. package/src/__tests__/conversation-agent-loop.test.ts +596 -1616
  38. package/src/__tests__/conversation-analysis-routes.test.ts +6 -0
  39. package/src/__tests__/conversation-history-web-search.test.ts +11 -1
  40. package/src/__tests__/conversation-pairing.test.ts +4 -31
  41. package/src/__tests__/conversation-process-app-control-preactivation.test.ts +6 -0
  42. package/src/__tests__/conversation-provider-retry-repair.test.ts +26 -5
  43. package/src/__tests__/conversation-queue.test.ts +2 -0
  44. package/src/__tests__/conversation-routes-disk-view.test.ts +3 -0
  45. package/src/__tests__/conversation-routes-slash-commands.test.ts +6 -5
  46. package/src/__tests__/conversation-runtime-assembly.test.ts +170 -229
  47. package/src/__tests__/conversation-runtime-workspace.test.ts +3 -24
  48. package/src/__tests__/conversation-slash-commands.test.ts +8 -42
  49. package/src/__tests__/conversation-slash-queue.test.ts +6 -1
  50. package/src/__tests__/conversation-surfaces-action-delivery.test.ts +84 -0
  51. package/src/__tests__/conversation-sync-tags.test.ts +27 -15
  52. package/src/__tests__/conversation-title-service.test.ts +135 -2
  53. package/src/__tests__/conversation-workspace-injection.test.ts +6 -1
  54. package/src/__tests__/cross-provider-web-search.test.ts +214 -1
  55. package/src/__tests__/db-schedule-syntax-migration.test.ts +5 -0
  56. package/src/__tests__/dm-persistence.test.ts +5 -1
  57. package/src/__tests__/empty-response-hook.test.ts +304 -0
  58. package/src/__tests__/feature-flag-test-helpers.ts +2 -2
  59. package/src/__tests__/gemini-image-service.test.ts +13 -0
  60. package/src/__tests__/helpers/mock-provider.ts +110 -0
  61. package/src/__tests__/helpers/native-web-search-harness.ts +129 -0
  62. package/src/__tests__/history-repair-hook.test.ts +1 -0
  63. package/src/__tests__/identity-intro-cache.test.ts +12 -100
  64. package/src/__tests__/identity-routes.test.ts +248 -7
  65. package/src/__tests__/inbound-slack-persistence.test.ts +5 -1
  66. package/src/__tests__/injector-background-turn.test.ts +2 -8
  67. package/src/__tests__/injector-chain.test.ts +106 -270
  68. package/src/__tests__/injector-disk-pressure.test.ts +3 -12
  69. package/src/__tests__/injector-document-comments.test.ts +2 -2
  70. package/src/__tests__/injector-pkb-v2-silenced.test.ts +30 -22
  71. package/src/__tests__/injector-v3-suppression.test.ts +31 -37
  72. package/src/__tests__/internal-telemetry-routes.test.ts +109 -0
  73. package/src/__tests__/list-messages-page-latest.test.ts +60 -0
  74. package/src/__tests__/list-messages-tool-merge.test.ts +20 -0
  75. package/src/__tests__/llm-usage-store.test.ts +223 -1
  76. package/src/__tests__/memory-retrieval-hook.test.ts +297 -0
  77. package/src/__tests__/memory-v2-static-injector.test.ts +103 -35
  78. package/src/__tests__/native-web-search.test.ts +191 -0
  79. package/src/__tests__/onboarding-template-contract.test.ts +2 -0
  80. package/src/__tests__/openai-image-service.test.ts +17 -0
  81. package/src/__tests__/openai-provider.test.ts +31 -1
  82. package/src/__tests__/persist-unsendable-image.test.ts +215 -0
  83. package/src/__tests__/persistence-secret-redaction.test.ts +1 -0
  84. package/src/__tests__/pipeline-runner.test.ts +29 -39
  85. package/src/__tests__/pkb-autoinject.test.ts +2 -5
  86. package/src/__tests__/plugin-bootstrap.test.ts +13 -28
  87. package/src/__tests__/plugin-registry.test.ts +0 -27
  88. package/src/__tests__/plugin-types.test.ts +2 -125
  89. package/src/__tests__/process-message-display-content.test.ts +6 -2
  90. package/src/__tests__/regenerate-fire-and-forget-trace.test.ts +5 -1
  91. package/src/__tests__/resolve-trust-class.test.ts +4 -4
  92. package/src/__tests__/runtime-events-sse-reconnect.test.ts +60 -23
  93. package/src/__tests__/schedule-routes.test.ts +603 -2
  94. package/src/__tests__/schedule-store.test.ts +41 -0
  95. package/src/__tests__/schedule-tools.test.ts +35 -0
  96. package/src/__tests__/server-history-render.test.ts +314 -1
  97. package/src/__tests__/skillssh-files.test.ts +1 -1
  98. package/src/__tests__/system-prompt.test.ts +20 -0
  99. package/src/__tests__/task-scheduler.test.ts +162 -1
  100. package/src/__tests__/terminal-tools.test.ts +6 -1
  101. package/src/__tests__/title-generate-hook.test.ts +319 -0
  102. package/src/__tests__/tool-error-hook.test.ts +278 -0
  103. package/src/__tests__/tool-preview-lifecycle.test.ts +468 -5
  104. package/src/__tests__/tool-result-metadata-plumbing.test.ts +1 -0
  105. package/src/__tests__/tool-result-truncate-hook.test.ts +127 -0
  106. package/src/__tests__/tool-result-truncation.test.ts +0 -2
  107. package/src/__tests__/ui-choice-copy-surfaces.test.ts +254 -0
  108. package/src/__tests__/ui-work-result-surface.test.ts +159 -0
  109. package/src/__tests__/usage-routes.test.ts +285 -1
  110. package/src/__tests__/user-plugin-loader.test.ts +2 -2
  111. package/src/__tests__/voice-session-bridge.test.ts +6 -3
  112. package/src/__tests__/web-search-backend-failure.test.ts +166 -0
  113. package/src/agent/loop.ts +346 -442
  114. package/src/api/events/assistant-thinking-delta.ts +33 -0
  115. package/src/api/events/tool-output-chunk.ts +45 -0
  116. package/src/api/events/tool-use-preview-start.ts +32 -0
  117. package/src/api/events/trace-event.ts +69 -0
  118. package/src/api/index.ts +48 -13
  119. package/src/api/responses/conversation-message.ts +368 -0
  120. package/src/avatar/__tests__/avatar-store.test.ts +34 -29
  121. package/src/cli/commands/__tests__/notifications.test.ts +58 -14
  122. package/src/cli/commands/notifications.ts +112 -60
  123. package/src/config/assistant-feature-flags.ts +22 -11
  124. package/src/config/bundled-skills/app-builder/SKILL.md +3 -20
  125. package/src/config/bundled-skills/app-builder/references/examples/README.md +17 -0
  126. package/src/config/bundled-skills/app-builder/references/examples/expense-tracker.md +515 -0
  127. package/src/config/bundled-skills/app-builder/references/examples/focus-timer.md +342 -0
  128. package/src/config/bundled-skills/app-builder/references/examples/habit-tracker.md +490 -0
  129. package/src/config/bundled-skills/document-editor/SKILL.md +1 -1
  130. package/src/config/bundled-skills/messaging/SKILL.md +0 -7
  131. package/src/config/feature-flag-cache.ts +3 -3
  132. package/src/config/feature-flag-registry.json +35 -3
  133. package/src/config/schemas/__tests__/memory-v2.test.ts +1 -0
  134. package/src/config/schemas/__tests__/memory-v3.test.ts +25 -0
  135. package/src/config/schemas/llm.ts +1 -0
  136. package/src/config/schemas/memory-v2.ts +8 -0
  137. package/src/config/schemas/memory-v3.ts +8 -0
  138. package/src/config/schemas/platform.ts +8 -0
  139. package/src/config/seed-inference-profiles.ts +2 -2
  140. package/src/config/skills.ts +13 -0
  141. package/src/context/compactor.ts +1 -1
  142. package/src/context/strip-injections.ts +122 -0
  143. package/src/context/token-estimator.ts +23 -0
  144. package/src/context/tool-result-truncation.ts +0 -23
  145. package/src/context/window-manager.ts +3 -6
  146. package/src/credential-execution/executable-discovery.ts +16 -0
  147. package/src/daemon/__tests__/conversation-lifecycle-auto-analyze.test.ts +6 -0
  148. package/src/daemon/__tests__/inference-profile-notification.test.ts +153 -0
  149. package/src/daemon/__tests__/native-web-search-metadata.test.ts +10 -8
  150. package/src/daemon/assistant-attachments.ts +1 -1
  151. package/src/daemon/config-watcher.ts +2 -2
  152. package/src/daemon/context-overflow-reducer.ts +0 -1
  153. package/src/daemon/conversation-agent-loop-handlers.ts +605 -153
  154. package/src/daemon/conversation-agent-loop.ts +281 -760
  155. package/src/daemon/conversation-history.ts +5 -4
  156. package/src/daemon/conversation-lifecycle.ts +3 -4
  157. package/src/daemon/conversation-messaging.ts +7 -6
  158. package/src/daemon/conversation-process.ts +11 -16
  159. package/src/daemon/conversation-runtime-assembly.ts +130 -347
  160. package/src/daemon/conversation-slash.ts +6 -25
  161. package/src/daemon/conversation-surfaces.ts +222 -4
  162. package/src/daemon/conversation-tool-setup.ts +2 -29
  163. package/src/daemon/conversation.ts +32 -14
  164. package/src/daemon/external-plugins-bootstrap.ts +9 -10
  165. package/src/daemon/handlers/config-a2a.ts +51 -36
  166. package/src/daemon/handlers/config-slack-channel.ts +20 -14
  167. package/src/daemon/handlers/config-telegram.ts +16 -2
  168. package/src/daemon/handlers/shared.ts +156 -84
  169. package/src/daemon/handlers/skills.ts +39 -10
  170. package/src/daemon/lifecycle.ts +4 -0
  171. package/src/daemon/message-types/apps.ts +1 -29
  172. package/src/daemon/message-types/messages.ts +9 -57
  173. package/src/daemon/message-types/skills.ts +2 -0
  174. package/src/daemon/message-types/surfaces.ts +136 -3
  175. package/src/daemon/now-scratchpad.ts +21 -0
  176. package/src/daemon/orphan-reaper.test.ts +210 -0
  177. package/src/daemon/orphan-reaper.ts +240 -0
  178. package/src/daemon/persist-unsendable-image.ts +117 -0
  179. package/src/daemon/process-message.ts +1 -3
  180. package/src/daemon/trace-emitter.ts +6 -4
  181. package/src/daemon/trust-context.ts +19 -0
  182. package/src/daemon/wake-target-adapter.ts +3 -1
  183. package/src/home/home-greeting-cache.ts +24 -1
  184. package/src/ipc/gateway-client.test.ts +2 -2
  185. package/src/ipc/gateway-client.ts +3 -3
  186. package/src/media/gemini-image-service.ts +15 -0
  187. package/src/media/openai-image-service.ts +14 -0
  188. package/src/media/types.ts +34 -0
  189. package/src/memory/__tests__/jobs-worker-v2-schedule.test.ts +56 -0
  190. package/src/memory/auth-fallback-events-store.ts +94 -0
  191. package/src/memory/conversation-title-service.ts +65 -41
  192. package/src/memory/db-init.ts +4 -0
  193. package/src/memory/graph/__tests__/conversation-graph-memory-registry.test.ts +119 -0
  194. package/src/memory/graph/conversation-graph-memory.ts +65 -0
  195. package/src/memory/jobs-store.ts +33 -0
  196. package/src/memory/jobs-worker.ts +31 -4
  197. package/src/memory/llm-usage-store.ts +224 -50
  198. package/src/memory/migrations/222-strip-placeholder-sentinels-from-messages.ts +6 -5
  199. package/src/memory/migrations/270-schedule-source-conversation.ts +13 -0
  200. package/src/memory/migrations/271-create-auth-fallback-events.ts +21 -0
  201. package/src/memory/migrations/index.ts +2 -0
  202. package/src/memory/pkb/autoinject.ts +61 -0
  203. package/src/memory/pkb/context.ts +50 -0
  204. package/src/memory/pkb/types.ts +14 -0
  205. package/src/memory/schedule-attribution-sql.ts +104 -0
  206. package/src/memory/schema/infrastructure.ts +16 -0
  207. package/src/memory/usage-grouped-buckets.ts +6 -1
  208. package/src/memory/v2/__tests__/consolidation-job.test.ts +1 -1
  209. package/src/memory/v2/consolidation-job.ts +1 -1
  210. package/src/memory/v3/__tests__/health.test.ts +16 -0
  211. package/src/memory/v3/__tests__/orchestrate.test.ts +45 -9
  212. package/src/memory/v3/__tests__/provider-blocks.test.ts +13 -0
  213. package/src/memory/v3/__tests__/router.test.ts +101 -29
  214. package/src/memory/v3/__tests__/selector.test.ts +93 -27
  215. package/src/memory/v3/__tests__/shadow-plugin.test.ts +23 -5
  216. package/src/memory/v3/health.ts +0 -0
  217. package/src/memory/v3/llm-retry.ts +32 -0
  218. package/src/memory/v3/orchestrate.ts +26 -14
  219. package/src/memory/v3/provider-blocks.ts +15 -5
  220. package/src/memory/v3/router.ts +48 -42
  221. package/src/memory/v3/selector.ts +57 -42
  222. package/src/memory/v3/shadow-plugin.ts +47 -15
  223. package/src/memory/v3/types.ts +8 -0
  224. package/src/notifications/conversation-pairing.ts +8 -15
  225. package/src/notifications/decision-engine.ts +6 -3
  226. package/src/notifications/home-feed-side-effect.ts +12 -1
  227. package/src/permissions/prompter.ts +4 -0
  228. package/src/plugin-api/constants.ts +4 -0
  229. package/src/plugin-api/index.ts +8 -1
  230. package/src/plugin-api/types.ts +151 -1
  231. package/src/plugins/defaults/empty-response/hooks/stop.ts +126 -0
  232. package/src/plugins/defaults/empty-response/register.ts +8 -13
  233. package/src/plugins/defaults/index.ts +1 -15
  234. package/src/plugins/defaults/injectors/register.ts +243 -74
  235. package/src/plugins/defaults/memory-retrieval/hooks/post-compact.ts +91 -0
  236. package/src/plugins/defaults/memory-retrieval/hooks/user-prompt-submit-temp.ts +216 -0
  237. package/src/plugins/defaults/memory-retrieval/injector-chain.ts +35 -0
  238. package/src/plugins/defaults/title-generate/hooks/stop.ts +75 -0
  239. package/src/plugins/defaults/title-generate/hooks/user-prompt-submit.ts +35 -0
  240. package/src/plugins/defaults/title-generate/package.json +1 -1
  241. package/src/plugins/defaults/title-generate/register.ts +18 -18
  242. package/src/plugins/defaults/tool-error/hooks/post-tool-use.ts +118 -0
  243. package/src/plugins/defaults/tool-error/package.json +1 -1
  244. package/src/plugins/defaults/tool-error/register.ts +9 -21
  245. package/src/plugins/defaults/tool-result-truncate/hooks/post-tool-use.ts +32 -0
  246. package/src/plugins/defaults/tool-result-truncate/register.ts +10 -21
  247. package/src/plugins/defaults/tool-result-truncate/terminal.ts +37 -18
  248. package/src/plugins/pipeline.ts +6 -18
  249. package/src/plugins/registry.ts +8 -25
  250. package/src/plugins/types.ts +43 -474
  251. package/src/proactive-artifact/aux-message-injector.ts +3 -3
  252. package/src/proactive-artifact/job.test.ts +7 -12
  253. package/src/prompts/__tests__/system-prompt.test.ts +36 -0
  254. package/src/prompts/templates/BOOTSTRAP-ACTIVATION-RAIL.md +62 -0
  255. package/src/prompts/templates/BOOTSTRAP.md +2 -2
  256. package/src/prompts/templates/system-sections.ts +15 -0
  257. package/src/providers/anthropic/client.ts +37 -29
  258. package/src/providers/openai/__tests__/chat-completions-provider-reasoning.test.ts +112 -0
  259. package/src/providers/openai/chat-completions-provider.ts +44 -0
  260. package/src/providers/openrouter/client.ts +1 -0
  261. package/src/providers/placeholder-sentinels.ts +35 -0
  262. package/src/runtime/__tests__/agent-wake.test.ts +5 -1
  263. package/src/runtime/agent-wake.ts +2 -2
  264. package/src/runtime/assistant-event-hub.ts +36 -6
  265. package/src/runtime/{conversation-stream-state.ts → assistant-stream-state.ts} +132 -58
  266. package/src/runtime/http-router.ts +16 -21
  267. package/src/runtime/http-types.ts +16 -70
  268. package/src/runtime/pending-interactions.ts +1 -0
  269. package/src/runtime/routes/__tests__/consolidation-routes.test.ts +265 -2
  270. package/src/runtime/routes/__tests__/conversation-query-routes.test.ts +31 -1
  271. package/src/runtime/routes/__tests__/memory-v2-routes.test.ts +6 -2
  272. package/src/runtime/routes/__tests__/tts-routes.test.ts +6 -2
  273. package/src/runtime/routes/app-management-routes.ts +6 -117
  274. package/src/runtime/routes/app-routes.ts +13 -15
  275. package/src/runtime/routes/attachment-routes.ts +26 -15
  276. package/src/runtime/routes/avatar-routes.ts +26 -0
  277. package/src/runtime/routes/btw-routes.ts +29 -23
  278. package/src/runtime/routes/consolidation-routes.ts +120 -20
  279. package/src/runtime/routes/conversation-query-routes.ts +2 -0
  280. package/src/runtime/routes/conversation-routes.ts +358 -184
  281. package/src/runtime/routes/documents-routes.ts +4 -0
  282. package/src/runtime/routes/domain-routes.ts +51 -37
  283. package/src/runtime/routes/epoch-millis-range.ts +34 -0
  284. package/src/runtime/routes/events-routes.ts +28 -34
  285. package/src/runtime/routes/gateway-log-routes.ts +26 -4
  286. package/src/runtime/routes/heartbeat-routes.ts +32 -12
  287. package/src/runtime/routes/identity-intro-cache.ts +11 -34
  288. package/src/runtime/routes/identity-routes.ts +208 -17
  289. package/src/runtime/routes/image-generation-routes.ts +40 -2
  290. package/src/runtime/routes/index.ts +2 -0
  291. package/src/runtime/routes/integrations/a2a.ts +12 -10
  292. package/src/runtime/routes/integrations/slack/__tests__/channel.test.ts +16 -0
  293. package/src/runtime/routes/integrations/slack/channel.ts +4 -0
  294. package/src/runtime/routes/integrations/slack/share.ts +27 -6
  295. package/src/runtime/routes/integrations/telegram.ts +6 -0
  296. package/src/runtime/routes/integrations/twilio.ts +42 -0
  297. package/src/runtime/routes/internal-telemetry-routes.ts +88 -0
  298. package/src/runtime/routes/log-export-routes.ts +8 -0
  299. package/src/runtime/routes/memory-v2-routes.ts +15 -8
  300. package/src/runtime/routes/memory-v3-routes.ts +50 -28
  301. package/src/runtime/routes/oauth-apps.ts +66 -12
  302. package/src/runtime/routes/oauth-providers.ts +44 -5
  303. package/src/runtime/routes/platform-routes.ts +81 -5
  304. package/src/runtime/routes/playground/__tests__/force-compact.test.ts +6 -4
  305. package/src/runtime/routes/playground/force-compact.ts +1 -1
  306. package/src/runtime/routes/rename-conversation-routes.ts +5 -0
  307. package/src/runtime/routes/schedule-routes.ts +152 -42
  308. package/src/runtime/routes/secret-routes.ts +14 -2
  309. package/src/runtime/routes/skills-routes.ts +43 -14
  310. package/src/runtime/routes/tool-call-confirmation-enrichment.test.ts +161 -0
  311. package/src/runtime/routes/tool-call-confirmation-enrichment.ts +107 -0
  312. package/src/runtime/routes/trust-rules-routes.ts +26 -2
  313. package/src/runtime/routes/tts-routes.ts +35 -0
  314. package/src/runtime/routes/types.ts +66 -8
  315. package/src/runtime/routes/usage-routes.ts +47 -39
  316. package/src/runtime/routes/webhook-routes.ts +41 -2
  317. package/src/runtime/routes/workspace-routes.ts +4 -0
  318. package/src/runtime/services/__tests__/analyze-conversation.test.ts +6 -0
  319. package/src/runtime/services/analyze-conversation.ts +2 -2
  320. package/src/schedule/schedule-store.ts +20 -1
  321. package/src/schedule/schedule-usage-store.ts +83 -0
  322. package/src/schedule/scheduler.ts +12 -5
  323. package/src/skills/catalog-files.ts +2 -2
  324. package/src/skills/catalog-install.ts +3 -0
  325. package/src/skills/categories-cache.ts +118 -0
  326. package/src/skills/clawhub-files.ts +1 -2
  327. package/src/skills/skillssh-files.ts +1 -2
  328. package/src/telemetry/types.ts +29 -1
  329. package/src/telemetry/usage-telemetry-reporter.test.ts +112 -3
  330. package/src/telemetry/usage-telemetry-reporter.ts +57 -2
  331. package/src/tools/executor.ts +1 -53
  332. package/src/tools/network/__tests__/web-search-metadata.test.ts +7 -1
  333. package/src/tools/network/__tests__/web-search.test.ts +11 -3
  334. package/src/tools/network/web-search-error.test.ts +248 -0
  335. package/src/tools/network/web-search-error.ts +267 -0
  336. package/src/tools/network/web-search.ts +207 -48
  337. package/src/tools/schedule/create.ts +2 -0
  338. package/src/tools/terminal/safe-env.ts +10 -1
  339. package/src/tools/ui-surface/definitions.ts +9 -1
  340. package/src/tts/__tests__/provider-catalog-consistency.test.ts +85 -1
  341. package/src/tts/provider-catalog.ts +76 -1
  342. package/src/util/mutex.ts +47 -0
  343. package/src/workspace/git-service.ts +1 -42
  344. package/src/workspace/migrations/095-bump-heartbeat-interval-30m-to-60m.ts +51 -0
  345. package/src/workspace/migrations/096-reduce-quality-profile-effort.ts +72 -0
  346. package/src/workspace/migrations/097-enable-adaptive-thinking-managed-profiles.ts +93 -0
  347. package/src/workspace/migrations/registry.ts +6 -0
  348. package/src/__tests__/bootstrap-turn-cleanup.test.ts +0 -44
  349. package/src/__tests__/empty-response-pipeline.test.ts +0 -423
  350. package/src/__tests__/llm-call-pipeline.test.ts +0 -287
  351. package/src/__tests__/memory-retrieval-pipeline.test.ts +0 -418
  352. package/src/__tests__/persistence-pipeline.test.ts +0 -503
  353. package/src/__tests__/title-generate-pipeline.test.ts +0 -211
  354. package/src/__tests__/token-estimate-pipeline.test.ts +0 -479
  355. package/src/__tests__/tool-error-pipeline.test.ts +0 -241
  356. package/src/__tests__/tool-execute-pipeline.test.ts +0 -417
  357. package/src/__tests__/tool-result-truncate-pipeline.test.ts +0 -341
  358. package/src/daemon/bootstrap-turn-cleanup.ts +0 -45
  359. package/src/gallery/default-gallery.ts +0 -1359
  360. package/src/gallery/gallery-manifest.ts +0 -28
  361. package/src/home/feature-gate.ts +0 -22
  362. package/src/plugins/defaults/empty-response/middlewares/emptyResponse.ts +0 -22
  363. package/src/plugins/defaults/empty-response/terminal.ts +0 -106
  364. package/src/plugins/defaults/injectors/package.json +0 -15
  365. package/src/plugins/defaults/llm-call/middlewares/llmCall.ts +0 -17
  366. package/src/plugins/defaults/llm-call/package.json +0 -15
  367. package/src/plugins/defaults/llm-call/register.ts +0 -45
  368. package/src/plugins/defaults/memory-retrieval/middlewares/memoryRetrieval.ts +0 -17
  369. package/src/plugins/defaults/memory-retrieval/package.json +0 -15
  370. package/src/plugins/defaults/memory-retrieval/register.ts +0 -181
  371. package/src/plugins/defaults/persistence/middlewares/persistence.ts +0 -19
  372. package/src/plugins/defaults/persistence/package.json +0 -15
  373. package/src/plugins/defaults/persistence/register.ts +0 -38
  374. package/src/plugins/defaults/persistence/terminal.ts +0 -83
  375. package/src/plugins/defaults/title-generate/terminal.ts +0 -31
  376. package/src/plugins/defaults/token-estimate/middlewares/tokenEstimate.ts +0 -23
  377. package/src/plugins/defaults/token-estimate/package.json +0 -15
  378. package/src/plugins/defaults/token-estimate/register.ts +0 -34
  379. package/src/plugins/defaults/token-estimate/terminal.ts +0 -40
  380. package/src/plugins/defaults/tool-error/middlewares/toolError.ts +0 -21
  381. package/src/plugins/defaults/tool-error/terminal.ts +0 -47
  382. package/src/plugins/defaults/tool-execute/middlewares/toolExecute.ts +0 -23
  383. package/src/plugins/defaults/tool-execute/package.json +0 -15
  384. package/src/plugins/defaults/tool-execute/register.ts +0 -49
  385. package/src/plugins/defaults/tool-result-truncate/middlewares/toolResultTruncate.ts +0 -23
  386. package/src/plugins/defaults/tool-result-truncate/types.ts +0 -22
  387. package/src/skills/category-inference.ts +0 -111
@@ -1,26 +1,18 @@
1
1
  import { createRequire } from "node:module";
2
- import { afterAll, beforeEach, describe, expect, mock, test } from "bun:test";
3
-
4
- import { CompactionCircuit } from "../agent/compaction-circuit.js";
5
- import type {
6
- AgentEvent,
7
- AgentLoopRunOptions,
8
- AgentLoopRunResult,
9
- MidLoopCompaction,
10
- } from "../agent/loop.js";
11
- import type { ContextWindowResult } from "../context/window-manager.js";
2
+ import {
3
+ afterAll,
4
+ beforeEach,
5
+ describe,
6
+ expect,
7
+ mock,
8
+ spyOn,
9
+ test,
10
+ } from "bun:test";
11
+
12
+ import type { LoopToolExecutor } from "../agent/loop.js";
12
13
  import type { ServerMessage } from "../daemon/message-protocol.js";
13
- import { defaultCompactionTerminal } from "../plugins/defaults/compaction/terminal.js";
14
14
  import { resetPluginRegistryAndRegisterDefaults } from "../plugins/defaults/index.js";
15
- import { DEFAULT_TIMEOUTS, runPipeline } from "../plugins/pipeline.js";
16
- import { getMiddlewaresFor } from "../plugins/registry.js";
17
- import type {
18
- CompactionArgs,
19
- CompactionResult,
20
- TurnContext,
21
- } from "../plugins/types.js";
22
- import { PluginTimeoutError } from "../plugins/types.js";
23
- import type { ContentBlock, Message } from "../providers/types.js";
15
+ import type { Message, Provider, ToolDefinition } from "../providers/types.js";
24
16
 
25
17
  const conversationCrudRealSnapshot = {
26
18
  ...(createRequire(import.meta.url)(
@@ -76,6 +68,7 @@ mock.module("../config/loader.js", () => ({
76
68
  memory: { retrieval: { scratchpadInjection: { enabled: true } } },
77
69
  ui: mockUiConfig,
78
70
  compaction: { enabled: true, autoThreshold: 0.7 },
71
+ conversations: { skipAutoRetitling: true },
79
72
  }),
80
73
  loadRawConfig: () => ({}),
81
74
  saveRawConfig: () => {},
@@ -86,17 +79,20 @@ mock.module("../config/loader.js", () => ({
86
79
 
87
80
  // Token estimator returns a small value by default (well within budget)
88
81
  // so preflight does not trigger unless the test overrides it. Both the
89
- // calibrated entry point (`estimatePromptTokens`, used in the convergence
90
- // path) and the raw entry point (`estimatePromptTokensRaw`, used by the
91
- // default `tokenEstimate` plugin pipeline for preflight/mid-loop) are
82
+ // calibrated entry point (`estimatePromptTokens`, which backs the preflight
83
+ // overflow gate and the convergence path) and the raw entry point
84
+ // (`estimatePromptTokensRaw`, used by the pre-send calibration capture) are
92
85
  // stubbed so either call site can drive the test.
93
86
  let mockEstimateTokens = 1000;
94
87
  mock.module("../context/token-estimator.js", () => ({
95
88
  estimatePromptTokens: () => mockEstimateTokens,
96
89
  estimatePromptTokensRaw: () => mockEstimateTokens,
97
- // Pass-through: the default plugin computes `toolTokenBudget` via this
98
- // helper before delegating to the raw estimator. Return 0 so the mocked
99
- // raw estimate is not perturbed.
90
+ // The preflight overflow gate calls this calibrated wrapper directly, so it
91
+ // must honor `mockEstimateTokens` too rather than fall through to the real
92
+ // implementation.
93
+ estimatePromptTokensWithTools: () => mockEstimateTokens,
94
+ // Pass-through: `estimatePromptTokensWithTools` computes `toolTokenBudget`
95
+ // via this helper. Return 0 so the mocked estimate is not perturbed.
100
96
  estimateToolsTokens: () => 0,
101
97
  }));
102
98
 
@@ -364,15 +360,6 @@ mock.module("../daemon/conversation-runtime-assembly.js", () => ({
364
360
  applyRuntimeInjections: applyRuntimeInjectionsMock,
365
361
  buildUnifiedTurnContextBlock: buildUnifiedTurnContextBlockMock,
366
362
  stripInjectionsForCompaction: (msgs: Message[]) => msgs,
367
- findLastInjectedNowContent: () => null,
368
- readNowScratchpad: () => null,
369
- readPkbContext: () => null,
370
- getPkbAutoInjectList: () => [
371
- "INDEX.md",
372
- "essentials.md",
373
- "threads.md",
374
- "buffer.md",
375
- ],
376
363
  isSlackChannelConversation: () => false,
377
364
  getSlackCompactionWatermarkForPrefix:
378
365
  getSlackCompactionWatermarkForPrefixMock,
@@ -549,195 +536,78 @@ mock.module("../proactive-artifact/index.js", () => ({
549
536
 
550
537
  // ── Imports (after mocks) ────────────────────────────────────────────
551
538
 
539
+ import { AgentLoop } from "../agent/loop.js";
552
540
  import {
553
541
  type AgentLoopConversationContext,
554
542
  applyCompactionResult,
555
543
  runAgentLoopImpl,
556
544
  } from "../daemon/conversation-agent-loop.js";
545
+ import {
546
+ createMockProvider,
547
+ type ScriptedResponse,
548
+ textResponse,
549
+ toolUseResponse,
550
+ } from "./helpers/mock-provider.js";
557
551
 
558
552
  // ── Test helpers ─────────────────────────────────────────────────────
559
553
 
560
- type AgentLoopRun = (
561
- messages: Message[],
562
- onEvent: (event: AgentEvent) => void | Promise<void>,
563
- options?: AgentLoopRunOptions,
564
- ) => Promise<Message[]>;
565
-
566
- /**
567
- * Faithful re-implementation of `AgentLoop.compact()` for the mock loop: run
568
- * the compaction pipeline against the supplied turn context (which carries the
569
- * test's `contextWindowManager`), invoke the orchestrator-supplied hooks, and
570
- * return the continuation history — or `null` on timeout/exhaustion so the
571
- * caller yields "budget".
572
- */
573
- async function simulateInlineCompaction(
574
- compaction: MidLoopCompaction,
575
- history: Message[],
576
- turnContext: TurnContext | undefined,
577
- signal: AbortSignal | undefined,
578
- onEvent: (event: AgentEvent) => void | Promise<void>,
579
- compactionCircuit: CompactionCircuit,
580
- ): Promise<Message[] | null> {
581
- await onEvent({ type: "context_compacting" });
582
- const { rawHistory, options } = compaction.prepare(history);
583
- let result: CompactionResult;
584
- try {
585
- result = await runPipeline<CompactionArgs, CompactionResult>(
586
- "compaction",
587
- getMiddlewaresFor("compaction"),
588
- (args) => defaultCompactionTerminal(args, turnContext as TurnContext),
589
- { messages: rawHistory, signal, options },
590
- turnContext as TurnContext,
591
- DEFAULT_TIMEOUTS.compaction,
592
- );
593
- } catch (error) {
594
- if (error instanceof PluginTimeoutError) {
595
- await compactionCircuit.recordOutcome(
596
- {
597
- currentRequestId: turnContext?.requestId,
598
- currentTurnTrustContext: turnContext?.trust,
599
- turnCount: turnContext?.turnIndex ?? 0,
600
- },
601
- true,
602
- onEvent,
603
- );
604
- return null;
605
- }
606
- throw error;
607
- }
608
- const compactResult = result as ContextWindowResult;
609
- if (compactResult.summaryFailed !== undefined) {
610
- await compactionCircuit.recordOutcome(
611
- {
612
- currentRequestId: turnContext?.requestId,
613
- currentTurnTrustContext: turnContext?.trust,
614
- turnCount: turnContext?.turnIndex ?? 0,
615
- },
616
- compactResult.summaryFailed,
617
- onEvent,
618
- );
619
- }
620
- if (compactResult.compacted) {
621
- await compaction.applyResult(compactResult, rawHistory);
622
- }
623
- if (compactResult.exhausted ?? false) {
624
- return null;
625
- }
626
- return compaction.reinject();
627
- }
628
-
629
- /**
630
- * Adapt a `Message[]`-returning mock loop body into `run()`'s real result
631
- * shape. Mirrors the production loop: the pause-reason carried back is
632
- * whatever the most recent `onCheckpoint` call yielded with (null when it
633
- * never yielded), so the orchestrator derives its yield bookkeeping the same
634
- * way it does against the real loop.
635
- */
636
- const asAgentLoopRun = (
637
- fn: AgentLoopRun,
638
- compactionCircuit: CompactionCircuit,
639
- ): ((
640
- messages: Message[],
641
- onEvent: (event: AgentEvent) => void | Promise<void>,
642
- options?: AgentLoopRunOptions,
643
- ) => Promise<AgentLoopRunResult>) => {
644
- return async (messages, onEvent, options) => {
645
- let exitReason: AgentLoopRunResult["exitReason"] = null;
646
- let wrapped = options;
647
- if (options?.onCheckpoint) {
648
- const inner = options.onCheckpoint;
649
- wrapped = {
650
- ...options,
651
- onCheckpoint: async (info) => {
652
- // Handoff is offered first, mirroring the loop's ordering.
653
- const decision = await inner(info);
654
- if (decision !== "continue") {
655
- exitReason = decision;
656
- return decision;
657
- }
658
- // The mid-loop budget gate and inline compaction both live inside
659
- // `AgentLoop.run`. Replicate them here — same formula, stubbed
660
- // estimator, and the loop's own `compact()` ceremony — so these
661
- // orchestrator tests drive the real escalation path now that the
662
- // orchestrator's `onCheckpoint` is handoff-only and compaction runs
663
- // inline rather than via an orchestrator re-entry loop.
664
- const contextWindow = options.resolveContextWindow?.();
665
- if (contextWindow?.overflowRecovery.enabled) {
666
- const { maxInputTokens, overflowRecovery } = contextWindow;
667
- const safetyMargin =
668
- info.history.length > 50
669
- ? Math.max(overflowRecovery.safetyMarginRatio, 0.15)
670
- : overflowRecovery.safetyMarginRatio;
671
- const preflightBudget = Math.floor(
672
- maxInputTokens * (1 - safetyMargin),
673
- );
674
- if (mockEstimateTokens > preflightBudget * 0.85) {
675
- // Mirror `AgentLoop.compact()`: when a compaction path is
676
- // supplied, run it in place and continue; on timeout or
677
- // exhaustion it returns null, so the loop yields "budget".
678
- const compacted = options.compaction
679
- ? await simulateInlineCompaction(
680
- options.compaction,
681
- info.history,
682
- options.turnContext,
683
- options.signal,
684
- onEvent,
685
- compactionCircuit,
686
- )
687
- : null;
688
- if (compacted) {
689
- exitReason = null;
690
- return "continue";
691
- }
692
- exitReason = "budget";
693
- return "budget";
694
- }
695
- }
696
- exitReason = null;
697
- return "continue";
698
- },
699
- };
700
- }
701
- const history = await fn(messages, onEvent, wrapped);
702
- return { history, exitReason };
703
- };
704
- };
705
-
706
554
  function makeCtx(
707
555
  overrides?: Partial<AgentLoopConversationContext> & {
708
- agentLoopRun?: AgentLoopRun;
556
+ providerResponses?: ScriptedResponse[];
557
+ loopProvider?: Provider;
558
+ loopTools?: ToolDefinition[];
559
+ toolExecutor?: LoopToolExecutor;
709
560
  },
710
561
  ): AgentLoopConversationContext {
711
- const agentLoopRun =
712
- overrides?.agentLoopRun ??
713
- (async (messages: Message[]) => [
714
- ...messages,
715
- {
716
- role: "assistant" as const,
717
- content: [{ type: "text" as const, text: "response" }],
718
- },
719
- ]);
720
-
721
- const compactionCircuit = new CompactionCircuit("test-conv");
562
+ const {
563
+ providerResponses,
564
+ loopProvider,
565
+ loopTools,
566
+ toolExecutor,
567
+ ...ctxOverrides
568
+ } = overrides ?? {};
569
+ const conversationId = ctxOverrides.conversationId ?? "test-conv";
570
+ let processing = true;
571
+
572
+ // Drive the real `AgentLoop` against a scripted provider, mocking only the
573
+ // provider HTTP boundary. The loop owns its mid-loop budget gate, inline
574
+ // compaction, and event emission, so these orchestrator tests exercise the
575
+ // real escalation/persistence path.
576
+ //
577
+ // Name the loop's provider after `ctx.provider` so the two stay in sync,
578
+ // mirroring production where the orchestrator hands the same provider to
579
+ // the loop. The loop stamps this name onto `usage.actualProvider` whenever
580
+ // a response omits its own, which is what the request-log fallback reads.
581
+ // Tests that need to introspect provider calls (or sequence a rejection)
582
+ // build their own `loopProvider` via `createMockProvider`.
583
+ const loopProviderName =
584
+ (ctxOverrides.provider as { name?: string } | undefined)?.name ??
585
+ "mock-provider";
586
+ const provider =
587
+ loopProvider ??
588
+ createMockProvider(
589
+ providerResponses ?? [textResponse("response")],
590
+ loopProviderName,
591
+ ).provider;
592
+ const agentLoop = new AgentLoop(provider, "system prompt", {
593
+ conversationId,
594
+ tools: loopTools ?? [],
595
+ toolExecutor,
596
+ });
722
597
 
723
598
  return {
724
599
  conversationId: "test-conv",
725
600
  messages: [
726
601
  { role: "user", content: [{ type: "text", text: "Hello" }] },
727
602
  ] as Message[],
728
- processing: true,
603
+ isProcessing: () => processing,
604
+ setProcessing: (value: boolean) => {
605
+ processing = value;
606
+ },
729
607
  abortController: new AbortController(),
730
608
  currentRequestId: "test-req",
731
609
 
732
- agentLoop: {
733
- run: asAgentLoopRun(agentLoopRun, compactionCircuit),
734
- getToolTokenBudget: () => 0,
735
- getResolvedTools: () => [],
736
- // Tests here don't exercise calibration; returning undefined makes
737
- // the estimator use the per-provider aggregate key.
738
- getActiveModel: () => undefined,
739
- compactionCircuit,
740
- } as unknown as AgentLoopConversationContext["agentLoop"],
610
+ agentLoop,
741
611
  provider: {
742
612
  name: "mock-provider",
743
613
  sendMessage: async () => ({
@@ -830,9 +700,10 @@ function makeCtx(
830
700
  injectedTokens: 0,
831
701
  }),
832
702
  retrackCachedNodes: () => {},
703
+ recordPkbQueryVectors: () => {},
833
704
  } as unknown as AgentLoopConversationContext["graphMemory"],
834
705
 
835
- ...overrides,
706
+ ...ctxOverrides,
836
707
  } as AgentLoopConversationContext;
837
708
  }
838
709
 
@@ -970,57 +841,28 @@ describe("session-agent-loop", () => {
970
841
  mockHasProactiveArtifactCompleted = false;
971
842
  mockTryClaimProactiveArtifactTrigger = true;
972
843
 
973
- const agentLoopRun: AgentLoopRun = async (messages, onEvent, options) => {
974
- // Prime the assistant row anchor for LLM call 1 — production code
975
- // emits this from `AgentLoop.run` just before `provider.sendMessage`.
976
- await onEvent({ type: "llm_call_started" });
977
- await onEvent({
978
- type: "message_complete",
979
- message: {
980
- role: "assistant",
981
- content: [{ type: "text", text: "I'll build that app." }],
982
- },
983
- });
984
- await onEvent({
985
- type: "tool_use",
986
- id: "tool-1",
987
- name: "app_create",
988
- input: { name: "Flow" },
989
- });
990
- await onEvent({
991
- type: "tool_result",
992
- toolUseId: "tool-1",
993
- content: "{}",
994
- isError: false,
995
- });
996
- await options?.onCheckpoint?.({
997
- turnIndex: 0,
998
- toolCount: 1,
999
- hasToolUse: true,
1000
- history: messages,
1001
- });
1002
- // Prime the anchor again for LLM call 2 — multi-call agent turns
1003
- // reserve a fresh assistant row per LLM call.
1004
- await onEvent({ type: "llm_call_started" });
1005
- await onEvent({
1006
- type: "message_complete",
1007
- message: {
1008
- role: "assistant",
1009
- content: [{ type: "text", text: "Done." }],
1010
- },
1011
- });
1012
- return [
1013
- ...messages,
1014
- {
1015
- role: "assistant" as const,
1016
- content: [{ type: "text" as const, text: "Done." }],
1017
- },
1018
- ];
1019
- };
1020
-
844
+ // A two-call agent turn: the model invokes `app_create`, then wraps up
845
+ // with a final text reply.
1021
846
  const ctx = makeCtx({
1022
847
  conversationId: "test-conv",
1023
- agentLoopRun,
848
+ providerResponses: [
849
+ {
850
+ content: [
851
+ { type: "text", text: "I'll build that app." },
852
+ {
853
+ type: "tool_use",
854
+ id: "tool-1",
855
+ name: "app_create",
856
+ input: { name: "Flow" },
857
+ },
858
+ ],
859
+ model: "mock-model",
860
+ usage: { inputTokens: 10, outputTokens: 5 },
861
+ stopReason: "tool_use",
862
+ },
863
+ textResponse("Done."),
864
+ ],
865
+ toolExecutor: async () => ({ content: "{}", isError: false }),
1024
866
  });
1025
867
  await runAgentLoopImpl(
1026
868
  ctx,
@@ -1156,9 +998,6 @@ describe("session-agent-loop", () => {
1156
998
  reason: "trusted-contact",
1157
999
  };
1158
1000
  const events: ServerMessage[] = [];
1159
- const agentLoopRun = mock(async (_messages: Message[]) => {
1160
- throw new Error("agent loop should not run");
1161
- });
1162
1001
  const activityStates: unknown[][] = [];
1163
1002
  const traceEvents: unknown[][] = [];
1164
1003
  const ctx = makeCtx({
@@ -1171,14 +1010,11 @@ describe("session-agent-loop", () => {
1171
1010
  },
1172
1011
  } as unknown as AgentLoopConversationContext["traceEmitter"],
1173
1012
  });
1174
- ctx.agentLoop.run = asAgentLoopRun(
1175
- agentLoopRun,
1176
- ctx.agentLoop.compactionCircuit,
1177
- );
1013
+ const runSpy = spyOn(ctx.agentLoop, "run");
1178
1014
 
1179
1015
  await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
1180
1016
 
1181
- expect(agentLoopRun).not.toHaveBeenCalled();
1017
+ expect(runSpy).not.toHaveBeenCalled();
1182
1018
  expect(applyRuntimeInjectionsMock).not.toHaveBeenCalled();
1183
1019
  expect(activityStates).toContainEqual([
1184
1020
  "idle",
@@ -1238,7 +1074,7 @@ describe("session-agent-loop", () => {
1238
1074
  });
1239
1075
 
1240
1076
  expect(applyRuntimeInjectionsMock).not.toHaveBeenCalled();
1241
- expect(ctx.processing).toBe(false);
1077
+ expect(ctx.isProcessing()).toBe(false);
1242
1078
  expect(ctx.abortController).toBeNull();
1243
1079
  expect(ctx.currentRequestId).toBeUndefined();
1244
1080
  expect(drainQueue).toHaveBeenCalledWith("loop_complete");
@@ -1254,47 +1090,14 @@ describe("session-agent-loop", () => {
1254
1090
  test("error events from agent loop are classified and emitted", async () => {
1255
1091
  const events: ServerMessage[] = [];
1256
1092
 
1257
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
1258
- // Prime the assistant row anchor — production code emits this from
1259
- // `AgentLoop.run` just before `provider.sendMessage`.
1260
- await onEvent({ type: "llm_call_started" });
1261
- // Simulate tool_use + error during execution
1262
- onEvent({
1263
- type: "tool_use",
1264
- id: "tu-1",
1265
- name: "bash",
1266
- input: { cmd: "ls" },
1267
- });
1268
- onEvent({
1269
- type: "error",
1270
- error: new Error("Tool execution failed: permission denied"),
1271
- });
1272
- onEvent({
1273
- type: "message_complete",
1274
- message: {
1275
- role: "assistant",
1276
- content: [{ type: "text", text: "I encountered an error" }],
1277
- },
1278
- });
1279
- onEvent({
1280
- type: "usage",
1281
- inputTokens: 100,
1282
- outputTokens: 50,
1283
- model: "test-model",
1284
- providerDurationMs: 200,
1285
- });
1286
- return [
1287
- ...messages,
1288
- {
1289
- role: "assistant" as const,
1290
- content: [
1291
- { type: "text", text: "I encountered an error" },
1292
- ] as ContentBlock[],
1293
- },
1294
- ];
1295
- };
1296
-
1297
- const ctx = makeCtx({ agentLoopRun });
1093
+ // The model calls a tool whose executor throws, surfacing an `error`
1094
+ // event from the loop's catch handler.
1095
+ const ctx = makeCtx({
1096
+ providerResponses: [toolUseResponse("tu-1", "bash", { cmd: "ls" })],
1097
+ toolExecutor: async () => {
1098
+ throw new Error("Tool execution failed: permission denied");
1099
+ },
1100
+ });
1298
1101
  await runAgentLoopImpl(ctx, "run ls", "msg-1", (msg) => events.push(msg));
1299
1102
 
1300
1103
  const conversationError = events.find(
@@ -1306,34 +1109,9 @@ describe("session-agent-loop", () => {
1306
1109
  test("non-error agent loop completion does not emit conversation_error", async () => {
1307
1110
  const events: ServerMessage[] = [];
1308
1111
 
1309
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
1310
- // Prime the assistant row anchor — production code emits this from
1311
- // `AgentLoop.run` just before `provider.sendMessage`.
1312
- await onEvent({ type: "llm_call_started" });
1313
- onEvent({
1314
- type: "message_complete",
1315
- message: {
1316
- role: "assistant",
1317
- content: [{ type: "text", text: "All good" }],
1318
- },
1319
- });
1320
- onEvent({
1321
- type: "usage",
1322
- inputTokens: 50,
1323
- outputTokens: 25,
1324
- model: "test-model",
1325
- providerDurationMs: 100,
1326
- });
1327
- return [
1328
- ...messages,
1329
- {
1330
- role: "assistant" as const,
1331
- content: [{ type: "text", text: "All good" }] as ContentBlock[],
1332
- },
1333
- ];
1334
- };
1335
-
1336
- const ctx = makeCtx({ agentLoopRun });
1112
+ const ctx = makeCtx({
1113
+ providerResponses: [textResponse("All good")],
1114
+ });
1337
1115
  await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
1338
1116
 
1339
1117
  const conversationError = events.find(
@@ -1369,38 +1147,20 @@ describe("session-agent-loop", () => {
1369
1147
  },
1370
1148
  };
1371
1149
 
1372
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
1373
- // Prime the assistant row anchor — production code emits this from
1374
- // `AgentLoop.run` just before `provider.sendMessage`.
1375
- await onEvent({ type: "llm_call_started" });
1376
- onEvent({
1377
- type: "message_complete",
1378
- message: {
1379
- role: "assistant",
1380
- content: [{ type: "text", text: "Hi there." }],
1381
- },
1382
- });
1383
- onEvent({
1384
- type: "usage",
1385
- inputTokens: 12,
1386
- outputTokens: 3,
1387
- model: "gpt-4.1-2026-03-01",
1388
- actualProvider: "fireworks",
1389
- providerDurationMs: 45,
1390
- rawRequest,
1391
- rawResponse,
1392
- });
1393
- return [
1394
- ...messages,
1150
+ // The provider response carries its own `actualProvider`, so the logged
1151
+ // row should record that name rather than the runtime provider.
1152
+ const ctx = makeCtx({
1153
+ providerResponses: [
1395
1154
  {
1396
- role: "assistant" as const,
1397
- content: [{ type: "text", text: "Hi there." }] as ContentBlock[],
1155
+ content: [{ type: "text", text: "Hi there." }],
1156
+ model: "gpt-4.1-2026-03-01",
1157
+ usage: { inputTokens: 12, outputTokens: 3 },
1158
+ stopReason: "end_turn",
1159
+ actualProvider: "fireworks",
1160
+ rawRequest,
1161
+ rawResponse,
1398
1162
  },
1399
- ];
1400
- };
1401
-
1402
- const ctx = makeCtx({
1403
- agentLoopRun,
1163
+ ],
1404
1164
  provider: {
1405
1165
  name: "openrouter",
1406
1166
  sendMessage: async () => ({
@@ -1437,37 +1197,19 @@ describe("session-agent-loop", () => {
1437
1197
  ],
1438
1198
  };
1439
1199
 
1440
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
1441
- // Prime the assistant row anchor — production code emits this from
1442
- // `AgentLoop.run` just before `provider.sendMessage`.
1443
- await onEvent({ type: "llm_call_started" });
1444
- onEvent({
1445
- type: "message_complete",
1446
- message: {
1447
- role: "assistant",
1448
- content: [{ type: "text", text: "Hi there." }],
1449
- },
1450
- });
1451
- onEvent({
1452
- type: "usage",
1453
- inputTokens: 12,
1454
- outputTokens: 3,
1455
- model: "gpt-4.1-2026-03-01",
1456
- providerDurationMs: 45,
1457
- rawRequest,
1458
- rawResponse,
1459
- });
1460
- return [
1461
- ...messages,
1200
+ // The provider response omits `actualProvider`, so the loop stamps the
1201
+ // runtime provider name onto the usage event and the row records it.
1202
+ const ctx = makeCtx({
1203
+ providerResponses: [
1462
1204
  {
1463
- role: "assistant" as const,
1464
- content: [{ type: "text", text: "Hi there." }] as ContentBlock[],
1205
+ content: [{ type: "text", text: "Hi there." }],
1206
+ model: "gpt-4.1-2026-03-01",
1207
+ usage: { inputTokens: 12, outputTokens: 3 },
1208
+ stopReason: "end_turn",
1209
+ rawRequest,
1210
+ rawResponse,
1465
1211
  },
1466
- ];
1467
- };
1468
-
1469
- const ctx = makeCtx({
1470
- agentLoopRun,
1212
+ ],
1471
1213
  provider: {
1472
1214
  name: "openrouter",
1473
1215
  sendMessage: async () => ({
@@ -1522,38 +1264,18 @@ describe("session-agent-loop", () => {
1522
1264
  status: "completed",
1523
1265
  };
1524
1266
 
1525
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
1526
- // Prime the assistant row anchor — production code emits this from
1527
- // `AgentLoop.run` just before `provider.sendMessage`.
1528
- await onEvent({ type: "llm_call_started" });
1529
- onEvent({
1530
- type: "message_complete",
1531
- message: {
1532
- role: "assistant",
1533
- content: [{ type: "text", text: "Hi there." }],
1534
- },
1535
- });
1536
- onEvent({
1537
- type: "usage",
1538
- inputTokens: 12,
1539
- outputTokens: 3,
1540
- model: "gpt-5.4",
1541
- actualProvider: "openai",
1542
- providerDurationMs: 45,
1543
- rawRequest,
1544
- rawResponse,
1545
- });
1546
- return [
1547
- ...messages,
1267
+ const ctx = makeCtx({
1268
+ providerResponses: [
1548
1269
  {
1549
- role: "assistant" as const,
1550
- content: [{ type: "text", text: "Hi there." }] as ContentBlock[],
1270
+ content: [{ type: "text", text: "Hi there." }],
1271
+ model: "gpt-5.4",
1272
+ usage: { inputTokens: 12, outputTokens: 3 },
1273
+ stopReason: "end_turn",
1274
+ actualProvider: "openai",
1275
+ rawRequest,
1276
+ rawResponse,
1551
1277
  },
1552
- ];
1553
- };
1554
-
1555
- const ctx = makeCtx({
1556
- agentLoopRun,
1278
+ ],
1557
1279
  provider: {
1558
1280
  name: "openai",
1559
1281
  sendMessage: async () => ({
@@ -1593,37 +1315,17 @@ describe("session-agent-loop", () => {
1593
1315
  attrs: Record<string, unknown>;
1594
1316
  }> = [];
1595
1317
 
1596
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
1597
- // Prime the assistant row anchor — production code emits this from
1598
- // `AgentLoop.run` just before `provider.sendMessage`.
1599
- await onEvent({ type: "llm_call_started" });
1600
- onEvent({ type: "text_delta", text: "Hi." });
1601
- onEvent({
1602
- type: "message_complete",
1603
- message: {
1604
- role: "assistant",
1605
- content: [{ type: "text", text: "Hi." }],
1606
- },
1607
- });
1608
- onEvent({
1609
- type: "usage",
1610
- inputTokens: 10,
1611
- outputTokens: 2,
1612
- model: "gpt-5.5-2026-04-23",
1613
- actualProvider: "openai",
1614
- providerDurationMs: 100,
1615
- });
1616
- return [
1617
- ...messages,
1318
+ const ctx = makeCtx({
1319
+ // The loop replays the text block as a `text_delta` before `usage`.
1320
+ providerResponses: [
1618
1321
  {
1619
- role: "assistant" as const,
1620
- content: [{ type: "text", text: "Hi." }] as ContentBlock[],
1322
+ content: [{ type: "text", text: "Hi." }],
1323
+ model: "gpt-5.5-2026-04-23",
1324
+ usage: { inputTokens: 10, outputTokens: 2 },
1325
+ stopReason: "end_turn",
1326
+ actualProvider: "openai",
1621
1327
  },
1622
- ];
1623
- };
1624
-
1625
- const ctx = makeCtx({
1626
- agentLoopRun,
1328
+ ],
1627
1329
  // Provider name matches actualProvider so both paths agree.
1628
1330
  provider: {
1629
1331
  name: "openai",
@@ -1671,31 +1373,18 @@ describe("session-agent-loop", () => {
1671
1373
  attrs: Record<string, unknown>;
1672
1374
  }> = [];
1673
1375
 
1674
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
1675
- // Prime the assistant row anchor — production code emits this from
1676
- // `AgentLoop.run` just before `provider.sendMessage`.
1677
- await onEvent({ type: "llm_call_started" });
1678
- // No text_delta — pure tool-call response
1679
- onEvent({
1680
- type: "message_complete",
1681
- message: {
1682
- role: "assistant",
1376
+ const ctx = makeCtx({
1377
+ // An empty-content response: no text block fires `text_delta`, so the
1378
+ // started event falls back to the resolved usage provider name.
1379
+ providerResponses: [
1380
+ {
1683
1381
  content: [],
1382
+ model: "gpt-5.5-2026-04-23",
1383
+ usage: { inputTokens: 10, outputTokens: 2 },
1384
+ stopReason: "end_turn",
1385
+ actualProvider: "openai",
1684
1386
  },
1685
- });
1686
- onEvent({
1687
- type: "usage",
1688
- inputTokens: 10,
1689
- outputTokens: 2,
1690
- model: "gpt-5.5-2026-04-23",
1691
- actualProvider: "openai",
1692
- providerDurationMs: 100,
1693
- });
1694
- return messages;
1695
- };
1696
-
1697
- const ctx = makeCtx({
1698
- agentLoopRun,
1387
+ ],
1699
1388
  provider: {
1700
1389
  name: "anthropic",
1701
1390
  sendMessage: async () => ({
@@ -1737,52 +1426,32 @@ describe("session-agent-loop", () => {
1737
1426
  test("records the actual provider for usage accounting", async () => {
1738
1427
  const events: ServerMessage[] = [];
1739
1428
 
1740
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
1741
- // Prime the assistant row anchor — production code emits this from
1742
- // `AgentLoop.run` just before `provider.sendMessage`.
1743
- await onEvent({ type: "llm_call_started" });
1744
- onEvent({
1745
- type: "message_complete",
1746
- message: {
1747
- role: "assistant",
1429
+ const ctx = makeCtx({
1430
+ providerResponses: [
1431
+ {
1748
1432
  content: [{ type: "text", text: "Hi there." }],
1749
- },
1750
- });
1751
- onEvent({
1752
- type: "usage",
1753
- inputTokens: 12,
1754
- outputTokens: 3,
1755
- model: "gpt-4.1-2026-03-01",
1756
- actualProvider: "fireworks",
1757
- providerDurationMs: 45,
1758
- rawRequest: {
1759
- model: "gpt-4.1",
1760
- messages: [{ role: "user", content: "Hello" }],
1761
- },
1762
- rawResponse: {
1763
1433
  model: "gpt-4.1-2026-03-01",
1764
- choices: [
1765
- {
1766
- finish_reason: "stop",
1767
- message: {
1768
- role: "assistant",
1769
- content: "Hi there.",
1434
+ usage: { inputTokens: 12, outputTokens: 3 },
1435
+ stopReason: "end_turn",
1436
+ actualProvider: "fireworks",
1437
+ rawRequest: {
1438
+ model: "gpt-4.1",
1439
+ messages: [{ role: "user", content: "Hello" }],
1440
+ },
1441
+ rawResponse: {
1442
+ model: "gpt-4.1-2026-03-01",
1443
+ choices: [
1444
+ {
1445
+ finish_reason: "stop",
1446
+ message: {
1447
+ role: "assistant",
1448
+ content: "Hi there.",
1449
+ },
1770
1450
  },
1771
- },
1772
- ],
1773
- },
1774
- });
1775
- return [
1776
- ...messages,
1777
- {
1778
- role: "assistant" as const,
1779
- content: [{ type: "text", text: "Hi there." }] as ContentBlock[],
1451
+ ],
1452
+ },
1780
1453
  },
1781
- ];
1782
- };
1783
-
1784
- const ctx = makeCtx({
1785
- agentLoopRun,
1454
+ ],
1786
1455
  provider: {
1787
1456
  name: "openrouter",
1788
1457
  sendMessage: async () => ({
@@ -1852,27 +1521,9 @@ describe("session-agent-loop", () => {
1852
1521
  },
1853
1522
  });
1854
1523
 
1855
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
1856
- // Prime the assistant row anchor — production code emits this from
1857
- // `AgentLoop.run` just before `provider.sendMessage`.
1858
- await onEvent({ type: "llm_call_started" });
1859
- onEvent({
1860
- type: "message_complete",
1861
- message: {
1862
- role: "assistant",
1863
- content: [{ type: "text", text: "recovered" }],
1864
- },
1865
- });
1866
- return [
1867
- ...messages,
1868
- {
1869
- role: "assistant" as const,
1870
- content: [{ type: "text", text: "recovered" }] as ContentBlock[],
1871
- },
1872
- ];
1873
- };
1874
-
1875
- const ctx = makeCtx({ agentLoopRun });
1524
+ // After the orchestrator's preflight compaction runs, the loop completes
1525
+ // the turn normally.
1526
+ const ctx = makeCtx({ providerResponses: [textResponse("recovered")] });
1876
1527
  await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
1877
1528
 
1878
1529
  const compactorCall = recordUsageMock.mock.calls.find(
@@ -1911,7 +1562,6 @@ describe("session-agent-loop", () => {
1911
1562
 
1912
1563
  test("convergence loop applies reducer and retries when context-too-large is detected", async () => {
1913
1564
  const events: ServerMessage[] = [];
1914
- let callCount = 0;
1915
1565
  let reducerCalled = false;
1916
1566
 
1917
1567
  // Configure reducer to succeed on first call — return reduced messages
@@ -1945,53 +1595,15 @@ describe("session-agent-loop", () => {
1945
1595
  };
1946
1596
  };
1947
1597
 
1948
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
1949
- // Prime the assistant row anchor — production code emits this from
1950
- // `AgentLoop.run` just before `provider.sendMessage`. Retry branches
1951
- // need this on every invocation: each agent-loop iteration reserves
1952
- // its own row.
1953
- await onEvent({ type: "llm_call_started" });
1954
- callCount++;
1955
- if (callCount === 1) {
1956
- onEvent({
1957
- type: "error",
1958
- error: new Error("context_length_exceeded"),
1959
- });
1960
- onEvent({
1961
- type: "usage",
1962
- inputTokens: 100,
1963
- outputTokens: 0,
1964
- model: "test-model",
1965
- providerDurationMs: 50,
1966
- });
1967
- return messages;
1968
- }
1969
- // Second call (after reducer): succeed
1970
- onEvent({
1971
- type: "message_complete",
1972
- message: {
1973
- role: "assistant",
1974
- content: [{ type: "text", text: "recovered" }],
1975
- },
1976
- });
1977
- onEvent({
1978
- type: "usage",
1979
- inputTokens: 50,
1980
- outputTokens: 25,
1981
- model: "test-model",
1982
- providerDurationMs: 100,
1983
- });
1984
- return [
1985
- ...messages,
1986
- {
1987
- role: "assistant" as const,
1988
- content: [{ type: "text", text: "recovered" }] as ContentBlock[],
1989
- },
1990
- ];
1991
- };
1598
+ // The provider rejects the first call with a context-too-large error,
1599
+ // then succeeds once the orchestrator has reduced the context.
1600
+ const { provider, calls } = createMockProvider([
1601
+ new Error("context_length_exceeded"),
1602
+ textResponse("recovered"),
1603
+ ]);
1992
1604
 
1993
1605
  const ctx = makeCtx({
1994
- agentLoopRun,
1606
+ loopProvider: provider,
1995
1607
  contextWindowManager: {
1996
1608
  shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
1997
1609
  maybeCompact: async () => ({ compacted: false }),
@@ -2001,7 +1613,7 @@ describe("session-agent-loop", () => {
2001
1613
  await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
2002
1614
 
2003
1615
  expect(reducerCalled).toBe(true);
2004
- expect(callCount).toBe(2);
1616
+ expect(calls.length).toBe(2);
2005
1617
  const compactEvent = events.find((e) => e.type === "context_compacted");
2006
1618
  expect(compactEvent).toBeDefined();
2007
1619
  });
@@ -2009,23 +1621,10 @@ describe("session-agent-loop", () => {
2009
1621
  test("emits conversation_error when context stays too large after all recovery attempts", async () => {
2010
1622
  const events: ServerMessage[] = [];
2011
1623
 
2012
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
2013
- onEvent({
2014
- type: "error",
2015
- error: new Error("context_length_exceeded"),
2016
- });
2017
- onEvent({
2018
- type: "usage",
2019
- inputTokens: 100,
2020
- outputTokens: 0,
2021
- model: "test-model",
2022
- providerDurationMs: 50,
2023
- });
2024
- return messages;
2025
- };
2026
-
1624
+ // The provider rejects every call with a context-too-large error, so the
1625
+ // orchestrator exhausts its recovery attempts.
2027
1626
  const ctx = makeCtx({
2028
- agentLoopRun,
1627
+ providerResponses: [new Error("context_length_exceeded")],
2029
1628
  contextWindowManager: {
2030
1629
  shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
2031
1630
  // Compaction succeeds but context is still too large
@@ -2059,7 +1658,6 @@ describe("session-agent-loop", () => {
2059
1658
 
2060
1659
  test("bounded convergence loop applies reducer tiers and recovers", async () => {
2061
1660
  const events: ServerMessage[] = [];
2062
- let callCount = 0;
2063
1661
  let reducerCalls = 0;
2064
1662
 
2065
1663
  // Reducer: succeed on first call, returning reduced messages
@@ -2077,55 +1675,15 @@ describe("session-agent-loop", () => {
2077
1675
  };
2078
1676
  };
2079
1677
 
2080
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
2081
- // Prime the assistant row anchor — production code emits this from
2082
- // `AgentLoop.run` just before `provider.sendMessage`. Retry branches
2083
- // need this on every invocation: each agent-loop iteration reserves
2084
- // its own row.
2085
- await onEvent({ type: "llm_call_started" });
2086
- callCount++;
2087
- if (callCount === 1) {
2088
- onEvent({
2089
- type: "error",
2090
- error: new Error("context_length_exceeded"),
2091
- });
2092
- onEvent({
2093
- type: "usage",
2094
- inputTokens: 100,
2095
- outputTokens: 0,
2096
- model: "test-model",
2097
- providerDurationMs: 50,
2098
- });
2099
- return messages;
2100
- }
2101
- // After reducer runs, succeed
2102
- onEvent({
2103
- type: "message_complete",
2104
- message: {
2105
- role: "assistant",
2106
- content: [{ type: "text", text: "recovered via convergence" }],
2107
- },
2108
- });
2109
- onEvent({
2110
- type: "usage",
2111
- inputTokens: 50,
2112
- outputTokens: 25,
2113
- model: "test-model",
2114
- providerDurationMs: 100,
2115
- });
2116
- return [
2117
- ...messages,
2118
- {
2119
- role: "assistant" as const,
2120
- content: [
2121
- { type: "text", text: "recovered via convergence" },
2122
- ] as ContentBlock[],
2123
- },
2124
- ];
2125
- };
1678
+ // The provider rejects the first call with a context-too-large error,
1679
+ // then succeeds once the orchestrator has reduced the context.
1680
+ const { provider, calls } = createMockProvider([
1681
+ new Error("context_length_exceeded"),
1682
+ textResponse("recovered via convergence"),
1683
+ ]);
2126
1684
 
2127
1685
  const ctx = makeCtx({
2128
- agentLoopRun,
1686
+ loopProvider: provider,
2129
1687
  contextWindowManager: {
2130
1688
  shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
2131
1689
  maybeCompact: async () => ({ compacted: false }),
@@ -2135,7 +1693,7 @@ describe("session-agent-loop", () => {
2135
1693
  await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
2136
1694
 
2137
1695
  expect(reducerCalls).toBeGreaterThanOrEqual(1);
2138
- expect(callCount).toBe(2);
1696
+ expect(calls.length).toBe(2);
2139
1697
  const conversationError = events.find(
2140
1698
  (e) => e.type === "conversation_error",
2141
1699
  );
@@ -2146,7 +1704,6 @@ describe("session-agent-loop", () => {
2146
1704
 
2147
1705
  test("non-interactive auto-compress continues without approval prompt", async () => {
2148
1706
  const events: ServerMessage[] = [];
2149
- let callCount = 0;
2150
1707
 
2151
1708
  // Reducer exhausts all tiers
2152
1709
  mockReducerStepFn = (msgs: Message[]) => ({
@@ -2167,54 +1724,14 @@ describe("session-agent-loop", () => {
2167
1724
 
2168
1725
  mockOverflowAction = "auto_compress_latest_turn";
2169
1726
 
2170
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
2171
- // Prime the assistant row anchor — production code emits this from
2172
- // `AgentLoop.run` just before `provider.sendMessage`. Retry branches
2173
- // need this on every invocation: each agent-loop iteration reserves
2174
- // its own row.
2175
- await onEvent({ type: "llm_call_started" });
2176
- callCount++;
2177
- if (callCount <= 2) {
2178
- onEvent({
2179
- type: "error",
2180
- error: new Error("context_length_exceeded"),
2181
- });
2182
- onEvent({
2183
- type: "usage",
2184
- inputTokens: 100,
2185
- outputTokens: 0,
2186
- model: "test-model",
2187
- providerDurationMs: 50,
2188
- });
2189
- return messages;
2190
- }
2191
- onEvent({
2192
- type: "message_complete",
2193
- message: {
2194
- role: "assistant",
2195
- content: [{ type: "text", text: "auto-recovered" }],
2196
- },
2197
- });
2198
- onEvent({
2199
- type: "usage",
2200
- inputTokens: 50,
2201
- outputTokens: 25,
2202
- model: "test-model",
2203
- providerDurationMs: 100,
2204
- });
2205
- return [
2206
- ...messages,
2207
- {
2208
- role: "assistant" as const,
2209
- content: [
2210
- { type: "text", text: "auto-recovered" },
2211
- ] as ContentBlock[],
2212
- },
2213
- ];
2214
- };
2215
-
1727
+ // The provider rejects the first two calls with context-too-large errors,
1728
+ // then succeeds after the emergency auto-compress runs.
2216
1729
  const ctx = makeCtx({
2217
- agentLoopRun,
1730
+ providerResponses: [
1731
+ new Error("context_length_exceeded"),
1732
+ new Error("context_length_exceeded"),
1733
+ textResponse("auto-recovered"),
1734
+ ],
2218
1735
  hasNoClient: true,
2219
1736
  contextWindowManager: {
2220
1737
  shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
@@ -2261,7 +1778,6 @@ describe("session-agent-loop", () => {
2261
1778
  // `budget_yield_unrecovered` so the inspector and dashboards can
2262
1779
  // attribute the silent stall.
2263
1780
  const events: ServerMessage[] = [];
2264
- let callCount = 0;
2265
1781
 
2266
1782
  // Reducer exhausts all 4 tiers on first call so the convergence
2267
1783
  // loop runs exactly one iteration before falling through to
@@ -2292,43 +1808,30 @@ describe("session-agent-loop", () => {
2292
1808
  // call). 90k satisfies both so the path reaches call 3.
2293
1809
  mockEstimateTokens = 90_000;
2294
1810
 
2295
- const agentLoopRun: AgentLoopRun = async (messages, onEvent, options) => {
2296
- callCount++;
2297
- if (callCount <= 2) {
2298
- // Calls 1 (initial) and 2 (convergence rerun): error so
2299
- // `state.contextTooLargeDetected` stays true through
2300
- // convergence exit and we enter the auto_compress branch.
2301
- onEvent({
2302
- type: "error",
2303
- error: new Error("context_length_exceeded"),
2304
- });
2305
- onEvent({
2306
- type: "usage",
2307
- inputTokens: 100,
2308
- outputTokens: 0,
2309
- model: "test-model",
2310
- providerDurationMs: 50,
2311
- });
2312
- return messages;
2313
- }
2314
- // Call 3: the auto_compress_latest_turn rerun. Invoke
2315
- // onCheckpoint so the orchestrator's mid-loop budget check
2316
- // flips `yieldedForBudget` to true, then return without
2317
- // finishing — mirroring what AgentLoop.run does when its
2318
- // checkpoint returns "yield".
2319
- if (options?.onCheckpoint) {
2320
- await options.onCheckpoint({
2321
- turnIndex: 0,
2322
- toolCount: 1,
2323
- hasToolUse: true,
2324
- history: messages,
2325
- });
2326
- }
2327
- return messages;
2328
- };
2329
-
1811
+ // Calls 1 (initial) and 2 (convergence rerun) reject with
1812
+ // context-too-large so `contextTooLargeDetected` stays true through the
1813
+ // convergence exit and the orchestrator enters the auto_compress branch.
1814
+ // Call 3 (the auto_compress rerun) is a tool turn: the loop runs it
1815
+ // without a compaction hook, so when its mid-loop budget gate trips on
1816
+ // the still-oversized estimate it yields `exitReason = "budget"` rather
1817
+ // than recovering — the silent-stall path under test.
2330
1818
  const ctx = makeCtx({
2331
- agentLoopRun,
1819
+ providerResponses: [
1820
+ new Error("context_length_exceeded"),
1821
+ new Error("context_length_exceeded"),
1822
+ toolUseResponse("t1", "read_file", { path: "/a.txt" }),
1823
+ ],
1824
+ loopTools: [
1825
+ {
1826
+ name: "read_file",
1827
+ description: "Read a file",
1828
+ input_schema: {
1829
+ type: "object",
1830
+ properties: { path: { type: "string" } },
1831
+ },
1832
+ },
1833
+ ],
1834
+ toolExecutor: async () => ({ content: "data", isError: false }),
2332
1835
  hasNoClient: true,
2333
1836
  contextWindowManager: {
2334
1837
  shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
@@ -2411,23 +1914,10 @@ describe("session-agent-loop", () => {
2411
1914
  };
2412
1915
  };
2413
1916
 
2414
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
2415
- onEvent({
2416
- type: "error",
2417
- error: new Error("context_length_exceeded"),
2418
- });
2419
- onEvent({
2420
- type: "usage",
2421
- inputTokens: 100,
2422
- outputTokens: 0,
2423
- model: "test-model",
2424
- providerDurationMs: 50,
2425
- });
2426
- return messages;
2427
- };
2428
-
1917
+ // The provider rejects every call with a context-too-large error, so the
1918
+ // orchestrator keeps retrying until it hits the attempt ceiling.
2429
1919
  const ctx = makeCtx({
2430
- agentLoopRun,
1920
+ providerResponses: [new Error("context_length_exceeded")],
2431
1921
  contextWindowManager: {
2432
1922
  shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
2433
1923
  maybeCompact: async () => ({ compacted: false }),
@@ -2443,7 +1933,6 @@ describe("session-agent-loop", () => {
2443
1933
  test("preflight budget evaluation invokes reducer before provider call", async () => {
2444
1934
  const events: ServerMessage[] = [];
2445
1935
  let reducerCalls = 0;
2446
- let agentLoopCalls = 0;
2447
1936
 
2448
1937
  // Set token estimate above budget (100000 * 0.95 = 95000)
2449
1938
  mockEstimateTokens = 96000;
@@ -2462,36 +1951,11 @@ describe("session-agent-loop", () => {
2462
1951
  };
2463
1952
  };
2464
1953
 
2465
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
2466
- agentLoopCalls++;
2467
- // Prime the assistant row anchor — production code emits this from
2468
- // `AgentLoop.run` just before `provider.sendMessage`.
2469
- await onEvent({ type: "llm_call_started" });
2470
- onEvent({
2471
- type: "message_complete",
2472
- message: {
2473
- role: "assistant",
2474
- content: [{ type: "text", text: "ok" }],
2475
- },
2476
- });
2477
- onEvent({
2478
- type: "usage",
2479
- inputTokens: 50,
2480
- outputTokens: 25,
2481
- model: "test-model",
2482
- providerDurationMs: 100,
2483
- });
2484
- return [
2485
- ...messages,
2486
- {
2487
- role: "assistant" as const,
2488
- content: [{ type: "text", text: "ok" }] as ContentBlock[],
2489
- },
2490
- ];
2491
- };
2492
-
1954
+ // After the preflight reducer brings the estimate under budget, the loop
1955
+ // completes the turn in a single provider call.
1956
+ const { provider, calls } = createMockProvider([textResponse("ok")]);
2493
1957
  const ctx = makeCtx({
2494
- agentLoopRun,
1958
+ loopProvider: provider,
2495
1959
  contextWindowManager: {
2496
1960
  shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
2497
1961
  maybeCompact: async () => ({ compacted: false }),
@@ -2502,8 +1966,8 @@ describe("session-agent-loop", () => {
2502
1966
 
2503
1967
  // Reducer should have been called during preflight
2504
1968
  expect(reducerCalls).toBeGreaterThanOrEqual(1);
2505
- // Agent loop should still succeed
2506
- expect(agentLoopCalls).toBe(1);
1969
+ // Agent loop should still succeed in a single provider call
1970
+ expect(calls.length).toBe(1);
2507
1971
  const complete = events.find((e) => e.type === "message_complete");
2508
1972
  expect(complete).toBeDefined();
2509
1973
  });
@@ -2512,78 +1976,28 @@ describe("session-agent-loop", () => {
2512
1976
  describe("provider ordering error retry", () => {
2513
1977
  test("retries with deep repair when ordering error is detected", async () => {
2514
1978
  const events: ServerMessage[] = [];
2515
- let callCount = 0;
2516
-
2517
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
2518
- // Prime the assistant row anchor — production code emits this from
2519
- // `AgentLoop.run` just before `provider.sendMessage`. Retry branches
2520
- // need this on every invocation: each agent-loop iteration reserves
2521
- // its own row.
2522
- await onEvent({ type: "llm_call_started" });
2523
- callCount++;
2524
- if (callCount === 1) {
2525
- onEvent({
2526
- type: "error",
2527
- error: new Error("messages ordering error"),
2528
- });
2529
- onEvent({
2530
- type: "usage",
2531
- inputTokens: 100,
2532
- outputTokens: 0,
2533
- model: "test-model",
2534
- providerDurationMs: 50,
2535
- });
2536
- return messages;
2537
- }
2538
- // Retry succeeds
2539
- onEvent({
2540
- type: "message_complete",
2541
- message: {
2542
- role: "assistant",
2543
- content: [{ type: "text", text: "fixed" }],
2544
- },
2545
- });
2546
- onEvent({
2547
- type: "usage",
2548
- inputTokens: 50,
2549
- outputTokens: 25,
2550
- model: "test-model",
2551
- providerDurationMs: 100,
2552
- });
2553
- return [
2554
- ...messages,
2555
- {
2556
- role: "assistant" as const,
2557
- content: [{ type: "text", text: "fixed" }] as ContentBlock[],
2558
- },
2559
- ];
2560
- };
2561
1979
 
2562
- const ctx = makeCtx({ agentLoopRun });
1980
+ // The provider rejects the first call with an ordering error, then
1981
+ // succeeds once the orchestrator's deep repair re-sends the turn.
1982
+ const { provider, calls } = createMockProvider([
1983
+ new Error("messages ordering error"),
1984
+ textResponse("fixed"),
1985
+ ]);
1986
+
1987
+ const ctx = makeCtx({ loopProvider: provider });
2563
1988
  await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
2564
1989
 
2565
- expect(callCount).toBe(2);
1990
+ expect(calls.length).toBe(2);
2566
1991
  });
2567
1992
 
2568
1993
  test("emits deferred ordering error when retry also fails", async () => {
2569
1994
  const events: ServerMessage[] = [];
2570
1995
 
2571
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
2572
- onEvent({
2573
- type: "error",
2574
- error: new Error("messages ordering error"),
2575
- });
2576
- onEvent({
2577
- type: "usage",
2578
- inputTokens: 100,
2579
- outputTokens: 0,
2580
- model: "test-model",
2581
- providerDurationMs: 50,
2582
- });
2583
- return messages;
2584
- };
2585
-
2586
- const ctx = makeCtx({ agentLoopRun });
1996
+ // The provider rejects every call with an ordering error, so even the
1997
+ // deep-repair retry fails and the orchestrator surfaces the error.
1998
+ const ctx = makeCtx({
1999
+ providerResponses: [new Error("messages ordering error")],
2000
+ });
2587
2001
  await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
2588
2002
 
2589
2003
  const conversationError = events.find(
@@ -2597,62 +2011,18 @@ describe("session-agent-loop", () => {
2597
2011
  test("yields at checkpoint when canHandoffAtCheckpoint returns true", async () => {
2598
2012
  const events: ServerMessage[] = [];
2599
2013
 
2600
- const agentLoopRun: AgentLoopRun = async (messages, onEvent, options) => {
2601
- // Prime the assistant row anchor — production code emits this from
2602
- // `AgentLoop.run` just before `provider.sendMessage`. Retry branches
2603
- // need this on every invocation: each agent-loop iteration reserves
2604
- // its own row.
2605
- await onEvent({ type: "llm_call_started" });
2606
- // Simulate tool use followed by checkpoint
2607
- onEvent({ type: "tool_use", id: "tu-1", name: "file_read", input: {} });
2608
- onEvent({
2609
- type: "tool_result",
2610
- toolUseId: "tu-1",
2611
- content: "file content",
2612
- isError: false,
2613
- });
2614
- onEvent({
2615
- type: "message_complete",
2616
- message: {
2617
- role: "assistant",
2618
- content: [{ type: "text", text: "partial" }],
2619
- },
2620
- });
2621
- onEvent({
2622
- type: "usage",
2623
- inputTokens: 100,
2624
- outputTokens: 50,
2625
- model: "test-model",
2626
- providerDurationMs: 100,
2627
- });
2628
- if (options?.onCheckpoint) {
2629
- const decision = await options.onCheckpoint({
2630
- turnIndex: 0,
2631
- toolCount: 1,
2632
- hasToolUse: true,
2633
- history: messages,
2634
- });
2635
- if (decision !== "continue") {
2636
- return [
2637
- ...messages,
2638
- {
2639
- role: "assistant" as const,
2640
- content: [{ type: "text", text: "partial" }] as ContentBlock[],
2641
- },
2642
- ];
2643
- }
2644
- }
2645
- return [
2646
- ...messages,
2014
+ // A tool turn drives the loop to its first mid-loop checkpoint, where the
2015
+ // orchestrator yields for a queued handoff.
2016
+ const ctx = makeCtx({
2017
+ providerResponses: [toolUseResponse("tu-1", "file_read", {})],
2018
+ loopTools: [
2647
2019
  {
2648
- role: "assistant" as const,
2649
- content: [{ type: "text", text: "partial" }] as ContentBlock[],
2020
+ name: "file_read",
2021
+ description: "Read a file",
2022
+ input_schema: { type: "object", properties: {} },
2650
2023
  },
2651
- ];
2652
- };
2653
-
2654
- const ctx = makeCtx({
2655
- agentLoopRun,
2024
+ ],
2025
+ toolExecutor: async () => ({ content: "file content", isError: false }),
2656
2026
  canHandoffAtCheckpoint: () => true,
2657
2027
  } as unknown as Partial<AgentLoopConversationContext>);
2658
2028
 
@@ -2669,52 +2039,21 @@ describe("session-agent-loop", () => {
2669
2039
  test("continues when canHandoffAtCheckpoint returns false", async () => {
2670
2040
  const events: ServerMessage[] = [];
2671
2041
 
2672
- const agentLoopRun: AgentLoopRun = async (messages, onEvent, options) => {
2673
- // Prime the assistant row anchor — production code emits this from
2674
- // `AgentLoop.run` just before `provider.sendMessage`. Retry branches
2675
- // need this on every invocation: each agent-loop iteration reserves
2676
- // its own row.
2677
- await onEvent({ type: "llm_call_started" });
2678
- onEvent({ type: "tool_use", id: "tu-1", name: "file_read", input: {} });
2679
- onEvent({
2680
- type: "tool_result",
2681
- toolUseId: "tu-1",
2682
- content: "content",
2683
- isError: false,
2684
- });
2685
- onEvent({
2686
- type: "message_complete",
2687
- message: {
2688
- role: "assistant",
2689
- content: [{ type: "text", text: "done" }],
2690
- },
2691
- });
2692
- onEvent({
2693
- type: "usage",
2694
- inputTokens: 100,
2695
- outputTokens: 50,
2696
- model: "test-model",
2697
- providerDurationMs: 100,
2698
- });
2699
- if (options?.onCheckpoint) {
2700
- await options.onCheckpoint({
2701
- turnIndex: 0,
2702
- toolCount: 1,
2703
- hasToolUse: true,
2704
- history: messages,
2705
- });
2706
- }
2707
- return [
2708
- ...messages,
2042
+ // The tool turn reaches a checkpoint, but with handoff disabled the loop
2043
+ // continues to the next turn and completes normally.
2044
+ const ctx = makeCtx({
2045
+ providerResponses: [
2046
+ toolUseResponse("tu-1", "file_read", {}),
2047
+ textResponse("done"),
2048
+ ],
2049
+ loopTools: [
2709
2050
  {
2710
- role: "assistant" as const,
2711
- content: [{ type: "text", text: "done" }] as ContentBlock[],
2051
+ name: "file_read",
2052
+ description: "Read a file",
2053
+ input_schema: { type: "object", properties: {} },
2712
2054
  },
2713
- ];
2714
- };
2715
-
2716
- const ctx = makeCtx({
2717
- agentLoopRun,
2055
+ ],
2056
+ toolExecutor: async () => ({ content: "content", isError: false }),
2718
2057
  canHandoffAtCheckpoint: () => false,
2719
2058
  } as unknown as Partial<AgentLoopConversationContext>);
2720
2059
 
@@ -2736,36 +2075,18 @@ describe("session-agent-loop", () => {
2736
2075
  const events: ServerMessage[] = [];
2737
2076
  const abortController = new AbortController();
2738
2077
 
2739
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
2740
- // Prime the assistant row anchor — production code emits this from
2741
- // `AgentLoop.run` just before `provider.sendMessage`.
2742
- await onEvent({ type: "llm_call_started" });
2743
- onEvent({
2744
- type: "message_complete",
2745
- message: {
2746
- role: "assistant",
2747
- content: [{ type: "text", text: "partial" }],
2748
- },
2749
- });
2750
- onEvent({
2751
- type: "usage",
2752
- inputTokens: 100,
2753
- outputTokens: 50,
2754
- model: "test-model",
2755
- providerDurationMs: 100,
2756
- });
2757
- // Simulate abort after processing
2758
- abortController.abort();
2759
- return [
2760
- ...messages,
2761
- {
2762
- role: "assistant" as const,
2763
- content: [{ type: "text", text: "partial" }] as ContentBlock[],
2764
- },
2765
- ];
2078
+ // The provider completes its response but the user cancels mid-turn, so
2079
+ // the orchestrator observes the aborted signal once the loop returns.
2080
+ const provider: Provider = {
2081
+ name: "mock",
2082
+ async sendMessage(_messages, options) {
2083
+ options?.onEvent?.({ type: "text_delta", text: "partial" });
2084
+ abortController.abort();
2085
+ return textResponse("partial");
2086
+ },
2766
2087
  };
2767
2088
 
2768
- const ctx = makeCtx({ agentLoopRun, abortController });
2089
+ const ctx = makeCtx({ loopProvider: provider, abortController });
2769
2090
  await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
2770
2091
 
2771
2092
  const cancelled = events.find((e) => e.type === "generation_cancelled");
@@ -2776,13 +2097,16 @@ describe("session-agent-loop", () => {
2776
2097
  const events: ServerMessage[] = [];
2777
2098
  const abortController = new AbortController();
2778
2099
 
2779
- const agentLoopRun: AgentLoopRun = async () => {
2780
- abortController.abort();
2781
- const err = new DOMException("The operation was aborted", "AbortError");
2782
- throw err;
2100
+ // The provider rejects with an AbortError after the user cancels.
2101
+ const provider: Provider = {
2102
+ name: "mock",
2103
+ async sendMessage() {
2104
+ abortController.abort();
2105
+ throw new DOMException("The operation was aborted", "AbortError");
2106
+ },
2783
2107
  };
2784
2108
 
2785
- const ctx = makeCtx({ agentLoopRun, abortController });
2109
+ const ctx = makeCtx({ loopProvider: provider, abortController });
2786
2110
  await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
2787
2111
 
2788
2112
  const cancelled = events.find((e) => e.type === "generation_cancelled");
@@ -2799,36 +2123,17 @@ describe("session-agent-loop", () => {
2799
2123
  const abortController = new AbortController();
2800
2124
  resolveAssistantAttachmentsMock.mockClear();
2801
2125
 
2802
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
2803
- // Prime the assistant row anchor — production code emits this from
2804
- // `AgentLoop.run` just before `provider.sendMessage`.
2805
- await onEvent({ type: "llm_call_started" });
2806
- onEvent({
2807
- type: "message_complete",
2808
- message: {
2809
- role: "assistant",
2810
- content: [{ type: "text", text: "partial" }],
2811
- },
2812
- });
2813
- onEvent({
2814
- type: "usage",
2815
- inputTokens: 100,
2816
- outputTokens: 50,
2817
- model: "test-model",
2818
- providerDurationMs: 100,
2819
- });
2820
- // Simulate abort after processing
2821
- abortController.abort();
2822
- return [
2823
- ...messages,
2824
- {
2825
- role: "assistant" as const,
2826
- content: [{ type: "text", text: "partial" }] as ContentBlock[],
2827
- },
2828
- ];
2126
+ // The provider completes its response but the user cancels mid-turn.
2127
+ const provider: Provider = {
2128
+ name: "mock",
2129
+ async sendMessage(_messages, options) {
2130
+ options?.onEvent?.({ type: "text_delta", text: "partial" });
2131
+ abortController.abort();
2132
+ return textResponse("partial");
2133
+ },
2829
2134
  };
2830
2135
 
2831
- const ctx = makeCtx({ agentLoopRun, abortController });
2136
+ const ctx = makeCtx({ loopProvider: provider, abortController });
2832
2137
  await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
2833
2138
 
2834
2139
  const cancelled = events.find((e) => e.type === "generation_cancelled");
@@ -2840,96 +2145,50 @@ describe("session-agent-loop", () => {
2840
2145
 
2841
2146
  describe("finally block cleanup", () => {
2842
2147
  test("increments turnCount after successful run", async () => {
2843
- const ctx = makeCtx({
2844
- agentLoopRun: async (messages, onEvent) => {
2845
- // Prime the assistant row anchor — production code emits this from
2846
- // `AgentLoop.run` just before `provider.sendMessage`.
2847
- await onEvent({ type: "llm_call_started" });
2848
- onEvent({
2849
- type: "message_complete",
2850
- message: {
2851
- role: "assistant",
2852
- content: [{ type: "text", text: "hi" }],
2853
- },
2854
- });
2855
- onEvent({
2856
- type: "usage",
2857
- inputTokens: 10,
2858
- outputTokens: 5,
2859
- model: "test",
2860
- providerDurationMs: 50,
2861
- });
2862
- return [
2863
- ...messages,
2864
- {
2865
- role: "assistant" as const,
2866
- content: [{ type: "text", text: "hi" }] as ContentBlock[],
2867
- },
2868
- ];
2869
- },
2870
- });
2148
+ // GIVEN a real loop that answers in a single text turn
2149
+ const ctx = makeCtx({ providerResponses: [textResponse("hi")] });
2871
2150
  expect(ctx.turnCount).toBe(0);
2872
2151
 
2152
+ // WHEN the orchestrator runs the turn to completion
2873
2153
  await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
2874
2154
 
2155
+ // THEN the finally block increments the turn count
2875
2156
  expect(ctx.turnCount).toBe(1);
2876
2157
  });
2877
2158
 
2878
2159
  test("clears processing state and abort controller", async () => {
2879
- const ctx = makeCtx({
2880
- agentLoopRun: async (messages, onEvent) => {
2881
- // Prime the assistant row anchor — production code emits this from
2882
- // `AgentLoop.run` just before `provider.sendMessage`.
2883
- await onEvent({ type: "llm_call_started" });
2884
- onEvent({
2885
- type: "message_complete",
2886
- message: {
2887
- role: "assistant",
2888
- content: [{ type: "text", text: "hi" }],
2889
- },
2890
- });
2891
- onEvent({
2892
- type: "usage",
2893
- inputTokens: 10,
2894
- outputTokens: 5,
2895
- model: "test",
2896
- providerDurationMs: 50,
2897
- });
2898
- return [
2899
- ...messages,
2900
- {
2901
- role: "assistant" as const,
2902
- content: [{ type: "text", text: "hi" }] as ContentBlock[],
2903
- },
2904
- ];
2905
- },
2906
- });
2160
+ // GIVEN a real loop that answers in a single text turn
2161
+ const ctx = makeCtx({ providerResponses: [textResponse("hi")] });
2907
2162
 
2163
+ // WHEN the orchestrator runs the turn to completion
2908
2164
  await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
2909
2165
 
2910
- expect(ctx.processing).toBe(false);
2166
+ // THEN the finally block clears all per-turn processing state
2167
+ expect(ctx.isProcessing()).toBe(false);
2911
2168
  expect(ctx.abortController).toBeNull();
2912
2169
  expect(ctx.currentRequestId).toBeUndefined();
2913
2170
  expect(ctx.commandIntent).toBeUndefined();
2914
2171
  });
2915
2172
 
2916
- test("clears state even when agent loop throws", async () => {
2173
+ test("clears state and surfaces a processing error when the provider call fails", async () => {
2174
+ // GIVEN a real loop whose provider rejects with an unexpected error
2917
2175
  const events: ServerMessage[] = [];
2918
2176
  const ctx = makeCtx({
2919
- agentLoopRun: async () => {
2920
- throw new Error("unexpected crash");
2921
- },
2177
+ loopProvider: {
2178
+ name: "mock-provider",
2179
+ async sendMessage() {
2180
+ throw new Error("unexpected crash");
2181
+ },
2182
+ } as unknown as Provider,
2922
2183
  });
2923
2184
 
2185
+ // WHEN the orchestrator runs the turn
2924
2186
  await runAgentLoopImpl(ctx, "hi", "msg-1", (msg) => events.push(msg));
2925
2187
 
2926
- expect(ctx.processing).toBe(false);
2188
+ // THEN the finally block clears per-turn state and the failure is
2189
+ // surfaced as a processing-failed conversation error
2190
+ expect(ctx.isProcessing()).toBe(false);
2927
2191
  expect(ctx.abortController).toBeNull();
2928
- expect(events.find((event) => event.type === "error")).toMatchObject({
2929
- type: "error",
2930
- code: "CONVERSATION_PROCESSING_FAILED",
2931
- errorCategory: "processing_failed",
2932
- });
2933
2192
  expect(
2934
2193
  events.find((event) => event.type === "conversation_error"),
2935
2194
  ).toMatchObject({
@@ -2940,46 +2199,19 @@ describe("session-agent-loop", () => {
2940
2199
  });
2941
2200
 
2942
2201
  test("drains queue after completion", async () => {
2202
+ // GIVEN a real loop that answers in a single text turn
2943
2203
  let drainReason: string | undefined;
2944
2204
  const ctx = makeCtx({
2945
- agentLoopRun: async (
2946
- messages: Message[],
2947
- onEvent: (event: AgentEvent) => void | Promise<void>,
2948
- ) => {
2949
- // Prime the assistant row anchor — production code emits this from
2950
- // `AgentLoop.run` just before `provider.sendMessage`. Must be
2951
- // awaited so the assistant row is reserved before message_complete
2952
- // tries to write into it.
2953
- await onEvent({ type: "llm_call_started" });
2954
- onEvent({
2955
- type: "message_complete",
2956
- message: {
2957
- role: "assistant",
2958
- content: [{ type: "text", text: "ok" }],
2959
- },
2960
- });
2961
- onEvent({
2962
- type: "usage",
2963
- inputTokens: 10,
2964
- outputTokens: 5,
2965
- model: "test",
2966
- providerDurationMs: 50,
2967
- });
2968
- return [
2969
- ...messages,
2970
- {
2971
- role: "assistant" as const,
2972
- content: [{ type: "text", text: "ok" }] as ContentBlock[],
2973
- },
2974
- ];
2975
- },
2205
+ providerResponses: [textResponse("ok")],
2976
2206
  drainQueue: (reason: string) => {
2977
2207
  drainReason = reason;
2978
2208
  },
2979
2209
  } as unknown as Partial<AgentLoopConversationContext>);
2980
2210
 
2211
+ // WHEN the orchestrator runs the turn to completion
2981
2212
  await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
2982
2213
 
2214
+ // THEN the queue is drained with the loop-complete reason
2983
2215
  expect(drainReason).toBe("loop_complete");
2984
2216
  });
2985
2217
  });
@@ -3098,7 +2330,7 @@ describe("session-agent-loop", () => {
3098
2330
  isUserMessage: true,
3099
2331
  });
3100
2332
 
3101
- expect(ctx.processing).toBe(false);
2333
+ expect(ctx.isProcessing()).toBe(false);
3102
2334
  expect(ctx.abortController).toBeNull();
3103
2335
  expect(ctx.currentRequestId).toBeUndefined();
3104
2336
  });
@@ -3208,24 +2440,17 @@ describe("session-agent-loop", () => {
3208
2440
  test("synthesizes error assistant message when provider returns no response", async () => {
3209
2441
  const events: ServerMessage[] = [];
3210
2442
 
3211
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
3212
- // Emit a non-ordering, non-context-too-large error that sets providerErrorUserMessage
3213
- onEvent({
3214
- type: "error",
3215
- error: new Error("Internal processing failure"),
3216
- });
3217
- onEvent({
3218
- type: "usage",
3219
- inputTokens: 100,
3220
- outputTokens: 0,
3221
- model: "test-model",
3222
- providerDurationMs: 50,
3223
- });
3224
- // Return same messages (no assistant message appended)
3225
- return messages;
3226
- };
3227
-
3228
- const ctx = makeCtx({ agentLoopRun });
2443
+ // GIVEN a real loop whose provider rejects with a generic error
2444
+ // (non-ordering, non-context-too-large) so the loop emits `error` and
2445
+ // the orchestrator sets `providerErrorUserMessage`.
2446
+ const ctx = makeCtx({
2447
+ loopProvider: {
2448
+ name: "mock-provider",
2449
+ async sendMessage() {
2450
+ throw new Error("Internal processing failure");
2451
+ },
2452
+ } as unknown as Provider,
2453
+ });
3229
2454
  await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
3230
2455
 
3231
2456
  // The error should be sent as a conversation_error (not as an
@@ -3249,26 +2474,19 @@ describe("session-agent-loop", () => {
3249
2474
  // sweep would wrong-attach this row to the wrong assistant message.
3250
2475
  const events: ServerMessage[] = [];
3251
2476
 
3252
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
3253
- // 1) handleProviderError -> writes an `llm_request_logs` row with
3254
- // messageId=null (the orphan we are trying to link).
3255
- onEvent({
3256
- type: "provider_error",
3257
- error: new Error("upstream 500"),
3258
- rawRequest: { model: "gpt-4.1", messages: [] },
3259
- actualProvider: "openai",
3260
- });
3261
- // 2) handleError -> sets `state.providerErrorUserMessage`, which
3262
- // activates the synthetic-message branch below the loop.
3263
- onEvent({
3264
- type: "error",
3265
- error: new Error("upstream 500"),
3266
- });
3267
- // Provider returned no assistant content — same messages back.
3268
- return messages;
3269
- };
3270
-
3271
- const ctx = makeCtx({ agentLoopRun });
2477
+ // GIVEN a real loop whose provider rejects: the loop emits
2478
+ // `provider_error` (writing an `llm_request_logs` row with
2479
+ // messageId=null — the orphan we link) then `error` (which sets
2480
+ // `state.providerErrorUserMessage`, activating the synthetic-message
2481
+ // branch below the loop).
2482
+ const ctx = makeCtx({
2483
+ loopProvider: {
2484
+ name: "mock-provider",
2485
+ async sendMessage() {
2486
+ throw new Error("upstream 500");
2487
+ },
2488
+ } as unknown as Provider,
2489
+ });
3272
2490
  await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
3273
2491
 
3274
2492
  // The orphan was written with messageId=undefined.
@@ -3315,39 +2533,10 @@ describe("session-agent-loop", () => {
3315
2533
  // observe the sync-invalidation publish path on the same turn.
3316
2534
  projectAssistantMessageMock.mockImplementationOnce(() => true);
3317
2535
 
3318
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
3319
- await onEvent({ type: "llm_call_started" });
3320
- // `message_complete` is awaited so `handleMessageComplete` (and its
3321
- // async indexer + projector chain) completes before the next event
3322
- // or before the loop returns. Without the await the projector's
3323
- // synchronous call still races against the test's assertion phase
3324
- // because the indexer's `await` yields microtasks.
3325
- await onEvent({
3326
- type: "message_complete",
3327
- message: {
3328
- role: "assistant",
3329
- content: [{ type: "text", text: "indexed reply" }],
3330
- },
3331
- });
3332
- onEvent({
3333
- type: "usage",
3334
- inputTokens: 10,
3335
- outputTokens: 5,
3336
- model: "test",
3337
- providerDurationMs: 50,
3338
- });
3339
- return [
3340
- ...messages,
3341
- {
3342
- role: "assistant" as const,
3343
- content: [
3344
- { type: "text", text: "indexed reply" },
3345
- ] as ContentBlock[],
3346
- },
3347
- ];
3348
- };
3349
-
3350
- const ctx = makeCtx({ agentLoopRun });
2536
+ // GIVEN a real loop that answers with a single finalized assistant turn
2537
+ const ctx = makeCtx({
2538
+ providerResponses: [textResponse("indexed reply")],
2539
+ });
3351
2540
  await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
3352
2541
 
3353
2542
  // Indexer fired with the reserved row's id + the finalized content.
@@ -3410,34 +2599,8 @@ describe("session-agent-loop", () => {
3410
2599
  metadata: null,
3411
2600
  };
3412
2601
 
3413
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
3414
- await onEvent({ type: "llm_call_started" });
3415
- // See sibling test — `message_complete` must be awaited so the
3416
- // projector call lands before the assertion phase.
3417
- await onEvent({
3418
- type: "message_complete",
3419
- message: {
3420
- role: "assistant",
3421
- content: [{ type: "text", text: "quiet" }],
3422
- },
3423
- });
3424
- onEvent({
3425
- type: "usage",
3426
- inputTokens: 1,
3427
- outputTokens: 1,
3428
- model: "test",
3429
- providerDurationMs: 1,
3430
- });
3431
- return [
3432
- ...messages,
3433
- {
3434
- role: "assistant" as const,
3435
- content: [{ type: "text", text: "quiet" }] as ContentBlock[],
3436
- },
3437
- ];
3438
- };
3439
-
3440
- const ctx = makeCtx({ agentLoopRun });
2602
+ // GIVEN a real loop that answers with a single finalized assistant turn
2603
+ const ctx = makeCtx({ providerResponses: [textResponse("quiet")] });
3441
2604
  await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
3442
2605
 
3443
2606
  expect(projectAssistantMessageMock).toHaveBeenCalledTimes(1);
@@ -3462,40 +2625,33 @@ describe("session-agent-loop", () => {
3462
2625
  // Indexer/projector mocks default to no-op; no finalized row in this
3463
2626
  // test, so `mockMessageById` stays null.
3464
2627
 
3465
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
3466
- // First LLM call: reserve msg-strand-A, never finalize.
3467
- await onEvent({ type: "llm_call_started" });
3468
- // Second LLM call: should delete msg-strand-A before reserving
3469
- // msg-strand-B.
3470
- await onEvent({ type: "llm_call_started" });
3471
- // Finalize the second one so the loop has a valid assistant message
3472
- // and exits cleanly.
3473
- onEvent({
3474
- type: "message_complete",
3475
- message: {
3476
- role: "assistant",
3477
- content: [{ type: "text", text: "retry succeeded" }],
3478
- },
3479
- });
3480
- onEvent({
3481
- type: "usage",
3482
- inputTokens: 5,
3483
- outputTokens: 3,
3484
- model: "test",
3485
- providerDurationMs: 25,
3486
- });
3487
- return [
3488
- ...messages,
3489
- {
3490
- role: "assistant" as const,
3491
- content: [
3492
- { type: "text", text: "retry succeeded" },
3493
- ] as ContentBlock[],
3494
- },
3495
- ];
3496
- };
2628
+ // A single reducer tier converges the oversized context so the
2629
+ // orchestrator re-enters the loop after the first call fails.
2630
+ mockReducerStepFn = (msgs: Message[]) => ({
2631
+ messages: msgs,
2632
+ tier: "forced_compaction",
2633
+ state: {
2634
+ appliedTiers: ["forced_compaction"],
2635
+ injectionMode: "full",
2636
+ exhausted: false,
2637
+ },
2638
+ estimatedTokens: 5000,
2639
+ });
3497
2640
 
3498
- const ctx = makeCtx({ agentLoopRun });
2641
+ // GIVEN a real loop whose first call rejects with context-too-large
2642
+ // (reserving msg-strand-A but never finalizing it), then recovers via
2643
+ // convergence on re-entry. The re-entry's `llm_call_started` must
2644
+ // delete the stranded msg-strand-A before reserving msg-strand-B.
2645
+ const ctx = makeCtx({
2646
+ providerResponses: [
2647
+ new Error("context_length_exceeded"),
2648
+ textResponse("retry succeeded"),
2649
+ ],
2650
+ contextWindowManager: {
2651
+ shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
2652
+ maybeCompact: async () => ({ compacted: false }),
2653
+ } as unknown as AgentLoopConversationContext["contextWindowManager"],
2654
+ });
3499
2655
  await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
3500
2656
 
3501
2657
  // Exactly one delete fires — for msg-strand-A, before the second
@@ -3523,27 +2679,20 @@ describe("session-agent-loop", () => {
3523
2679
  id: "msg-orphaned-reservation",
3524
2680
  }));
3525
2681
 
3526
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
3527
- // Reserve the orphan.
3528
- await onEvent({ type: "llm_call_started" });
3529
- // Provider rejects — writes the llm_request_log row and arms
3530
- // `state.providerErrorUserMessage` via `handleError`.
3531
- onEvent({
3532
- type: "provider_error",
3533
- error: new Error("upstream 500"),
3534
- rawRequest: { model: "gpt-4.1", messages: [] },
3535
- actualProvider: "openai",
3536
- });
3537
- onEvent({
3538
- type: "error",
3539
- error: new Error("upstream 500"),
3540
- });
3541
- // No assistant message in the result — the synthetic-error branch
3542
- // below the agent loop fires.
3543
- return messages;
3544
- };
3545
-
3546
- const ctx = makeCtx({ agentLoopRun });
2682
+ // GIVEN a real loop that reserves an assistant row at
2683
+ // `llm_call_started`, then whose provider rejects: the loop emits
2684
+ // `provider_error` (writing the llm_request_log row) and `error`
2685
+ // (arming `state.providerErrorUserMessage`), exiting with no
2686
+ // `message_complete` so the synthetic-error branch below the loop
2687
+ // fires.
2688
+ const ctx = makeCtx({
2689
+ loopProvider: {
2690
+ name: "mock-provider",
2691
+ async sendMessage() {
2692
+ throw new Error("upstream 500");
2693
+ },
2694
+ } as unknown as Provider,
2695
+ });
3547
2696
  await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
3548
2697
 
3549
2698
  // The orphan was deleted exactly once, before the synthetic error
@@ -3599,40 +2748,23 @@ describe("session-agent-loop", () => {
3599
2748
  metadata: null,
3600
2749
  };
3601
2750
 
3602
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
3603
- await onEvent({ type: "llm_call_started" });
3604
- // Two small deltas well under the 1024-char size gate — should
3605
- // schedule a single debounced flush.
3606
- onEvent({ type: "text_delta", text: "Hello, " });
3607
- onEvent({ type: "text_delta", text: "world." });
3608
- // Wait long enough for the 250ms debounce to fire.
3609
- await new Promise((resolve) => setTimeout(resolve, 1100));
3610
- await onEvent({
3611
- type: "message_complete",
3612
- message: {
3613
- role: "assistant",
3614
- content: [{ type: "text", text: "Hello, world." }],
3615
- },
3616
- });
3617
- onEvent({
3618
- type: "usage",
3619
- inputTokens: 10,
3620
- outputTokens: 5,
3621
- model: "test",
3622
- providerDurationMs: 50,
3623
- });
3624
- return [
3625
- ...messages,
3626
- {
3627
- role: "assistant" as const,
3628
- content: [
3629
- { type: "text", text: "Hello, world." },
3630
- ] as ContentBlock[],
2751
+ // GIVEN a real loop whose provider streams two small deltas (each under
2752
+ // the 1024-char size gate) then holds the turn open past the 250ms
2753
+ // debounce window before completing, so a single debounced partial
2754
+ // flush lands before `message_complete`.
2755
+ const ctx = makeCtx({
2756
+ loopProvider: {
2757
+ name: "mock-provider",
2758
+ async sendMessage(_messages, options) {
2759
+ options?.onEvent?.({ type: "text_delta", text: "Hello, " });
2760
+ options?.onEvent?.({ type: "text_delta", text: "world." });
2761
+ await new Promise((resolve) => setTimeout(resolve, 1100));
2762
+ return textResponse("Hello, world.");
3631
2763
  },
3632
- ];
3633
- };
2764
+ },
2765
+ });
3634
2766
 
3635
- const ctx = makeCtx({ agentLoopRun });
2767
+ // WHEN the orchestrator runs the turn to completion
3636
2768
  await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
3637
2769
 
3638
2770
  // Exactly two `updateContent` calls land:
@@ -3668,70 +2800,38 @@ describe("session-agent-loop", () => {
3668
2800
  metadata: null,
3669
2801
  };
3670
2802
 
3671
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
3672
- await onEvent({ type: "llm_call_started" });
3673
- // No text delta — only a tool_use. If `handleToolUse` were
3674
- // flushing, this would land a partial write before
3675
- // `message_complete`.
3676
- onEvent({
3677
- type: "tool_use",
3678
- id: "tu-no-flush",
3679
- name: "file_read",
3680
- input: { path: "/foo" },
3681
- });
3682
- // Yield a microtask so any (incorrectly) fire-and-forget
3683
- // pipeline call has a chance to land before message_complete.
3684
- await new Promise((resolve) => setImmediate(resolve));
3685
- onEvent({
3686
- type: "tool_result",
3687
- toolUseId: "tu-no-flush",
3688
- content: "ok",
3689
- isError: false,
3690
- });
3691
- await onEvent({
3692
- type: "message_complete",
3693
- message: {
3694
- role: "assistant",
3695
- content: [
3696
- {
3697
- type: "tool_use",
3698
- id: "tu-no-flush",
3699
- name: "file_read",
3700
- input: { path: "/foo" },
3701
- },
3702
- ],
3703
- },
3704
- });
3705
- onEvent({
3706
- type: "usage",
3707
- inputTokens: 10,
3708
- outputTokens: 5,
3709
- model: "test",
3710
- providerDurationMs: 50,
3711
- });
3712
- return [
3713
- ...messages,
2803
+ // GIVEN a real loop that runs one tool turn — the loop emits `tool_use`
2804
+ // strictly AFTER `message_complete` — and then answers with a final
2805
+ // text turn. The tool executor returns immediately.
2806
+ const ctx = makeCtx({
2807
+ providerResponses: [
2808
+ toolUseResponse("tu-no-flush", "file_read", { path: "/foo" }),
2809
+ textResponse("done"),
2810
+ ],
2811
+ loopTools: [
3714
2812
  {
3715
- role: "assistant" as const,
3716
- content: [
3717
- {
3718
- type: "tool_use",
3719
- id: "tu-no-flush",
3720
- name: "file_read",
3721
- input: { path: "/foo" },
3722
- },
3723
- ] as ContentBlock[],
2813
+ name: "file_read",
2814
+ description: "Read a file",
2815
+ input_schema: {
2816
+ type: "object",
2817
+ properties: { path: { type: "string" } },
2818
+ },
3724
2819
  },
3725
- ];
3726
- };
2820
+ ],
2821
+ toolExecutor: async () => ({ content: "ok", isError: false }),
2822
+ });
3727
2823
 
3728
- const ctx = makeCtx({ agentLoopRun });
2824
+ // WHEN the orchestrator runs the turn to completion
3729
2825
  await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
3730
2826
 
3731
- // Only the authoritative final flush from `handleMessageComplete`
3732
- // lands. A partial flush from `handleToolUse` would have made this
3733
- // 2; that's the regression this test guards against.
3734
- expect(updateMessageContentMock).toHaveBeenCalledTimes(1);
2827
+ // Four authoritative writes land and no stray partial flush:
2828
+ // - one final flush per `message_complete` (the tool turn and the final
2829
+ // text turn), plus
2830
+ // - two grouped tool-result user-row writes (persist-on-arrival and the
2831
+ // turn-boundary finalize).
2832
+ // `handleToolUse` contributes no partial flush of its own; one would make
2833
+ // this 5. That stray flush is the regression this test guards against.
2834
+ expect(updateMessageContentMock).toHaveBeenCalledTimes(4);
3735
2835
  });
3736
2836
 
3737
2837
  test("handleMessageComplete clears any pending debounce timer before the final flush", async () => {
@@ -3744,45 +2844,53 @@ describe("session-agent-loop", () => {
3744
2844
  metadata: null,
3745
2845
  };
3746
2846
 
3747
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
3748
- await onEvent({ type: "llm_call_started" });
3749
- // Short delta schedules a debounce timer but does NOT trip the
3750
- // size gate. message_complete then arrives immediately after,
3751
- // before the 250ms timer can fire.
3752
- onEvent({ type: "text_delta", text: "Quick reply." });
3753
- await onEvent({
3754
- type: "message_complete",
3755
- message: {
3756
- role: "assistant",
3757
- content: [{ type: "text", text: "Quick reply." }],
2847
+ // GIVEN a real loop whose first turn streams a short delta (scheduling a
2848
+ // debounce timer) and completes as a tool turn — so `message_complete`
2849
+ // arrives before the 250ms timer and clears it. The tool executor then
2850
+ // holds the loop open well past the original debounce window, proving a
2851
+ // late timer does NOT fire a stray partial flush, before a final text
2852
+ // turn ends the run.
2853
+ const ctx = makeCtx({
2854
+ providerResponses: [
2855
+ {
2856
+ content: [
2857
+ { type: "text", text: "Quick reply." },
2858
+ {
2859
+ type: "tool_use",
2860
+ id: "tu-keep-alive",
2861
+ name: "file_read",
2862
+ input: {},
2863
+ },
2864
+ ],
2865
+ model: "mock-model",
2866
+ usage: { inputTokens: 10, outputTokens: 5 },
2867
+ stopReason: "tool_use",
3758
2868
  },
3759
- });
3760
- onEvent({
3761
- type: "usage",
3762
- inputTokens: 10,
3763
- outputTokens: 5,
3764
- model: "test",
3765
- providerDurationMs: 50,
3766
- });
3767
- // Wait past the original debounce window to prove a late timer
3768
- // does NOT fire a stray partial flush.
3769
- await new Promise((resolve) => setTimeout(resolve, 1100));
3770
- return [
3771
- ...messages,
2869
+ textResponse("done"),
2870
+ ],
2871
+ loopTools: [
3772
2872
  {
3773
- role: "assistant" as const,
3774
- content: [{ type: "text", text: "Quick reply." }] as ContentBlock[],
2873
+ name: "file_read",
2874
+ description: "Read a file",
2875
+ input_schema: { type: "object", properties: {} },
3775
2876
  },
3776
- ];
3777
- };
2877
+ ],
2878
+ toolExecutor: async () => {
2879
+ await new Promise((resolve) => setTimeout(resolve, 1100));
2880
+ return { content: "ok", isError: false };
2881
+ },
2882
+ });
3778
2883
 
3779
- const ctx = makeCtx({ agentLoopRun });
2884
+ // WHEN the orchestrator runs the turn to completion
3780
2885
  await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
3781
2886
 
3782
- // Only the final flush from `handleMessageComplete` lands. The
3783
- // debounced partial would have fired around T+250ms; the timer-clear
3784
- // at the top of `handleMessageComplete` cancels it.
3785
- expect(updateMessageContentMock).toHaveBeenCalledTimes(1);
2887
+ // Four authoritative writes land: one final flush per `message_complete`
2888
+ // (the tool turn and the final text turn) plus two grouped tool-result
2889
+ // user-row writes (persist-on-arrival and the turn-boundary finalize).
2890
+ // The debounced partial would have fired around T+250ms — during the tool
2891
+ // executor's hold — but the timer-clear at the top of
2892
+ // `handleMessageComplete` cancels it, so no stray fifth flush appears.
2893
+ expect(updateMessageContentMock).toHaveBeenCalledTimes(4);
3786
2894
  });
3787
2895
 
3788
2896
  test("partial flushes never trigger the indexer or attention projector", async () => {
@@ -3795,54 +2903,29 @@ describe("session-agent-loop", () => {
3795
2903
  metadata: null,
3796
2904
  };
3797
2905
 
3798
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
3799
- await onEvent({ type: "llm_call_started" });
3800
- onEvent({ type: "text_delta", text: "hello world" });
3801
- // Wait past the 250ms debounce so the partial flush definitely
3802
- // lands BEFORE message_complete fires.
3803
- await new Promise((resolve) => setTimeout(resolve, 1100));
3804
- // Snapshot the indexer/projector call counts AFTER the partial
3805
- // flush has run but BEFORE message_complete. They must be zero.
3806
- const indexerCallsBeforeComplete =
3807
- indexMessageNowMock.mock.calls.length;
3808
- const projectorCallsBeforeComplete =
3809
- projectAssistantMessageMock.mock.calls.length;
3810
- // Stash on a side channel the assertion phase can read.
3811
- (
3812
- ctx as unknown as { __partialSnapshot?: [number, number] }
3813
- ).__partialSnapshot = [
3814
- indexerCallsBeforeComplete,
3815
- projectorCallsBeforeComplete,
3816
- ];
3817
- await onEvent({
3818
- type: "message_complete",
3819
- message: {
3820
- role: "assistant",
3821
- content: [{ type: "text", text: "hello world" }],
3822
- },
3823
- });
3824
- onEvent({
3825
- type: "usage",
3826
- inputTokens: 10,
3827
- outputTokens: 5,
3828
- model: "test",
3829
- providerDurationMs: 50,
3830
- });
3831
- return [
3832
- ...messages,
3833
- {
3834
- role: "assistant" as const,
3835
- content: [{ type: "text", text: "hello world" }] as ContentBlock[],
2906
+ // GIVEN a real loop whose provider streams a delta then holds the turn
2907
+ // open past the 250ms debounce window so the partial flush lands BEFORE
2908
+ // `message_complete`. The indexer/projector counts are snapshotted at
2909
+ // that mid-turn point (after the partial flush, before completion).
2910
+ let snapshot: [number, number] | undefined;
2911
+ const ctx = makeCtx({
2912
+ loopProvider: {
2913
+ name: "mock-provider",
2914
+ async sendMessage(_messages, options) {
2915
+ options?.onEvent?.({ type: "text_delta", text: "hello world" });
2916
+ await new Promise((resolve) => setTimeout(resolve, 1100));
2917
+ snapshot = [
2918
+ indexMessageNowMock.mock.calls.length,
2919
+ projectAssistantMessageMock.mock.calls.length,
2920
+ ];
2921
+ return textResponse("hello world");
3836
2922
  },
3837
- ];
3838
- };
2923
+ },
2924
+ });
3839
2925
 
3840
- const ctx = makeCtx({ agentLoopRun });
2926
+ // WHEN the orchestrator runs the turn to completion
3841
2927
  await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
3842
2928
 
3843
- const snapshot = (
3844
- ctx as unknown as { __partialSnapshot?: [number, number] }
3845
- ).__partialSnapshot;
3846
2929
  expect(snapshot).toBeDefined();
3847
2930
  // Indexer + projector were both ZERO during the mid-turn partial
3848
2931
  // flush — they only fire from `handleMessageComplete` after the
@@ -3870,35 +2953,21 @@ describe("session-agent-loop", () => {
3870
2953
  const ghToken = "ghp_" + "a".repeat(36);
3871
2954
  const payload = "Here's the key: " + ghToken + " enjoy.";
3872
2955
 
3873
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
3874
- await onEvent({ type: "llm_call_started" });
3875
- onEvent({ type: "text_delta", text: payload });
3876
- // Wait past the 250ms debounce so the partial flush lands.
3877
- await new Promise((resolve) => setTimeout(resolve, 1100));
3878
- await onEvent({
3879
- type: "message_complete",
3880
- message: {
3881
- role: "assistant",
3882
- content: [{ type: "text", text: payload }],
3883
- },
3884
- });
3885
- onEvent({
3886
- type: "usage",
3887
- inputTokens: 10,
3888
- outputTokens: 5,
3889
- model: "test",
3890
- providerDurationMs: 50,
3891
- });
3892
- return [
3893
- ...messages,
3894
- {
3895
- role: "assistant" as const,
3896
- content: [{ type: "text", text: payload }] as ContentBlock[],
2956
+ // GIVEN a real loop whose provider streams the PAT-bearing payload as a
2957
+ // delta then holds the turn open past the 250ms debounce window so the
2958
+ // partial flush lands before `message_complete`.
2959
+ const ctx = makeCtx({
2960
+ loopProvider: {
2961
+ name: "mock-provider",
2962
+ async sendMessage(_messages, options) {
2963
+ options?.onEvent?.({ type: "text_delta", text: payload });
2964
+ await new Promise((resolve) => setTimeout(resolve, 1100));
2965
+ return textResponse(payload);
3897
2966
  },
3898
- ];
3899
- };
2967
+ },
2968
+ });
3900
2969
 
3901
- const ctx = makeCtx({ agentLoopRun });
2970
+ // WHEN the orchestrator runs the turn to completion
3902
2971
  await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
3903
2972
 
3904
2973
  expect(updateMessageContentMock).toHaveBeenCalledTimes(2);
@@ -3922,26 +2991,21 @@ describe("session-agent-loop", () => {
3922
2991
  id: "msg-orphan-with-partial",
3923
2992
  }));
3924
2993
 
3925
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
3926
- await onEvent({ type: "llm_call_started" });
3927
- // A debounced delta lands a partial flush BEFORE the provider
3928
- // error fires.
3929
- onEvent({ type: "text_delta", text: "hello world" });
3930
- await new Promise((resolve) => setTimeout(resolve, 1100));
3931
- onEvent({
3932
- type: "provider_error",
3933
- error: new Error("upstream 500"),
3934
- rawRequest: { model: "gpt-4.1", messages: [] },
3935
- actualProvider: "openai",
3936
- });
3937
- onEvent({
3938
- type: "error",
3939
- error: new Error("upstream 500"),
3940
- });
3941
- return messages;
3942
- };
2994
+ // GIVEN a real loop whose provider streams a delta — landing a debounced
2995
+ // partial flush on the reserved row — then rejects, so the loop emits
2996
+ // `provider_error` and `error` and exits with no `message_complete`.
2997
+ const ctx = makeCtx({
2998
+ loopProvider: {
2999
+ name: "mock-provider",
3000
+ async sendMessage(_messages, options) {
3001
+ options?.onEvent?.({ type: "text_delta", text: "hello world" });
3002
+ await new Promise((resolve) => setTimeout(resolve, 1100));
3003
+ throw new Error("upstream 500");
3004
+ },
3005
+ },
3006
+ });
3943
3007
 
3944
- const ctx = makeCtx({ agentLoopRun });
3008
+ // WHEN the orchestrator runs the turn
3945
3009
  await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
3946
3010
 
3947
3011
  // Partial flush fired exactly once (before the provider error).
@@ -4442,51 +3506,32 @@ describe("session-agent-loop", () => {
4442
3506
  compactableStartIndex: 0,
4443
3507
  };
4444
3508
 
4445
- const rawMidLoopBasis: Message[] = [
4446
- {
4447
- role: "user",
4448
- content: [{ type: "text", text: "fresh DB basis user row" }],
4449
- },
4450
- {
4451
- role: "assistant",
4452
- content: [{ type: "text", text: "partial assistant response" }],
4453
- },
4454
- ];
4455
3509
  const maybeCompactInputs: Message[][] = [];
4456
- let runCount = 0;
4457
- const agentLoopRun: AgentLoopRun = async (
4458
- messages,
4459
- _onEvent,
4460
- options,
4461
- ) => {
4462
- runCount++;
4463
- if (runCount === 1) {
4464
- // The loop reaches its mid-loop budget checkpoint with the raw
4465
- // persistent basis as its in-loop history; the wrapped onCheckpoint
4466
- // trips the gate and runs inline compaction over that basis.
4467
- mockEstimateTokens = 90_000;
4468
- const decision = await options?.onCheckpoint?.({
4469
- turnIndex: 0,
4470
- toolCount: 1,
4471
- hasToolUse: true,
4472
- history: rawMidLoopBasis,
4473
- });
4474
- mockEstimateTokens = 1000;
4475
- if (decision !== "continue") {
4476
- return rawMidLoopBasis;
4477
- }
4478
- }
4479
- return [
4480
- ...messages,
4481
- {
4482
- role: "assistant" as const,
4483
- content: [{ type: "text" as const, text: "final response" }],
4484
- },
4485
- ];
4486
- };
4487
3510
 
3511
+ // AND a real loop that runs one tool turn and then a final text turn.
3512
+ // The tool executor raises the token estimate above the mid-loop budget
3513
+ // threshold so the loop compacts in place at the post-tool checkpoint —
3514
+ // over its own in-loop history, which does not match the loaded Slack
3515
+ // rows.
4488
3516
  const ctx = makeCtx({
4489
- agentLoopRun,
3517
+ providerResponses: [
3518
+ toolUseResponse("tu-mid-loop", "file_read", { path: "/foo" }),
3519
+ textResponse("final response"),
3520
+ ],
3521
+ loopTools: [
3522
+ {
3523
+ name: "file_read",
3524
+ description: "Read a file",
3525
+ input_schema: {
3526
+ type: "object",
3527
+ properties: { path: { type: "string" } },
3528
+ },
3529
+ },
3530
+ ],
3531
+ toolExecutor: async () => {
3532
+ mockEstimateTokens = 90_000;
3533
+ return { content: "ok", isError: false };
3534
+ },
4490
3535
  channelCapabilities: {
4491
3536
  channel: "slack",
4492
3537
  dashboardCapable: false,
@@ -4523,6 +3568,9 @@ describe("session-agent-loop", () => {
4523
3568
  summaryText: "",
4524
3569
  };
4525
3570
  }
3571
+ // The mid-loop gate compacted its in-loop basis; drop the estimate
3572
+ // back under budget so the post-compaction provider call proceeds.
3573
+ mockEstimateTokens = 1000;
4526
3574
  return {
4527
3575
  compacted: true,
4528
3576
  messages: [
@@ -4551,7 +3599,9 @@ describe("session-agent-loop", () => {
4551
3599
  await runAgentLoopImpl(ctx, "next reply", "user-msg-mid-loop", () => {});
4552
3600
 
4553
3601
  expect(maybeCompactInputs[0]).toBe(renderedSlackMessages);
4554
- expect(maybeCompactInputs[1]).toBe(rawMidLoopBasis);
3602
+ // The mid-loop gate compacts the loop's own in-loop history, never the
3603
+ // loaded Slack rows — the mismatch this test guards against.
3604
+ expect(maybeCompactInputs[1]).not.toBe(renderedSlackMessages);
4555
3605
  expect(getSlackCompactionWatermarkForPrefixMock).toHaveBeenCalledWith(
4556
3606
  null,
4557
3607
  2,
@@ -4824,67 +3874,32 @@ describe("session-agent-loop", () => {
4824
3874
  estimatedTokens: 5000,
4825
3875
  });
4826
3876
 
4827
- let callCount = 0;
4828
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
4829
- callCount++;
4830
- // Prime the assistant row anchor — production code emits this from
4831
- // `AgentLoop.run` just before `provider.sendMessage`. Retry branches
4832
- // need this on every invocation: each agent-loop iteration reserves
4833
- // its own row.
4834
- await onEvent({ type: "llm_call_started" });
4835
- if (callCount === 1) {
4836
- // Trigger convergence path: error + appended assistant message so
4837
- // updatedHistory.length > preRunHistoryLength at the strip site.
4838
- onEvent({
4839
- type: "error",
4840
- error: new Error("context_length_exceeded"),
4841
- });
4842
- onEvent({
4843
- type: "usage",
4844
- inputTokens: 100,
4845
- outputTokens: 0,
4846
- model: "test-model",
4847
- providerDurationMs: 50,
4848
- });
4849
- return [
4850
- ...messages,
4851
- {
4852
- role: "assistant" as const,
4853
- content: [{ type: "text", text: "partial" }] as ContentBlock[],
4854
- },
4855
- ];
4856
- }
4857
- onEvent({
4858
- type: "message_complete",
4859
- message: {
4860
- role: "assistant",
4861
- content: [{ type: "text", text: "recovered" }],
4862
- },
4863
- });
4864
- onEvent({
4865
- type: "usage",
4866
- inputTokens: 50,
4867
- outputTokens: 25,
4868
- model: "test-model",
4869
- providerDurationMs: 100,
4870
- });
4871
- return [
4872
- ...messages,
3877
+ // GIVEN a real loop that appends a tool turn (so the run reports
3878
+ // `appendedNewMessages`) and then rejects with a context-too-large
3879
+ // error on the following call — the orchestrator strips that appended
3880
+ // history during its bounded convergence path before a final call
3881
+ // recovers.
3882
+ const ctx = makeCtx({
3883
+ providerResponses: [
3884
+ toolUseResponse("t1", "file_read", {}),
3885
+ new Error("context_length_exceeded"),
3886
+ textResponse("recovered"),
3887
+ ],
3888
+ loopTools: [
4873
3889
  {
4874
- role: "assistant" as const,
4875
- content: [{ type: "text", text: "recovered" }] as ContentBlock[],
3890
+ name: "file_read",
3891
+ description: "Read a file",
3892
+ input_schema: { type: "object", properties: {} },
4876
3893
  },
4877
- ];
4878
- };
4879
-
4880
- const ctx = makeCtx({
4881
- agentLoopRun,
3894
+ ],
3895
+ toolExecutor: async () => ({ content: "ok", isError: false }),
4882
3896
  contextWindowManager: {
4883
3897
  shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
4884
3898
  maybeCompact: async () => ({ compacted: false }),
4885
3899
  } as unknown as AgentLoopConversationContext["contextWindowManager"],
4886
3900
  });
4887
3901
 
3902
+ // WHEN the orchestrator runs the turn to completion
4888
3903
  await runAgentLoopImpl(ctx, "hello", "msg-1", () => {});
4889
3904
 
4890
3905
  const stripCalls = setConversationHistoryStrippedAtMock.mock.calls.filter(
@@ -4909,59 +3924,24 @@ describe("session-agent-loop", () => {
4909
3924
  estimatedTokens: 5000,
4910
3925
  });
4911
3926
 
4912
- let callCount = 0;
4913
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
4914
- callCount++;
4915
- // Prime the assistant row anchor — production code emits this from
4916
- // `AgentLoop.run` just before `provider.sendMessage`. Retry branches
4917
- // need this on every invocation: each agent-loop iteration reserves
4918
- // its own row.
4919
- await onEvent({ type: "llm_call_started" });
4920
- if (callCount === 1) {
4921
- onEvent({
4922
- type: "error",
4923
- error: new Error("context_length_exceeded"),
4924
- });
4925
- onEvent({
4926
- type: "usage",
4927
- inputTokens: 100,
4928
- outputTokens: 0,
4929
- model: "test-model",
4930
- providerDurationMs: 50,
4931
- });
4932
- return [
4933
- ...messages,
4934
- {
4935
- role: "assistant" as const,
4936
- content: [{ type: "text", text: "partial" }] as ContentBlock[],
4937
- },
4938
- ];
4939
- }
4940
- onEvent({
4941
- type: "message_complete",
4942
- message: {
4943
- role: "assistant",
4944
- content: [{ type: "text", text: "recovered" }],
4945
- },
4946
- });
4947
- onEvent({
4948
- type: "usage",
4949
- inputTokens: 50,
4950
- outputTokens: 25,
4951
- model: "test-model",
4952
- providerDurationMs: 100,
4953
- });
4954
- return [
4955
- ...messages,
3927
+ // GIVEN a real loop that appends a tool turn and then rejects with a
3928
+ // context-too-large error on the following call, driving the
3929
+ // convergence strip whose marker-write helper is stubbed to throw,
3930
+ // before a final call recovers.
3931
+ const ctx = makeCtx({
3932
+ providerResponses: [
3933
+ toolUseResponse("t1", "file_read", {}),
3934
+ new Error("context_length_exceeded"),
3935
+ textResponse("recovered"),
3936
+ ],
3937
+ loopTools: [
4956
3938
  {
4957
- role: "assistant" as const,
4958
- content: [{ type: "text", text: "recovered" }] as ContentBlock[],
3939
+ name: "file_read",
3940
+ description: "Read a file",
3941
+ input_schema: { type: "object", properties: {} },
4959
3942
  },
4960
- ];
4961
- };
4962
-
4963
- const ctx = makeCtx({
4964
- agentLoopRun,
3943
+ ],
3944
+ toolExecutor: async () => ({ content: "ok", isError: false }),
4965
3945
  contextWindowManager: {
4966
3946
  shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
4967
3947
  maybeCompact: async () => ({ compacted: false }),