@vellumai/assistant 0.8.7 → 0.8.8-dev.202606052332.17fc8ea

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (570):
  1. package/Dockerfile +20 -4
  2. package/bun.lock +2 -2
  3. package/docker-entrypoint.sh +4 -2
  4. package/docker-init-apt-root.sh +3 -1
  5. package/docker-kata-apt-env.sh +3 -1
  6. package/docker-kata-runtime-family.sh +12 -0
  7. package/docs/architecture/memory.md +1 -1
  8. package/examples/plugins/echo/README.md +61 -66
  9. package/examples/plugins/echo/hooks/post-tool-use.ts +18 -0
  10. package/examples/plugins/echo/hooks/stop.ts +16 -0
  11. package/examples/plugins/echo/hooks/user-prompt-submit.ts +18 -0
  12. package/examples/plugins/echo/package.json +1 -2
  13. package/examples/plugins/echo/src/emit.ts +19 -0
  14. package/node_modules/@vellumai/skill-host-contracts/src/server-message.ts +3 -3
  15. package/node_modules/@vellumai/skill-host-contracts/src/skill-host.ts +7 -6
  16. package/openapi.yaml +3378 -335
  17. package/package.json +2 -2
  18. package/scripts/generate-openapi.ts +68 -41
  19. package/src/__tests__/agent-loop-exit-reason.test.ts +35 -93
  20. package/src/__tests__/agent-loop-provider-error-recording.test.ts +1 -1
  21. package/src/__tests__/agent-loop.test.ts +37 -87
  22. package/src/__tests__/agent-wake-disk-pressure-callsite.test.ts +2 -0
  23. package/src/__tests__/annotate-activity-metadata.test.ts +262 -0
  24. package/src/__tests__/annotate-risk-options.test.ts +2 -3
  25. package/src/__tests__/anthropic-provider.test.ts +95 -2
  26. package/src/__tests__/app-control-flow.test.ts +1 -1
  27. package/src/__tests__/app-dir-path-guard.test.ts +1 -0
  28. package/src/__tests__/approval-routes-http.test.ts +4 -1
  29. package/src/__tests__/assistant-event-hub.test.ts +25 -0
  30. package/src/__tests__/assistant-events-sse-shed.test.ts +8 -0
  31. package/src/__tests__/{conversation-stream-state.test.ts → assistant-stream-state.test.ts} +252 -91
  32. package/src/__tests__/auth-fallback-events-store.test.ts +116 -0
  33. package/src/__tests__/background-workers-disk-pressure.test.ts +6 -0
  34. package/src/__tests__/btw-routes.test.ts +62 -3
  35. package/src/__tests__/build-persisted-content.test.ts +184 -0
  36. package/src/__tests__/catalog-files.test.ts +1 -1
  37. package/src/__tests__/channel-approval-routes.test.ts +1 -1
  38. package/src/__tests__/channel-approvals.test.ts +1 -1
  39. package/src/__tests__/clawhub-files.test.ts +1 -1
  40. package/src/__tests__/compaction-circuit.test.ts +258 -0
  41. package/src/__tests__/compaction-direct.test.ts +132 -0
  42. package/src/__tests__/compaction.benchmark.test.ts +0 -30
  43. package/src/__tests__/config-watcher.test.ts +1 -1
  44. package/src/__tests__/conversation-abort-tool-results.test.ts +57 -19
  45. package/src/__tests__/conversation-agent-loop-disk-pressure.test.ts +6 -5
  46. package/src/__tests__/conversation-agent-loop-inference-profile.test.ts +10 -7
  47. package/src/__tests__/conversation-agent-loop-overflow.test.ts +316 -1143
  48. package/src/__tests__/conversation-agent-loop.test.ts +638 -1655
  49. package/src/__tests__/conversation-analysis-routes.test.ts +6 -0
  50. package/src/__tests__/conversation-clean-command.test.ts +5 -2
  51. package/src/__tests__/conversation-history-web-search.test.ts +11 -1
  52. package/src/__tests__/conversation-pairing.test.ts +4 -31
  53. package/src/__tests__/conversation-process-app-control-preactivation.test.ts +6 -0
  54. package/src/__tests__/conversation-provider-retry-repair.test.ts +30 -10
  55. package/src/__tests__/conversation-queue.test.ts +2 -0
  56. package/src/__tests__/conversation-routes-disk-view.test.ts +3 -0
  57. package/src/__tests__/conversation-routes-slash-commands.test.ts +6 -5
  58. package/src/__tests__/conversation-runtime-assembly.test.ts +310 -300
  59. package/src/__tests__/conversation-runtime-workspace.test.ts +105 -45
  60. package/src/__tests__/conversation-slash-commands.test.ts +8 -42
  61. package/src/__tests__/conversation-slash-queue.test.ts +6 -1
  62. package/src/__tests__/conversation-starter-routes.test.ts +14 -6
  63. package/src/__tests__/conversation-surfaces-action-delivery.test.ts +84 -0
  64. package/src/__tests__/conversation-sync-tags.test.ts +27 -15
  65. package/src/__tests__/conversation-title-service.test.ts +135 -2
  66. package/src/__tests__/conversation-workspace-cache-state.test.ts +17 -16
  67. package/src/__tests__/conversation-workspace-injection.test.ts +67 -2
  68. package/src/__tests__/conversation-workspace-tool-tracking.test.ts +7 -6
  69. package/src/__tests__/conversations-import-system-filter.test.ts +101 -0
  70. package/src/__tests__/cross-provider-web-search.test.ts +214 -1
  71. package/src/__tests__/db-acp-history.test.ts +101 -0
  72. package/src/__tests__/db-schedule-syntax-migration.test.ts +5 -0
  73. package/src/__tests__/dm-persistence.test.ts +5 -1
  74. package/src/__tests__/dynamic-page-surface.test.ts +31 -0
  75. package/src/__tests__/empty-response-hook.test.ts +304 -0
  76. package/src/__tests__/feature-flag-test-helpers.ts +2 -2
  77. package/src/__tests__/file-write-tool.test.ts +63 -0
  78. package/src/__tests__/gateway-only-guard.test.ts +12 -2
  79. package/src/__tests__/gemini-image-service.test.ts +13 -0
  80. package/src/__tests__/guardian-grant-minting.test.ts +1 -1
  81. package/src/__tests__/guardian-routing-invariants.test.ts +2 -4
  82. package/src/__tests__/handlers-user-message-approval-consumption.test.ts +1 -1
  83. package/src/__tests__/heartbeat-disk-pressure.test.ts +1 -0
  84. package/src/__tests__/heartbeat-service.test.ts +1 -0
  85. package/src/__tests__/helpers/mock-provider.ts +110 -0
  86. package/src/__tests__/helpers/native-web-search-harness.ts +129 -0
  87. package/src/__tests__/history-repair-hook.test.ts +1 -0
  88. package/src/__tests__/host-app-control-routes.test.ts +1 -1
  89. package/src/__tests__/host-cu-routes-targeted.test.ts +3 -3
  90. package/src/__tests__/identity-intro-cache.test.ts +12 -100
  91. package/src/__tests__/identity-routes.test.ts +248 -7
  92. package/src/__tests__/inbound-slack-persistence.test.ts +5 -1
  93. package/src/__tests__/injector-background-turn.test.ts +3 -9
  94. package/src/__tests__/injector-chain.test.ts +139 -275
  95. package/src/__tests__/injector-disk-pressure.test.ts +75 -41
  96. package/src/__tests__/injector-document-comments.test.ts +3 -3
  97. package/src/__tests__/injector-pkb-v2-silenced.test.ts +30 -22
  98. package/src/__tests__/injector-v3-suppression.test.ts +31 -37
  99. package/src/__tests__/internal-telemetry-routes.test.ts +109 -0
  100. package/src/__tests__/list-messages-hidden-metadata.test.ts +38 -0
  101. package/src/__tests__/list-messages-page-latest.test.ts +60 -0
  102. package/src/__tests__/list-messages-tool-merge.test.ts +20 -0
  103. package/src/__tests__/llm-usage-store.test.ts +223 -1
  104. package/src/__tests__/memory-retrieval-hook.test.ts +297 -0
  105. package/src/__tests__/memory-v2-static-injector.test.ts +103 -35
  106. package/src/__tests__/native-web-search.test.ts +191 -0
  107. package/src/__tests__/onboarding-template-contract.test.ts +2 -0
  108. package/src/__tests__/openai-image-service.test.ts +17 -0
  109. package/src/__tests__/openai-provider.test.ts +31 -1
  110. package/src/__tests__/{overflow-reduce-pipeline.test.ts → overflow-reduction-loop.test.ts} +64 -284
  111. package/src/__tests__/persist-unsendable-image.test.ts +215 -0
  112. package/src/__tests__/persistence-secret-redaction.test.ts +1 -0
  113. package/src/__tests__/pkb-autoinject.test.ts +2 -5
  114. package/src/__tests__/plugin-api-shim.test.ts +3 -6
  115. package/src/__tests__/plugin-bootstrap.test.ts +14 -40
  116. package/src/__tests__/plugin-registry.test.ts +3 -76
  117. package/src/__tests__/plugin-types.test.ts +0 -193
  118. package/src/__tests__/process-message-display-content.test.ts +6 -2
  119. package/src/__tests__/reaction-persistence.test.ts +1 -1
  120. package/src/__tests__/regenerate-fire-and-forget-trace.test.ts +5 -1
  121. package/src/__tests__/resolve-trust-class.test.ts +4 -4
  122. package/src/__tests__/runtime-events-sse-reconnect.test.ts +60 -23
  123. package/src/__tests__/schedule-routes.test.ts +603 -2
  124. package/src/__tests__/schedule-store.test.ts +41 -0
  125. package/src/__tests__/schedule-tools.test.ts +35 -0
  126. package/src/__tests__/send-endpoint-busy.test.ts +4 -1
  127. package/src/__tests__/server-history-render.test.ts +314 -1
  128. package/src/__tests__/skill-feature-flags-integration.test.ts +33 -0
  129. package/src/__tests__/skillssh-files.test.ts +1 -1
  130. package/src/__tests__/subagent-call-site-routing.test.ts +1 -1
  131. package/src/__tests__/subagent-fork-notifications.test.ts +1 -3
  132. package/src/__tests__/subagent-fork-spawn.test.ts +1 -1
  133. package/src/__tests__/subagent-manager-notify.test.ts +1 -3
  134. package/src/__tests__/subagent-notify-parent.test.ts +1 -3
  135. package/src/__tests__/subagent-spawn-tool-fork.test.ts +1 -1
  136. package/src/__tests__/system-prompt.test.ts +20 -0
  137. package/src/__tests__/task-scheduler.test.ts +162 -1
  138. package/src/__tests__/terminal-tools.test.ts +6 -1
  139. package/src/__tests__/title-generate-hook.test.ts +319 -0
  140. package/src/__tests__/tool-error-hook.test.ts +278 -0
  141. package/src/__tests__/tool-preview-lifecycle.test.ts +468 -5
  142. package/src/__tests__/tool-result-metadata-plumbing.test.ts +1 -0
  143. package/src/__tests__/tool-result-truncate-hook.test.ts +127 -0
  144. package/src/__tests__/tool-result-truncation.test.ts +0 -2
  145. package/src/__tests__/ui-choice-copy-surfaces.test.ts +254 -0
  146. package/src/__tests__/ui-work-result-surface.test.ts +159 -0
  147. package/src/__tests__/usage-routes.test.ts +285 -1
  148. package/src/__tests__/user-plugin-loader.test.ts +54 -286
  149. package/src/__tests__/voice-session-bridge.test.ts +6 -3
  150. package/src/__tests__/web-search-backend-failure.test.ts +166 -0
  151. package/src/acp/__tests__/agent-process.test.ts +161 -0
  152. package/src/acp/__tests__/client-handler.test.ts +40 -0
  153. package/src/acp/__tests__/helpers/acp-history-db.ts +82 -0
  154. package/src/acp/__tests__/helpers/exec-file-stub.ts +101 -0
  155. package/src/acp/__tests__/prepare-agent-env.test.ts +137 -0
  156. package/src/acp/__tests__/session-manager-persistence.test.ts +95 -28
  157. package/src/acp/__tests__/session-manager-resume.test.ts +736 -0
  158. package/src/acp/agent-process.ts +61 -1
  159. package/src/acp/auto-install.test.ts +196 -0
  160. package/src/acp/auto-install.ts +177 -0
  161. package/src/acp/client-handler.ts +31 -0
  162. package/src/acp/feature-gate.test.ts +48 -0
  163. package/src/acp/feature-gate.ts +34 -0
  164. package/src/acp/prepare-agent-env.ts +83 -29
  165. package/src/acp/resolve-agent.test.ts +320 -7
  166. package/src/acp/resolve-agent.ts +182 -18
  167. package/src/acp/resume-hint.ts +25 -0
  168. package/src/acp/session-manager.ts +495 -73
  169. package/src/acp/types.ts +8 -0
  170. package/src/agent/compaction-circuit.ts +60 -102
  171. package/src/agent/loop.ts +362 -485
  172. package/src/api/events/assistant-thinking-delta.ts +33 -0
  173. package/src/api/events/tool-output-chunk.ts +45 -0
  174. package/src/api/events/tool-use-preview-start.ts +32 -0
  175. package/src/api/events/trace-event.ts +69 -0
  176. package/src/api/index.ts +48 -13
  177. package/src/api/responses/conversation-message.ts +374 -0
  178. package/src/approvals/guardian-request-resolvers.ts +1 -1
  179. package/src/avatar/__tests__/avatar-store.test.ts +34 -29
  180. package/src/background-wake/next-wake.ts +1 -0
  181. package/src/cli/commands/__tests__/notifications.test.ts +58 -14
  182. package/src/cli/commands/notifications.ts +112 -60
  183. package/src/config/__tests__/feature-flag-registry-guard.test.ts +2 -2
  184. package/src/config/acp-defaults.test.ts +10 -0
  185. package/src/config/acp-defaults.ts +6 -0
  186. package/src/config/assistant-feature-flags.ts +22 -11
  187. package/src/config/bundled-skills/acp/SKILL.md +83 -31
  188. package/src/config/bundled-skills/acp/TOOLS.json +4 -4
  189. package/src/config/bundled-skills/app-builder/SKILL.md +224 -398
  190. package/src/config/bundled-skills/app-builder/TOOLS.json +29 -0
  191. package/src/config/bundled-skills/app-builder/references/DESIGN_SYSTEM.md +48 -0
  192. package/src/config/bundled-skills/app-builder/references/RESPONSIVE.md +57 -0
  193. package/src/config/bundled-skills/app-builder/references/SLIDES.md +38 -0
  194. package/src/config/bundled-skills/app-builder/references/examples/README.md +17 -0
  195. package/src/config/bundled-skills/app-builder/references/examples/expense-tracker.md +515 -0
  196. package/src/config/bundled-skills/app-builder/references/examples/focus-timer.md +342 -0
  197. package/src/config/bundled-skills/app-builder/references/examples/habit-tracker.md +490 -0
  198. package/src/config/bundled-skills/app-builder/tools/app-list.ts +62 -0
  199. package/src/config/bundled-skills/document-editor/SKILL.md +28 -23
  200. package/src/config/bundled-skills/document-editor/TOOLS.json +1 -1
  201. package/src/config/bundled-skills/messaging/SKILL.md +0 -7
  202. package/src/config/bundled-tool-registry.ts +2 -0
  203. package/src/config/feature-flag-cache.ts +3 -3
  204. package/src/config/feature-flag-registry.json +48 -7
  205. package/src/config/schemas/__tests__/memory-v2.test.ts +1 -0
  206. package/src/config/schemas/__tests__/memory-v3.test.ts +25 -0
  207. package/src/config/schemas/heartbeat.ts +9 -0
  208. package/src/config/schemas/llm.ts +1 -0
  209. package/src/config/schemas/memory-v2.ts +8 -0
  210. package/src/config/schemas/memory-v3.ts +8 -0
  211. package/src/config/schemas/platform.ts +8 -0
  212. package/src/config/seed-inference-profiles.ts +2 -2
  213. package/src/config/skills.ts +13 -0
  214. package/src/context/compactor.ts +1 -1
  215. package/src/context/strip-injections.ts +128 -0
  216. package/src/context/token-estimator.ts +23 -0
  217. package/src/context/tool-result-truncation.ts +0 -23
  218. package/src/context/window-manager.ts +5 -7
  219. package/src/credential-execution/executable-discovery.ts +16 -0
  220. package/src/daemon/__tests__/conversation-lifecycle-auto-analyze.test.ts +6 -0
  221. package/src/daemon/__tests__/inference-profile-notification.test.ts +153 -0
  222. package/src/daemon/__tests__/native-web-search-metadata.test.ts +10 -8
  223. package/src/daemon/assistant-attachments.ts +1 -1
  224. package/src/daemon/config-watcher.ts +2 -2
  225. package/src/daemon/context-overflow-reducer.ts +0 -1
  226. package/src/daemon/conversation-agent-loop-handlers.ts +594 -153
  227. package/src/daemon/conversation-agent-loop.ts +301 -997
  228. package/src/daemon/conversation-history.ts +5 -4
  229. package/src/daemon/conversation-lifecycle.ts +3 -4
  230. package/src/daemon/conversation-messaging.ts +7 -6
  231. package/src/daemon/conversation-process.ts +11 -16
  232. package/src/daemon/conversation-registry.ts +159 -0
  233. package/src/daemon/conversation-runtime-assembly.ts +218 -398
  234. package/src/daemon/conversation-slash.ts +6 -25
  235. package/src/daemon/conversation-store.ts +9 -90
  236. package/src/daemon/conversation-surfaces.ts +222 -4
  237. package/src/daemon/conversation-tool-setup.ts +2 -29
  238. package/src/daemon/conversation-workspace.ts +17 -0
  239. package/src/daemon/conversation.ts +32 -20
  240. package/src/daemon/external-plugins-bootstrap.ts +17 -18
  241. package/src/daemon/handlers/config-a2a.ts +51 -36
  242. package/src/daemon/handlers/config-slack-channel.ts +20 -14
  243. package/src/daemon/handlers/config-telegram.ts +16 -2
  244. package/src/daemon/handlers/conversations.ts +3 -1
  245. package/src/daemon/handlers/shared.ts +156 -84
  246. package/src/daemon/handlers/skills.ts +42 -10
  247. package/src/daemon/lifecycle.ts +25 -0
  248. package/src/daemon/message-types/apps.ts +1 -29
  249. package/src/daemon/message-types/messages.ts +9 -57
  250. package/src/daemon/message-types/skills.ts +2 -0
  251. package/src/daemon/message-types/surfaces.ts +136 -3
  252. package/src/daemon/now-scratchpad.ts +21 -0
  253. package/src/daemon/orphan-reaper.test.ts +210 -0
  254. package/src/daemon/orphan-reaper.ts +240 -0
  255. package/src/daemon/overflow-reduction-loop.ts +230 -0
  256. package/src/daemon/persist-unsendable-image.ts +117 -0
  257. package/src/daemon/process-message.ts +1 -3
  258. package/src/daemon/server.ts +2 -0
  259. package/src/daemon/trace-emitter.ts +6 -4
  260. package/src/daemon/trust-context.ts +19 -0
  261. package/src/daemon/wake-target-adapter.ts +3 -1
  262. package/src/heartbeat/__tests__/heartbeat-service.test.ts +3 -0
  263. package/src/heartbeat/heartbeat-run-store.ts +23 -1
  264. package/src/heartbeat/heartbeat-service.ts +26 -0
  265. package/src/home/home-greeting-cache.ts +24 -1
  266. package/src/ipc/__tests__/browser-ipc.test.ts +1 -1
  267. package/src/ipc/__tests__/ui-request-route.test.ts +3 -3
  268. package/src/ipc/gateway-client.test.ts +2 -2
  269. package/src/ipc/gateway-client.ts +3 -3
  270. package/src/ipc/skill-routes/__tests__/memory.test.ts +15 -0
  271. package/src/ipc/skill-routes/memory.ts +4 -2
  272. package/src/media/gemini-image-service.ts +15 -0
  273. package/src/media/openai-image-service.ts +14 -0
  274. package/src/media/types.ts +34 -0
  275. package/src/memory/__tests__/jobs-worker-v2-schedule.test.ts +56 -0
  276. package/src/memory/auth-fallback-events-store.ts +94 -0
  277. package/src/memory/conversation-starter-checkpoints.ts +1 -0
  278. package/src/memory/conversation-title-service.ts +65 -41
  279. package/src/memory/db-init.ts +6 -0
  280. package/src/memory/graph/__tests__/conversation-graph-memory-registry.test.ts +119 -0
  281. package/src/memory/graph/conversation-graph-memory.ts +65 -0
  282. package/src/memory/job-handlers/conversation-starters.ts +13 -2
  283. package/src/memory/jobs-store.ts +33 -0
  284. package/src/memory/jobs-worker.ts +32 -5
  285. package/src/memory/llm-usage-store.ts +224 -50
  286. package/src/memory/migrations/222-strip-placeholder-sentinels-from-messages.ts +6 -5
  287. package/src/memory/migrations/270-schedule-source-conversation.ts +13 -0
  288. package/src/memory/migrations/271-create-auth-fallback-events.ts +21 -0
  289. package/src/memory/migrations/272-acp-session-history-cwd.ts +36 -0
  290. package/src/memory/migrations/index.ts +3 -0
  291. package/src/memory/pkb/autoinject.ts +61 -0
  292. package/src/memory/pkb/context.ts +50 -0
  293. package/src/memory/pkb/types.ts +14 -0
  294. package/src/memory/schedule-attribution-sql.ts +104 -0
  295. package/src/memory/schema/acp.ts +4 -0
  296. package/src/memory/schema/infrastructure.ts +16 -0
  297. package/src/memory/usage-grouped-buckets.ts +6 -1
  298. package/src/memory/v2/__tests__/consolidation-job.test.ts +4 -4
  299. package/src/memory/v2/consolidation-job.ts +14 -5
  300. package/src/notifications/conversation-pairing.ts +8 -15
  301. package/src/notifications/decision-engine.ts +6 -3
  302. package/src/notifications/home-feed-side-effect.ts +12 -1
  303. package/src/permissions/prompter.ts +4 -0
  304. package/src/plugin-api/constants.ts +4 -0
  305. package/src/plugin-api/index.ts +7 -5
  306. package/src/plugin-api/types.ts +151 -1
  307. package/src/plugins/defaults/compaction/compact.ts +59 -0
  308. package/src/plugins/defaults/compaction/package.json +1 -1
  309. package/src/plugins/defaults/compaction/register.ts +8 -19
  310. package/src/plugins/defaults/empty-response/hooks/stop.ts +126 -0
  311. package/src/plugins/defaults/empty-response/register.ts +8 -13
  312. package/src/plugins/defaults/index.ts +2 -18
  313. package/src/plugins/defaults/memory-retrieval/hooks/post-compact.ts +95 -0
  314. package/src/plugins/defaults/memory-retrieval/hooks/user-prompt-submit-temp.ts +216 -0
  315. package/src/plugins/defaults/memory-retrieval/injector-chain.ts +35 -0
  316. package/src/plugins/defaults/{injectors/register.ts → memory-retrieval/injectors.ts} +288 -81
  317. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/assign.test.ts +4 -4
  318. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/health.test.ts +16 -0
  319. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/live-integration.test.ts +4 -4
  320. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/maintain-job.test.ts +5 -5
  321. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/orchestrate.test.ts +48 -12
  322. package/src/plugins/defaults/memory-v3-shadow/__tests__/provider-blocks.test.ts +13 -0
  323. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/reconcile.test.ts +2 -2
  324. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/render-injection.test.ts +1 -1
  325. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/router.test.ts +104 -32
  326. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/selection-log-store.test.ts +8 -8
  327. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/selector.test.ts +96 -30
  328. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/shadow-plugin.test.ts +34 -16
  329. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/assign.ts +5 -5
  330. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/capabilities.ts +2 -2
  331. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/health.ts +0 -0
  332. package/src/plugins/defaults/memory-v3-shadow/hooks/post-compact.ts +14 -0
  333. package/src/plugins/defaults/memory-v3-shadow/hooks/user-prompt-submit.ts +19 -0
  334. package/src/plugins/defaults/memory-v3-shadow/injector.ts +75 -0
  335. package/src/plugins/defaults/memory-v3-shadow/llm-retry.ts +32 -0
  336. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/maintain-job.ts +8 -8
  337. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/orchestrate.ts +26 -14
  338. package/src/plugins/defaults/{llm-call → memory-v3-shadow}/package.json +2 -2
  339. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/page-content.ts +2 -2
  340. package/src/plugins/defaults/memory-v3-shadow/provider-blocks.ts +26 -0
  341. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/reconcile.ts +3 -3
  342. package/src/plugins/defaults/memory-v3-shadow/register.ts +26 -0
  343. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/render-injection.ts +1 -1
  344. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/router.ts +51 -45
  345. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/selection-log-store.ts +4 -4
  346. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/selector.ts +61 -46
  347. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/shadow-plugin.ts +69 -99
  348. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/tree.ts +1 -1
  349. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/types.ts +8 -0
  350. package/src/plugins/defaults/title-generate/hooks/stop.ts +75 -0
  351. package/src/plugins/defaults/title-generate/hooks/user-prompt-submit.ts +35 -0
  352. package/src/plugins/defaults/title-generate/package.json +1 -1
  353. package/src/plugins/defaults/title-generate/register.ts +18 -18
  354. package/src/plugins/defaults/tool-error/hooks/post-tool-use.ts +118 -0
  355. package/src/plugins/defaults/tool-error/package.json +1 -1
  356. package/src/plugins/defaults/tool-error/register.ts +9 -21
  357. package/src/plugins/defaults/tool-result-truncate/hooks/post-tool-use.ts +32 -0
  358. package/src/plugins/defaults/tool-result-truncate/register.ts +10 -21
  359. package/src/plugins/defaults/tool-result-truncate/terminal.ts +37 -18
  360. package/src/plugins/external-api.ts +2 -2
  361. package/src/plugins/pipeline.ts +6 -305
  362. package/src/plugins/registry.ts +10 -55
  363. package/src/plugins/types.ts +62 -797
  364. package/src/plugins/user-loader.ts +30 -127
  365. package/src/proactive-artifact/aux-message-injector.ts +4 -4
  366. package/src/proactive-artifact/job.test.ts +8 -13
  367. package/src/prompts/__tests__/system-prompt.test.ts +42 -0
  368. package/src/prompts/templates/BOOTSTRAP-ACTIVATION-RAIL.md +64 -0
  369. package/src/prompts/templates/BOOTSTRAP.md +2 -2
  370. package/src/prompts/templates/system-sections.ts +15 -0
  371. package/src/providers/anthropic/client.ts +37 -29
  372. package/src/providers/openai/__tests__/chat-completions-provider-reasoning.test.ts +112 -0
  373. package/src/providers/openai/chat-completions-provider.ts +44 -0
  374. package/src/providers/openrouter/client.ts +1 -0
  375. package/src/providers/placeholder-sentinels.ts +35 -0
  376. package/src/runtime/__tests__/agent-wake.test.ts +10 -6
  377. package/src/runtime/__tests__/interactive-ui.test.ts +1 -1
  378. package/src/runtime/agent-wake.ts +2 -5
  379. package/src/runtime/assistant-event-hub.ts +37 -7
  380. package/src/runtime/{conversation-stream-state.ts → assistant-stream-state.ts} +132 -58
  381. package/src/runtime/channel-approvals.ts +1 -1
  382. package/src/runtime/http-router.ts +16 -21
  383. package/src/runtime/http-types.ts +16 -70
  384. package/src/runtime/interactive-ui.ts +1 -1
  385. package/src/runtime/pending-interactions.ts +1 -0
  386. package/src/runtime/routes/__tests__/acp-routes.test.ts +283 -55
  387. package/src/runtime/routes/__tests__/consolidation-routes.test.ts +265 -2
  388. package/src/runtime/routes/__tests__/conversation-list-routes.test.ts +1 -1
  389. package/src/runtime/routes/__tests__/conversation-query-routes.test.ts +31 -1
  390. package/src/runtime/routes/__tests__/memory-v2-routes.test.ts +6 -2
  391. package/src/runtime/routes/__tests__/surface-action-routes.test.ts +5 -4
  392. package/src/runtime/routes/__tests__/surface-content-routes.test.ts +4 -1
  393. package/src/runtime/routes/__tests__/tts-routes.test.ts +6 -2
  394. package/src/runtime/routes/acp-routes.test.ts +89 -25
  395. package/src/runtime/routes/acp-routes.ts +81 -29
  396. package/src/runtime/routes/app-management-routes.ts +6 -117
  397. package/src/runtime/routes/app-routes.ts +13 -15
  398. package/src/runtime/routes/approval-routes.ts +1 -1
  399. package/src/runtime/routes/attachment-routes.ts +26 -15
  400. package/src/runtime/routes/avatar-routes.ts +26 -0
  401. package/src/runtime/routes/browser-routes.ts +1 -1
  402. package/src/runtime/routes/browser-tabs-routes.ts +6 -10
  403. package/src/runtime/routes/btw-routes.ts +29 -23
  404. package/src/runtime/routes/consolidation-routes.ts +120 -20
  405. package/src/runtime/routes/conversation-cli-routes.ts +1 -1
  406. package/src/runtime/routes/conversation-list-routes.ts +1 -1
  407. package/src/runtime/routes/conversation-query-routes.ts +3 -1
  408. package/src/runtime/routes/conversation-routes.ts +372 -185
  409. package/src/runtime/routes/conversation-starter-routes.ts +13 -7
  410. package/src/runtime/routes/conversations-import-routes.ts +24 -7
  411. package/src/runtime/routes/documents-routes.ts +4 -0
  412. package/src/runtime/routes/domain-routes.ts +51 -37
  413. package/src/runtime/routes/epoch-millis-range.ts +34 -0
  414. package/src/runtime/routes/events-routes.ts +28 -34
  415. package/src/runtime/routes/gateway-log-routes.ts +26 -4
  416. package/src/runtime/routes/heartbeat-routes.ts +32 -12
  417. package/src/runtime/routes/host-app-control-routes.ts +1 -1
  418. package/src/runtime/routes/host-cu-routes.ts +1 -1
  419. package/src/runtime/routes/identity-intro-cache.ts +11 -34
  420. package/src/runtime/routes/identity-routes.ts +224 -18
  421. package/src/runtime/routes/image-generation-routes.ts +40 -2
  422. package/src/runtime/routes/inbound-message-handler.ts +1 -1
  423. package/src/runtime/routes/index.ts +2 -0
  424. package/src/runtime/routes/integrations/a2a.ts +12 -10
  425. package/src/runtime/routes/integrations/slack/__tests__/channel.test.ts +16 -0
  426. package/src/runtime/routes/integrations/slack/channel.ts +4 -0
  427. package/src/runtime/routes/integrations/slack/share.ts +27 -6
  428. package/src/runtime/routes/integrations/telegram.ts +6 -0
  429. package/src/runtime/routes/integrations/twilio.ts +42 -0
  430. package/src/runtime/routes/internal-telemetry-routes.ts +88 -0
  431. package/src/runtime/routes/log-export-routes.ts +8 -0
  432. package/src/runtime/routes/memory-v2-routes.ts +15 -8
  433. package/src/runtime/routes/memory-v3-routes.ts +66 -34
  434. package/src/runtime/routes/oauth-apps.ts +66 -12
  435. package/src/runtime/routes/oauth-providers.ts +44 -5
  436. package/src/runtime/routes/platform-routes.ts +81 -5
  437. package/src/runtime/routes/playground/__tests__/force-compact.test.ts +6 -4
  438. package/src/runtime/routes/playground/force-compact.ts +1 -1
  439. package/src/runtime/routes/playground/helpers.ts +1 -1
  440. package/src/runtime/routes/rename-conversation-routes.ts +5 -0
  441. package/src/runtime/routes/schedule-routes.ts +152 -42
  442. package/src/runtime/routes/secret-routes.ts +14 -2
  443. package/src/runtime/routes/skills-routes.ts +43 -14
  444. package/src/runtime/routes/surface-conversation-resolver.ts +4 -3
  445. package/src/runtime/routes/tool-call-confirmation-enrichment.test.ts +161 -0
  446. package/src/runtime/routes/tool-call-confirmation-enrichment.ts +107 -0
  447. package/src/runtime/routes/trust-rules-routes.ts +26 -2
  448. package/src/runtime/routes/tts-routes.ts +35 -0
  449. package/src/runtime/routes/types.ts +66 -8
  450. package/src/runtime/routes/usage-routes.ts +47 -39
  451. package/src/runtime/routes/webhook-routes.ts +41 -2
  452. package/src/runtime/routes/work-items-routes.ts +2 -4
  453. package/src/runtime/routes/workspace-routes.ts +4 -0
  454. package/src/runtime/services/__tests__/analyze-conversation.test.ts +6 -0
  455. package/src/runtime/services/analyze-conversation.ts +2 -2
  456. package/src/runtime/services/conversation-serializer.ts +1 -1
  457. package/src/schedule/schedule-store.ts +20 -1
  458. package/src/schedule/schedule-usage-store.ts +83 -0
  459. package/src/schedule/scheduler.ts +12 -5
  460. package/src/signals/cancel.ts +2 -4
  461. package/src/skills/catalog-files.ts +2 -2
  462. package/src/skills/catalog-install.ts +3 -0
  463. package/src/skills/categories-cache.ts +118 -0
  464. package/src/skills/clawhub-files.ts +1 -2
  465. package/src/skills/skillssh-files.ts +1 -2
  466. package/src/subagent/manager.ts +17 -5
  467. package/src/telemetry/types.ts +29 -1
  468. package/src/telemetry/usage-telemetry-reporter.test.ts +112 -3
  469. package/src/telemetry/usage-telemetry-reporter.ts +57 -2
  470. package/src/tools/acp/context.ts +20 -0
  471. package/src/tools/acp/list-agents.test.ts +7 -1
  472. package/src/tools/acp/spawn.test.ts +158 -55
  473. package/src/tools/acp/spawn.ts +47 -72
  474. package/src/tools/acp/steer.test.ts +105 -8
  475. package/src/tools/acp/steer.ts +48 -17
  476. package/src/tools/apps/executors.ts +13 -8
  477. package/src/tools/executor.ts +1 -53
  478. package/src/tools/filesystem/write.ts +34 -0
  479. package/src/tools/network/__tests__/web-search-metadata.test.ts +7 -1
  480. package/src/tools/network/__tests__/web-search.test.ts +11 -3
  481. package/src/tools/network/web-search-error.test.ts +248 -0
  482. package/src/tools/network/web-search-error.ts +267 -0
  483. package/src/tools/network/web-search.ts +207 -48
  484. package/src/tools/schedule/create.ts +2 -0
  485. package/src/tools/subagent/spawn.ts +2 -4
  486. package/src/tools/terminal/safe-env.ts +10 -1
  487. package/src/tools/ui-surface/definitions.ts +34 -5
  488. package/src/tts/__tests__/provider-catalog-consistency.test.ts +85 -1
  489. package/src/tts/provider-catalog.ts +76 -1
  490. package/src/util/mutex.ts +47 -0
  491. package/src/workspace/git-service.ts +1 -42
  492. package/src/workspace/migrations/051-seed-conversation-summarization-callsite.ts +4 -5
  493. package/src/workspace/migrations/095-bump-heartbeat-interval-30m-to-60m.ts +51 -0
  494. package/src/workspace/migrations/096-reduce-quality-profile-effort.ts +72 -0
  495. package/src/workspace/migrations/097-enable-adaptive-thinking-managed-profiles.ts +117 -0
  496. package/src/workspace/migrations/registry.ts +6 -0
  497. package/docs/plugins.md +0 -836
  498. package/examples/plugins/echo/register.ts +0 -184
  499. package/src/__tests__/bootstrap-turn-cleanup.test.ts +0 -44
  500. package/src/__tests__/circuit-breaker-pipeline.test.ts +0 -405
  501. package/src/__tests__/compaction-pipeline.test.ts +0 -210
  502. package/src/__tests__/compaction-timeout-recovery.test.ts +0 -251
  503. package/src/__tests__/empty-response-pipeline.test.ts +0 -423
  504. package/src/__tests__/llm-call-pipeline.test.ts +0 -287
  505. package/src/__tests__/memory-retrieval-pipeline.test.ts +0 -418
  506. package/src/__tests__/persistence-pipeline.test.ts +0 -503
  507. package/src/__tests__/pipeline-runner.test.ts +0 -564
  508. package/src/__tests__/title-generate-pipeline.test.ts +0 -211
  509. package/src/__tests__/token-estimate-pipeline.test.ts +0 -479
  510. package/src/__tests__/tool-error-pipeline.test.ts +0 -241
  511. package/src/__tests__/tool-execute-pipeline.test.ts +0 -417
  512. package/src/__tests__/tool-result-truncate-pipeline.test.ts +0 -341
  513. package/src/daemon/bootstrap-turn-cleanup.ts +0 -45
  514. package/src/gallery/default-gallery.ts +0 -1359
  515. package/src/gallery/gallery-manifest.ts +0 -28
  516. package/src/home/feature-gate.ts +0 -22
  517. package/src/memory/v3/provider-blocks.ts +0 -16
  518. package/src/plugins/defaults/circuit-breaker/middlewares/circuitBreaker.ts +0 -93
  519. package/src/plugins/defaults/circuit-breaker/package.json +0 -15
  520. package/src/plugins/defaults/circuit-breaker/register.ts +0 -39
  521. package/src/plugins/defaults/compaction/middlewares/compaction.ts +0 -25
  522. package/src/plugins/defaults/compaction/terminal.ts +0 -73
  523. package/src/plugins/defaults/empty-response/middlewares/emptyResponse.ts +0 -22
  524. package/src/plugins/defaults/empty-response/terminal.ts +0 -106
  525. package/src/plugins/defaults/injectors/package.json +0 -15
  526. package/src/plugins/defaults/llm-call/middlewares/llmCall.ts +0 -17
  527. package/src/plugins/defaults/llm-call/register.ts +0 -45
  528. package/src/plugins/defaults/memory-retrieval/middlewares/memoryRetrieval.ts +0 -17
  529. package/src/plugins/defaults/memory-retrieval/package.json +0 -15
  530. package/src/plugins/defaults/memory-retrieval/register.ts +0 -181
  531. package/src/plugins/defaults/overflow-reduce/middlewares/overflowReduce.ts +0 -126
  532. package/src/plugins/defaults/overflow-reduce/package.json +0 -15
  533. package/src/plugins/defaults/overflow-reduce/register.ts +0 -42
  534. package/src/plugins/defaults/persistence/middlewares/persistence.ts +0 -19
  535. package/src/plugins/defaults/persistence/package.json +0 -15
  536. package/src/plugins/defaults/persistence/register.ts +0 -38
  537. package/src/plugins/defaults/persistence/terminal.ts +0 -83
  538. package/src/plugins/defaults/title-generate/terminal.ts +0 -31
  539. package/src/plugins/defaults/token-estimate/middlewares/tokenEstimate.ts +0 -23
  540. package/src/plugins/defaults/token-estimate/package.json +0 -15
  541. package/src/plugins/defaults/token-estimate/register.ts +0 -34
  542. package/src/plugins/defaults/token-estimate/terminal.ts +0 -40
  543. package/src/plugins/defaults/tool-error/middlewares/toolError.ts +0 -21
  544. package/src/plugins/defaults/tool-error/terminal.ts +0 -47
  545. package/src/plugins/defaults/tool-execute/middlewares/toolExecute.ts +0 -23
  546. package/src/plugins/defaults/tool-execute/package.json +0 -15
  547. package/src/plugins/defaults/tool-execute/register.ts +0 -49
  548. package/src/plugins/defaults/tool-result-truncate/middlewares/toolResultTruncate.ts +0 -23
  549. package/src/plugins/defaults/tool-result-truncate/types.ts +0 -22
  550. package/src/skills/category-inference.ts +0 -111
  551. /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/capabilities.test.ts +0 -0
  552. /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/core.test.ts +0 -0
  553. /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/fixtures/eval-turns.json +0 -0
  554. /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/fixtures/live-turns.json +0 -0
  555. /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/needle.test.ts +0 -0
  556. /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/snapshot.test.ts +0 -0
  557. /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/tree.test.ts +0 -0
  558. /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/types.test.ts +0 -0
  559. /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/working-set-eviction.test.ts +0 -0
  560. /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/working-set-skeleton.test.ts +0 -0
  561. /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/core.ts +0 -0
  562. /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/data/README.md +0 -0
  563. /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/data/assignments.json +0 -0
  564. /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/data/core.json +0 -0
  565. /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/data/leaves/domain-a/topic-x.md +0 -0
  566. /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/data/leaves/domain-a/topic-y.md +0 -0
  567. /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/data/leaves/domain-b/topic-z.md +0 -0
  568. /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/needle.ts +0 -0
  569. /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/snapshot.ts +0 -0
  570. /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/working-set.ts +0 -0
@@ -1,26 +1,18 @@
1
1
  import { createRequire } from "node:module";
2
- import { afterAll, beforeEach, describe, expect, mock, test } from "bun:test";
3
-
4
- import { CompactionCircuit } from "../agent/compaction-circuit.js";
5
- import type {
6
- AgentEvent,
7
- AgentLoopRunOptions,
8
- AgentLoopRunResult,
9
- MidLoopCompaction,
10
- } from "../agent/loop.js";
11
- import type { ContextWindowResult } from "../context/window-manager.js";
2
+ import {
3
+ afterAll,
4
+ beforeEach,
5
+ describe,
6
+ expect,
7
+ mock,
8
+ spyOn,
9
+ test,
10
+ } from "bun:test";
11
+
12
+ import type { LoopToolExecutor } from "../agent/loop.js";
12
13
  import type { ServerMessage } from "../daemon/message-protocol.js";
13
- import { defaultCompactionTerminal } from "../plugins/defaults/compaction/terminal.js";
14
14
  import { resetPluginRegistryAndRegisterDefaults } from "../plugins/defaults/index.js";
15
- import { DEFAULT_TIMEOUTS, runPipeline } from "../plugins/pipeline.js";
16
- import { getMiddlewaresFor } from "../plugins/registry.js";
17
- import type {
18
- CompactionArgs,
19
- CompactionResult,
20
- TurnContext,
21
- } from "../plugins/types.js";
22
- import { PluginTimeoutError } from "../plugins/types.js";
23
- import type { ContentBlock, Message } from "../providers/types.js";
15
+ import type { Message, Provider, ToolDefinition } from "../providers/types.js";
24
16
 
25
17
  const conversationCrudRealSnapshot = {
26
18
  ...(createRequire(import.meta.url)(
@@ -76,6 +68,7 @@ mock.module("../config/loader.js", () => ({
76
68
  memory: { retrieval: { scratchpadInjection: { enabled: true } } },
77
69
  ui: mockUiConfig,
78
70
  compaction: { enabled: true, autoThreshold: 0.7 },
71
+ conversations: { skipAutoRetitling: true },
79
72
  }),
80
73
  loadRawConfig: () => ({}),
81
74
  saveRawConfig: () => {},
@@ -86,17 +79,20 @@ mock.module("../config/loader.js", () => ({
86
79
 
87
80
  // Token estimator returns a small value by default (well within budget)
88
81
  // so preflight does not trigger unless the test overrides it. Both the
89
- // calibrated entry point (`estimatePromptTokens`, used in the convergence
90
- // path) and the raw entry point (`estimatePromptTokensRaw`, used by the
91
- // default `tokenEstimate` plugin pipeline for preflight/mid-loop) are
82
+ // calibrated entry point (`estimatePromptTokens`, which backs the preflight
83
+ // overflow gate and the convergence path) and the raw entry point
84
+ // (`estimatePromptTokensRaw`, used by the pre-send calibration capture) are
92
85
  // stubbed so either call site can drive the test.
93
86
  let mockEstimateTokens = 1000;
94
87
  mock.module("../context/token-estimator.js", () => ({
95
88
  estimatePromptTokens: () => mockEstimateTokens,
96
89
  estimatePromptTokensRaw: () => mockEstimateTokens,
97
- // Pass-through: the default plugin computes `toolTokenBudget` via this
98
- // helper before delegating to the raw estimator. Return 0 so the mocked
99
- // raw estimate is not perturbed.
90
+ // The preflight overflow gate calls this calibrated wrapper directly, so it
91
+ // must honor `mockEstimateTokens` too rather than fall through to the real
92
+ // implementation.
93
+ estimatePromptTokensWithTools: () => mockEstimateTokens,
94
+ // Pass-through: `estimatePromptTokensWithTools` computes `toolTokenBudget`
95
+ // via this helper. Return 0 so the mocked estimate is not perturbed.
100
96
  estimateToolsTokens: () => 0,
101
97
  }));
102
98
 
@@ -320,12 +316,14 @@ const buildUnifiedTurnContextBlockMock = mock(
320
316
  (options: Record<string, unknown>) =>
321
317
  `<turn_context>\ncurrent_time: ${String(options.timestamp)}\n</turn_context>`,
322
318
  );
323
- const applyRuntimeInjectionsMock = mock(
324
- async (msgs: Message[], _options?: unknown) => ({
325
- messages: msgs,
326
- blocks: { ...mockInjectionBlocks },
327
- }),
328
- );
319
+ const defaultApplyRuntimeInjectionsImpl = async (
320
+ msgs: Message[],
321
+ _options?: unknown,
322
+ ) => ({
323
+ messages: msgs,
324
+ blocks: { ...mockInjectionBlocks },
325
+ });
326
+ const applyRuntimeInjectionsMock = mock(defaultApplyRuntimeInjectionsImpl);
329
327
  let mockSlackChronologicalContext: {
330
328
  renderedMessages: Array<{
331
329
  message: Message;
@@ -364,15 +362,6 @@ mock.module("../daemon/conversation-runtime-assembly.js", () => ({
364
362
  applyRuntimeInjections: applyRuntimeInjectionsMock,
365
363
  buildUnifiedTurnContextBlock: buildUnifiedTurnContextBlockMock,
366
364
  stripInjectionsForCompaction: (msgs: Message[]) => msgs,
367
- findLastInjectedNowContent: () => null,
368
- readNowScratchpad: () => null,
369
- readPkbContext: () => null,
370
- getPkbAutoInjectList: () => [
371
- "INDEX.md",
372
- "essentials.md",
373
- "threads.md",
374
- "buffer.md",
375
- ],
376
365
  isSlackChannelConversation: () => false,
377
366
  getSlackCompactionWatermarkForPrefix:
378
367
  getSlackCompactionWatermarkForPrefixMock,
@@ -549,195 +538,78 @@ mock.module("../proactive-artifact/index.js", () => ({
549
538
 
550
539
  // ── Imports (after mocks) ────────────────────────────────────────────
551
540
 
541
+ import { AgentLoop } from "../agent/loop.js";
552
542
  import {
553
543
  type AgentLoopConversationContext,
554
544
  applyCompactionResult,
555
545
  runAgentLoopImpl,
556
546
  } from "../daemon/conversation-agent-loop.js";
547
+ import {
548
+ createMockProvider,
549
+ type ScriptedResponse,
550
+ textResponse,
551
+ toolUseResponse,
552
+ } from "./helpers/mock-provider.js";
557
553
 
558
554
  // ── Test helpers ─────────────────────────────────────────────────────
559
555
 
560
- type AgentLoopRun = (
561
- messages: Message[],
562
- onEvent: (event: AgentEvent) => void | Promise<void>,
563
- options?: AgentLoopRunOptions,
564
- ) => Promise<Message[]>;
565
-
566
- /**
567
- * Faithful re-implementation of `AgentLoop.compact()` for the mock loop: run
568
- * the compaction pipeline against the supplied turn context (which carries the
569
- * test's `contextWindowManager`), invoke the orchestrator-supplied hooks, and
570
- * return the continuation history — or `null` on timeout/exhaustion so the
571
- * caller yields "budget".
572
- */
573
- async function simulateInlineCompaction(
574
- compaction: MidLoopCompaction,
575
- history: Message[],
576
- turnContext: TurnContext | undefined,
577
- signal: AbortSignal | undefined,
578
- onEvent: (event: AgentEvent) => void | Promise<void>,
579
- compactionCircuit: CompactionCircuit,
580
- ): Promise<Message[] | null> {
581
- await onEvent({ type: "context_compacting" });
582
- const { rawHistory, options } = compaction.prepare(history);
583
- let result: CompactionResult;
584
- try {
585
- result = await runPipeline<CompactionArgs, CompactionResult>(
586
- "compaction",
587
- getMiddlewaresFor("compaction"),
588
- (args) => defaultCompactionTerminal(args, turnContext as TurnContext),
589
- { messages: rawHistory, signal, options },
590
- turnContext as TurnContext,
591
- DEFAULT_TIMEOUTS.compaction,
592
- );
593
- } catch (error) {
594
- if (error instanceof PluginTimeoutError) {
595
- await compactionCircuit.recordOutcome(
596
- {
597
- currentRequestId: turnContext?.requestId,
598
- currentTurnTrustContext: turnContext?.trust,
599
- turnCount: turnContext?.turnIndex ?? 0,
600
- },
601
- true,
602
- onEvent,
603
- );
604
- return null;
605
- }
606
- throw error;
607
- }
608
- const compactResult = result as ContextWindowResult;
609
- if (compactResult.summaryFailed !== undefined) {
610
- await compactionCircuit.recordOutcome(
611
- {
612
- currentRequestId: turnContext?.requestId,
613
- currentTurnTrustContext: turnContext?.trust,
614
- turnCount: turnContext?.turnIndex ?? 0,
615
- },
616
- compactResult.summaryFailed,
617
- onEvent,
618
- );
619
- }
620
- if (compactResult.compacted) {
621
- await compaction.applyResult(compactResult, rawHistory);
622
- }
623
- if (compactResult.exhausted ?? false) {
624
- return null;
625
- }
626
- return compaction.reinject();
627
- }
628
-
629
- /**
630
- * Adapt a `Message[]`-returning mock loop body into `run()`'s real result
631
- * shape. Mirrors the production loop: the pause-reason carried back is
632
- * whatever the most recent `onCheckpoint` call yielded with (null when it
633
- * never yielded), so the orchestrator derives its yield bookkeeping the same
634
- * way it does against the real loop.
635
- */
636
- const asAgentLoopRun = (
637
- fn: AgentLoopRun,
638
- compactionCircuit: CompactionCircuit,
639
- ): ((
640
- messages: Message[],
641
- onEvent: (event: AgentEvent) => void | Promise<void>,
642
- options?: AgentLoopRunOptions,
643
- ) => Promise<AgentLoopRunResult>) => {
644
- return async (messages, onEvent, options) => {
645
- let exitReason: AgentLoopRunResult["exitReason"] = null;
646
- let wrapped = options;
647
- if (options?.onCheckpoint) {
648
- const inner = options.onCheckpoint;
649
- wrapped = {
650
- ...options,
651
- onCheckpoint: async (info) => {
652
- // Handoff is offered first, mirroring the loop's ordering.
653
- const decision = await inner(info);
654
- if (decision !== "continue") {
655
- exitReason = decision;
656
- return decision;
657
- }
658
- // The mid-loop budget gate and inline compaction both live inside
659
- // `AgentLoop.run`. Replicate them here — same formula, stubbed
660
- // estimator, and the loop's own `compact()` ceremony — so these
661
- // orchestrator tests drive the real escalation path now that the
662
- // orchestrator's `onCheckpoint` is handoff-only and compaction runs
663
- // inline rather than via an orchestrator re-entry loop.
664
- const contextWindow = options.resolveContextWindow?.();
665
- if (contextWindow?.overflowRecovery.enabled) {
666
- const { maxInputTokens, overflowRecovery } = contextWindow;
667
- const safetyMargin =
668
- info.history.length > 50
669
- ? Math.max(overflowRecovery.safetyMarginRatio, 0.15)
670
- : overflowRecovery.safetyMarginRatio;
671
- const preflightBudget = Math.floor(
672
- maxInputTokens * (1 - safetyMargin),
673
- );
674
- if (mockEstimateTokens > preflightBudget * 0.85) {
675
- // Mirror `AgentLoop.compact()`: when a compaction path is
676
- // supplied, run it in place and continue; on timeout or
677
- // exhaustion it returns null, so the loop yields "budget".
678
- const compacted = options.compaction
679
- ? await simulateInlineCompaction(
680
- options.compaction,
681
- info.history,
682
- options.turnContext,
683
- options.signal,
684
- onEvent,
685
- compactionCircuit,
686
- )
687
- : null;
688
- if (compacted) {
689
- exitReason = null;
690
- return "continue";
691
- }
692
- exitReason = "budget";
693
- return "budget";
694
- }
695
- }
696
- exitReason = null;
697
- return "continue";
698
- },
699
- };
700
- }
701
- const history = await fn(messages, onEvent, wrapped);
702
- return { history, exitReason };
703
- };
704
- };
705
-
706
556
  function makeCtx(
707
557
  overrides?: Partial<AgentLoopConversationContext> & {
708
- agentLoopRun?: AgentLoopRun;
558
+ providerResponses?: ScriptedResponse[];
559
+ loopProvider?: Provider;
560
+ loopTools?: ToolDefinition[];
561
+ toolExecutor?: LoopToolExecutor;
709
562
  },
710
563
  ): AgentLoopConversationContext {
711
- const agentLoopRun =
712
- overrides?.agentLoopRun ??
713
- (async (messages: Message[]) => [
714
- ...messages,
715
- {
716
- role: "assistant" as const,
717
- content: [{ type: "text" as const, text: "response" }],
718
- },
719
- ]);
720
-
721
- const compactionCircuit = new CompactionCircuit("test-conv");
564
+ const {
565
+ providerResponses,
566
+ loopProvider,
567
+ loopTools,
568
+ toolExecutor,
569
+ ...ctxOverrides
570
+ } = overrides ?? {};
571
+ const conversationId = ctxOverrides.conversationId ?? "test-conv";
572
+ let processing = true;
573
+
574
+ // Drive the real `AgentLoop` against a scripted provider, mocking only the
575
+ // provider HTTP boundary. The loop owns its mid-loop budget gate, inline
576
+ // compaction, and event emission, so these orchestrator tests exercise the
577
+ // real escalation/persistence path.
578
+ //
579
+ // Name the loop's provider after `ctx.provider` so the two stay in sync,
580
+ // mirroring production where the orchestrator hands the same provider to
581
+ // the loop. The loop stamps this name onto `usage.actualProvider` whenever
582
+ // a response omits its own, which is what the request-log fallback reads.
583
+ // Tests that need to introspect provider calls (or sequence a rejection)
584
+ // build their own `loopProvider` via `createMockProvider`.
585
+ const loopProviderName =
586
+ (ctxOverrides.provider as { name?: string } | undefined)?.name ??
587
+ "mock-provider";
588
+ const provider =
589
+ loopProvider ??
590
+ createMockProvider(
591
+ providerResponses ?? [textResponse("response")],
592
+ loopProviderName,
593
+ ).provider;
594
+ const agentLoop = new AgentLoop(provider, "system prompt", {
595
+ conversationId,
596
+ tools: loopTools ?? [],
597
+ toolExecutor,
598
+ });
722
599
 
723
600
  return {
724
601
  conversationId: "test-conv",
725
602
  messages: [
726
603
  { role: "user", content: [{ type: "text", text: "Hello" }] },
727
604
  ] as Message[],
728
- processing: true,
605
+ isProcessing: () => processing,
606
+ setProcessing: (value: boolean) => {
607
+ processing = value;
608
+ },
729
609
  abortController: new AbortController(),
730
610
  currentRequestId: "test-req",
731
611
 
732
- agentLoop: {
733
- run: asAgentLoopRun(agentLoopRun, compactionCircuit),
734
- getToolTokenBudget: () => 0,
735
- getResolvedTools: () => [],
736
- // Tests here don't exercise calibration; returning undefined makes
737
- // the estimator use the per-provider aggregate key.
738
- getActiveModel: () => undefined,
739
- compactionCircuit,
740
- } as unknown as AgentLoopConversationContext["agentLoop"],
612
+ agentLoop,
741
613
  provider: {
742
614
  name: "mock-provider",
743
615
  sendMessage: async () => ({
@@ -766,8 +638,6 @@ function makeCtx(
766
638
  currentTurnSurfaces: [],
767
639
 
768
640
  workingDir: "/tmp",
769
- workspaceTopLevelContext: null,
770
- workspaceTopLevelDirty: false,
771
641
  channelCapabilities: undefined,
772
642
  commandIntent: undefined,
773
643
  trustContext: undefined,
@@ -804,7 +674,6 @@ function makeCtx(
804
674
  getWorkspaceGitService: () => ({ ensureInitialized: async () => {} }),
805
675
  commitTurnChanges: async () => {},
806
676
 
807
- refreshWorkspaceTopLevelContextIfNeeded: () => {},
808
677
  markWorkspaceTopLevelDirty: () => {},
809
678
  emitActivityState: () => {},
810
679
  getQueueDepth: () => 0,
@@ -830,9 +699,10 @@ function makeCtx(
830
699
  injectedTokens: 0,
831
700
  }),
832
701
  retrackCachedNodes: () => {},
702
+ recordPkbQueryVectors: () => {},
833
703
  } as unknown as AgentLoopConversationContext["graphMemory"],
834
704
 
835
- ...overrides,
705
+ ...ctxOverrides,
836
706
  } as AgentLoopConversationContext;
837
707
  }
838
708
 
@@ -873,6 +743,9 @@ beforeEach(() => {
873
743
  setConversationHistoryStrippedAtMock.mockClear();
874
744
  setConversationHistoryStrippedAtMock.mockImplementation(() => {});
875
745
  applyRuntimeInjectionsMock.mockClear();
746
+ applyRuntimeInjectionsMock.mockImplementation(
747
+ defaultApplyRuntimeInjectionsImpl,
748
+ );
876
749
  buildUnifiedTurnContextBlockMock.mockClear();
877
750
  resolveTurnTimezoneContextMock.mockClear();
878
751
  formatTurnTimestampMock.mockClear();
@@ -886,11 +759,10 @@ beforeEach(() => {
886
759
  projectAssistantMessageMock.mockClear();
887
760
  publishSyncInvalidationMock.mockClear();
888
761
  mockMessageById = null;
889
- // Orchestrator pipelines (overflowReduce, persistence, …) run through the
890
- // plugin registry; reset and re-register every default so the pipelines
891
- // dispatch to middleware backed by the mocked collaborators these tests
892
- // install (`reduceContextOverflow`, `syncMessageToDisk`, etc.) instead of
893
- // hitting the bare terminals.
762
+ // The compaction pipeline runs through the plugin registry; reset and
763
+ // re-register every default so it dispatches to middleware backed by the
764
+ // mocked collaborators these tests install (`syncMessageToDisk`, etc.)
765
+ // instead of hitting the bare terminal.
894
766
  resetPluginRegistryAndRegisterDefaults();
895
767
  });
896
768
 
@@ -970,57 +842,28 @@ describe("session-agent-loop", () => {
970
842
  mockHasProactiveArtifactCompleted = false;
971
843
  mockTryClaimProactiveArtifactTrigger = true;
972
844
 
973
- const agentLoopRun: AgentLoopRun = async (messages, onEvent, options) => {
974
- // Prime the assistant row anchor for LLM call 1 — production code
975
- // emits this from `AgentLoop.run` just before `provider.sendMessage`.
976
- await onEvent({ type: "llm_call_started" });
977
- await onEvent({
978
- type: "message_complete",
979
- message: {
980
- role: "assistant",
981
- content: [{ type: "text", text: "I'll build that app." }],
982
- },
983
- });
984
- await onEvent({
985
- type: "tool_use",
986
- id: "tool-1",
987
- name: "app_create",
988
- input: { name: "Flow" },
989
- });
990
- await onEvent({
991
- type: "tool_result",
992
- toolUseId: "tool-1",
993
- content: "{}",
994
- isError: false,
995
- });
996
- await options?.onCheckpoint?.({
997
- turnIndex: 0,
998
- toolCount: 1,
999
- hasToolUse: true,
1000
- history: messages,
1001
- });
1002
- // Prime the anchor again for LLM call 2 — multi-call agent turns
1003
- // reserve a fresh assistant row per LLM call.
1004
- await onEvent({ type: "llm_call_started" });
1005
- await onEvent({
1006
- type: "message_complete",
1007
- message: {
1008
- role: "assistant",
1009
- content: [{ type: "text", text: "Done." }],
1010
- },
1011
- });
1012
- return [
1013
- ...messages,
1014
- {
1015
- role: "assistant" as const,
1016
- content: [{ type: "text" as const, text: "Done." }],
1017
- },
1018
- ];
1019
- };
1020
-
845
+ // A two-call agent turn: the model invokes `app_create`, then wraps up
846
+ // with a final text reply.
1021
847
  const ctx = makeCtx({
1022
848
  conversationId: "test-conv",
1023
- agentLoopRun,
849
+ providerResponses: [
850
+ {
851
+ content: [
852
+ { type: "text", text: "I'll build that app." },
853
+ {
854
+ type: "tool_use",
855
+ id: "tool-1",
856
+ name: "app_create",
857
+ input: { name: "Flow" },
858
+ },
859
+ ],
860
+ model: "mock-model",
861
+ usage: { inputTokens: 10, outputTokens: 5 },
862
+ stopReason: "tool_use",
863
+ },
864
+ textResponse("Done."),
865
+ ],
866
+ toolExecutor: async () => ({ content: "{}", isError: false }),
1024
867
  });
1025
868
  await runAgentLoopImpl(
1026
869
  ctx,
@@ -1038,7 +881,23 @@ describe("session-agent-loop", () => {
1038
881
  });
1039
882
 
1040
883
  describe("disk pressure injection context", () => {
1041
- test("passes cleanup context into runtime injections for cleanup-mode turns", async () => {
884
+ // The loop sets `ctx.diskPressureCleanupModeActive` for the duration of the
885
+ // turn (the disk-pressure-warning injector reads it via the per-conversation
886
+ // registry) and resets it in the turn-end cleanup path. Snapshot the flag at
887
+ // each `applyRuntimeInjections` call so assertions observe its value while
888
+ // injection runs, not the post-turn reset.
889
+ function captureCleanupFlagDuringInjection(ctx: {
890
+ diskPressureCleanupModeActive?: boolean;
891
+ }): () => Array<boolean | undefined> {
892
+ const observed: Array<boolean | undefined> = [];
893
+ applyRuntimeInjectionsMock.mockImplementation(async (msgs: Message[]) => {
894
+ observed.push(ctx.diskPressureCleanupModeActive);
895
+ return { messages: msgs, blocks: { ...mockInjectionBlocks } };
896
+ });
897
+ return () => observed;
898
+ }
899
+
900
+ test("sets the cleanup-mode flag on the conversation for cleanup-mode turns", async () => {
1042
901
  mockDiskPressureDecision = {
1043
902
  action: "allow-cleanup-mode",
1044
903
  reason: "guardian",
@@ -1061,6 +920,7 @@ describe("session-agent-loop", () => {
1061
920
  trustClass: "guardian",
1062
921
  } as AgentLoopConversationContext["trustContext"],
1063
922
  });
923
+ const cleanupFlagDuringInjection = captureCleanupFlagDuringInjection(ctx);
1064
924
 
1065
925
  await runAgentLoopImpl(ctx, "free up space", "msg-1", () => {});
1066
926
 
@@ -1079,21 +939,16 @@ describe("session-agent-loop", () => {
1079
939
  },
1080
940
  }),
1081
941
  );
1082
- const firstInjectionOptions = applyRuntimeInjectionsMock.mock
1083
- .calls[0]![1] as {
1084
- diskPressureContext?: { cleanupModeActive: boolean } | null;
1085
- };
1086
- expect(firstInjectionOptions.diskPressureContext).toEqual({
1087
- cleanupModeActive: true,
1088
- });
942
+ expect(cleanupFlagDuringInjection()).toEqual([true]);
1089
943
  });
1090
944
 
1091
- test("passes cleanup context into runtime injections for local-owner turns", async () => {
945
+ test("sets the cleanup-mode flag on the conversation for local-owner turns", async () => {
1092
946
  mockDiskPressureDecision = {
1093
947
  action: "allow-cleanup-mode",
1094
948
  reason: "local-owner",
1095
949
  };
1096
950
  const ctx = makeCtx();
951
+ const cleanupFlagDuringInjection = captureCleanupFlagDuringInjection(ctx);
1097
952
 
1098
953
  await runAgentLoopImpl(ctx, "free up space", "msg-1", () => {});
1099
954
 
@@ -1105,16 +960,10 @@ describe("session-agent-loop", () => {
1105
960
  trustContext: null,
1106
961
  }),
1107
962
  );
1108
- const firstInjectionOptions = applyRuntimeInjectionsMock.mock
1109
- .calls[0]![1] as {
1110
- diskPressureContext?: { cleanupModeActive: boolean } | null;
1111
- };
1112
- expect(firstInjectionOptions.diskPressureContext).toEqual({
1113
- cleanupModeActive: true,
1114
- });
963
+ expect(cleanupFlagDuringInjection()).toEqual([true]);
1115
964
  });
1116
965
 
1117
- test("keeps cleanup context on overflow recovery reinjection", async () => {
966
+ test("keeps the cleanup-mode flag set across overflow recovery reinjection", async () => {
1118
967
  mockDiskPressureDecision = {
1119
968
  action: "allow-cleanup-mode",
1120
969
  reason: "guardian",
@@ -1136,18 +985,14 @@ describe("session-agent-loop", () => {
1136
985
  trustClass: "guardian",
1137
986
  } as AgentLoopConversationContext["trustContext"],
1138
987
  });
988
+ const cleanupFlagDuringInjection = captureCleanupFlagDuringInjection(ctx);
1139
989
 
1140
990
  await runAgentLoopImpl(ctx, "free up space", "msg-1", () => {});
1141
991
 
1142
992
  expect(applyRuntimeInjectionsMock.mock.calls.length).toBeGreaterThan(1);
1143
- for (const call of applyRuntimeInjectionsMock.mock.calls) {
1144
- const options = call[1] as {
1145
- diskPressureContext?: { cleanupModeActive: boolean } | null;
1146
- };
1147
- expect(options.diskPressureContext).toEqual({
1148
- cleanupModeActive: true,
1149
- });
1150
- }
993
+ const flags = cleanupFlagDuringInjection();
994
+ expect(flags.length).toBeGreaterThan(1);
995
+ expect(flags.every((flag) => flag === true)).toBe(true);
1151
996
  });
1152
997
 
1153
998
  test("blocks policy-denied turns before runtime injection or model execution", async () => {
@@ -1156,9 +1001,6 @@ describe("session-agent-loop", () => {
1156
1001
  reason: "trusted-contact",
1157
1002
  };
1158
1003
  const events: ServerMessage[] = [];
1159
- const agentLoopRun = mock(async (_messages: Message[]) => {
1160
- throw new Error("agent loop should not run");
1161
- });
1162
1004
  const activityStates: unknown[][] = [];
1163
1005
  const traceEvents: unknown[][] = [];
1164
1006
  const ctx = makeCtx({
@@ -1171,14 +1013,11 @@ describe("session-agent-loop", () => {
1171
1013
  },
1172
1014
  } as unknown as AgentLoopConversationContext["traceEmitter"],
1173
1015
  });
1174
- ctx.agentLoop.run = asAgentLoopRun(
1175
- agentLoopRun,
1176
- ctx.agentLoop.compactionCircuit,
1177
- );
1016
+ const runSpy = spyOn(ctx.agentLoop, "run");
1178
1017
 
1179
1018
  await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
1180
1019
 
1181
- expect(agentLoopRun).not.toHaveBeenCalled();
1020
+ expect(runSpy).not.toHaveBeenCalled();
1182
1021
  expect(applyRuntimeInjectionsMock).not.toHaveBeenCalled();
1183
1022
  expect(activityStates).toContainEqual([
1184
1023
  "idle",
@@ -1238,7 +1077,7 @@ describe("session-agent-loop", () => {
1238
1077
  });
1239
1078
 
1240
1079
  expect(applyRuntimeInjectionsMock).not.toHaveBeenCalled();
1241
- expect(ctx.processing).toBe(false);
1080
+ expect(ctx.isProcessing()).toBe(false);
1242
1081
  expect(ctx.abortController).toBeNull();
1243
1082
  expect(ctx.currentRequestId).toBeUndefined();
1244
1083
  expect(drainQueue).toHaveBeenCalledWith("loop_complete");
@@ -1254,47 +1093,14 @@ describe("session-agent-loop", () => {
1254
1093
  test("error events from agent loop are classified and emitted", async () => {
1255
1094
  const events: ServerMessage[] = [];
1256
1095
 
1257
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
1258
- // Prime the assistant row anchor — production code emits this from
1259
- // `AgentLoop.run` just before `provider.sendMessage`.
1260
- await onEvent({ type: "llm_call_started" });
1261
- // Simulate tool_use + error during execution
1262
- onEvent({
1263
- type: "tool_use",
1264
- id: "tu-1",
1265
- name: "bash",
1266
- input: { cmd: "ls" },
1267
- });
1268
- onEvent({
1269
- type: "error",
1270
- error: new Error("Tool execution failed: permission denied"),
1271
- });
1272
- onEvent({
1273
- type: "message_complete",
1274
- message: {
1275
- role: "assistant",
1276
- content: [{ type: "text", text: "I encountered an error" }],
1277
- },
1278
- });
1279
- onEvent({
1280
- type: "usage",
1281
- inputTokens: 100,
1282
- outputTokens: 50,
1283
- model: "test-model",
1284
- providerDurationMs: 200,
1285
- });
1286
- return [
1287
- ...messages,
1288
- {
1289
- role: "assistant" as const,
1290
- content: [
1291
- { type: "text", text: "I encountered an error" },
1292
- ] as ContentBlock[],
1293
- },
1294
- ];
1295
- };
1296
-
1297
- const ctx = makeCtx({ agentLoopRun });
1096
+ // The model calls a tool whose executor throws, surfacing an `error`
1097
+ // event from the loop's catch handler.
1098
+ const ctx = makeCtx({
1099
+ providerResponses: [toolUseResponse("tu-1", "bash", { cmd: "ls" })],
1100
+ toolExecutor: async () => {
1101
+ throw new Error("Tool execution failed: permission denied");
1102
+ },
1103
+ });
1298
1104
  await runAgentLoopImpl(ctx, "run ls", "msg-1", (msg) => events.push(msg));
1299
1105
 
1300
1106
  const conversationError = events.find(
@@ -1306,34 +1112,9 @@ describe("session-agent-loop", () => {
1306
1112
  test("non-error agent loop completion does not emit conversation_error", async () => {
1307
1113
  const events: ServerMessage[] = [];
1308
1114
 
1309
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
1310
- // Prime the assistant row anchor — production code emits this from
1311
- // `AgentLoop.run` just before `provider.sendMessage`.
1312
- await onEvent({ type: "llm_call_started" });
1313
- onEvent({
1314
- type: "message_complete",
1315
- message: {
1316
- role: "assistant",
1317
- content: [{ type: "text", text: "All good" }],
1318
- },
1319
- });
1320
- onEvent({
1321
- type: "usage",
1322
- inputTokens: 50,
1323
- outputTokens: 25,
1324
- model: "test-model",
1325
- providerDurationMs: 100,
1326
- });
1327
- return [
1328
- ...messages,
1329
- {
1330
- role: "assistant" as const,
1331
- content: [{ type: "text", text: "All good" }] as ContentBlock[],
1332
- },
1333
- ];
1334
- };
1335
-
1336
- const ctx = makeCtx({ agentLoopRun });
1115
+ const ctx = makeCtx({
1116
+ providerResponses: [textResponse("All good")],
1117
+ });
1337
1118
  await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
1338
1119
 
1339
1120
  const conversationError = events.find(
@@ -1369,38 +1150,20 @@ describe("session-agent-loop", () => {
1369
1150
  },
1370
1151
  };
1371
1152
 
1372
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
1373
- // Prime the assistant row anchor — production code emits this from
1374
- // `AgentLoop.run` just before `provider.sendMessage`.
1375
- await onEvent({ type: "llm_call_started" });
1376
- onEvent({
1377
- type: "message_complete",
1378
- message: {
1379
- role: "assistant",
1380
- content: [{ type: "text", text: "Hi there." }],
1381
- },
1382
- });
1383
- onEvent({
1384
- type: "usage",
1385
- inputTokens: 12,
1386
- outputTokens: 3,
1387
- model: "gpt-4.1-2026-03-01",
1388
- actualProvider: "fireworks",
1389
- providerDurationMs: 45,
1390
- rawRequest,
1391
- rawResponse,
1392
- });
1393
- return [
1394
- ...messages,
1153
+ // The provider response carries its own `actualProvider`, so the logged
1154
+ // row should record that name rather than the runtime provider.
1155
+ const ctx = makeCtx({
1156
+ providerResponses: [
1395
1157
  {
1396
- role: "assistant" as const,
1397
- content: [{ type: "text", text: "Hi there." }] as ContentBlock[],
1158
+ content: [{ type: "text", text: "Hi there." }],
1159
+ model: "gpt-4.1-2026-03-01",
1160
+ usage: { inputTokens: 12, outputTokens: 3 },
1161
+ stopReason: "end_turn",
1162
+ actualProvider: "fireworks",
1163
+ rawRequest,
1164
+ rawResponse,
1398
1165
  },
1399
- ];
1400
- };
1401
-
1402
- const ctx = makeCtx({
1403
- agentLoopRun,
1166
+ ],
1404
1167
  provider: {
1405
1168
  name: "openrouter",
1406
1169
  sendMessage: async () => ({
@@ -1437,37 +1200,19 @@ describe("session-agent-loop", () => {
1437
1200
  ],
1438
1201
  };
1439
1202
 
1440
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
1441
- // Prime the assistant row anchor — production code emits this from
1442
- // `AgentLoop.run` just before `provider.sendMessage`.
1443
- await onEvent({ type: "llm_call_started" });
1444
- onEvent({
1445
- type: "message_complete",
1446
- message: {
1447
- role: "assistant",
1448
- content: [{ type: "text", text: "Hi there." }],
1449
- },
1450
- });
1451
- onEvent({
1452
- type: "usage",
1453
- inputTokens: 12,
1454
- outputTokens: 3,
1455
- model: "gpt-4.1-2026-03-01",
1456
- providerDurationMs: 45,
1457
- rawRequest,
1458
- rawResponse,
1459
- });
1460
- return [
1461
- ...messages,
1203
+ // The provider response omits `actualProvider`, so the loop stamps the
1204
+ // runtime provider name onto the usage event and the row records it.
1205
+ const ctx = makeCtx({
1206
+ providerResponses: [
1462
1207
  {
1463
- role: "assistant" as const,
1464
- content: [{ type: "text", text: "Hi there." }] as ContentBlock[],
1208
+ content: [{ type: "text", text: "Hi there." }],
1209
+ model: "gpt-4.1-2026-03-01",
1210
+ usage: { inputTokens: 12, outputTokens: 3 },
1211
+ stopReason: "end_turn",
1212
+ rawRequest,
1213
+ rawResponse,
1465
1214
  },
1466
- ];
1467
- };
1468
-
1469
- const ctx = makeCtx({
1470
- agentLoopRun,
1215
+ ],
1471
1216
  provider: {
1472
1217
  name: "openrouter",
1473
1218
  sendMessage: async () => ({
@@ -1522,38 +1267,18 @@ describe("session-agent-loop", () => {
1522
1267
  status: "completed",
1523
1268
  };
1524
1269
 
1525
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
1526
- // Prime the assistant row anchor — production code emits this from
1527
- // `AgentLoop.run` just before `provider.sendMessage`.
1528
- await onEvent({ type: "llm_call_started" });
1529
- onEvent({
1530
- type: "message_complete",
1531
- message: {
1532
- role: "assistant",
1533
- content: [{ type: "text", text: "Hi there." }],
1534
- },
1535
- });
1536
- onEvent({
1537
- type: "usage",
1538
- inputTokens: 12,
1539
- outputTokens: 3,
1540
- model: "gpt-5.4",
1541
- actualProvider: "openai",
1542
- providerDurationMs: 45,
1543
- rawRequest,
1544
- rawResponse,
1545
- });
1546
- return [
1547
- ...messages,
1270
+ const ctx = makeCtx({
1271
+ providerResponses: [
1548
1272
  {
1549
- role: "assistant" as const,
1550
- content: [{ type: "text", text: "Hi there." }] as ContentBlock[],
1273
+ content: [{ type: "text", text: "Hi there." }],
1274
+ model: "gpt-5.4",
1275
+ usage: { inputTokens: 12, outputTokens: 3 },
1276
+ stopReason: "end_turn",
1277
+ actualProvider: "openai",
1278
+ rawRequest,
1279
+ rawResponse,
1551
1280
  },
1552
- ];
1553
- };
1554
-
1555
- const ctx = makeCtx({
1556
- agentLoopRun,
1281
+ ],
1557
1282
  provider: {
1558
1283
  name: "openai",
1559
1284
  sendMessage: async () => ({
@@ -1593,37 +1318,17 @@ describe("session-agent-loop", () => {
1593
1318
  attrs: Record<string, unknown>;
1594
1319
  }> = [];
1595
1320
 
1596
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
1597
- // Prime the assistant row anchor — production code emits this from
1598
- // `AgentLoop.run` just before `provider.sendMessage`.
1599
- await onEvent({ type: "llm_call_started" });
1600
- onEvent({ type: "text_delta", text: "Hi." });
1601
- onEvent({
1602
- type: "message_complete",
1603
- message: {
1604
- role: "assistant",
1605
- content: [{ type: "text", text: "Hi." }],
1606
- },
1607
- });
1608
- onEvent({
1609
- type: "usage",
1610
- inputTokens: 10,
1611
- outputTokens: 2,
1612
- model: "gpt-5.5-2026-04-23",
1613
- actualProvider: "openai",
1614
- providerDurationMs: 100,
1615
- });
1616
- return [
1617
- ...messages,
1321
+ const ctx = makeCtx({
1322
+ // The loop replays the text block as a `text_delta` before `usage`.
1323
+ providerResponses: [
1618
1324
  {
1619
- role: "assistant" as const,
1620
- content: [{ type: "text", text: "Hi." }] as ContentBlock[],
1325
+ content: [{ type: "text", text: "Hi." }],
1326
+ model: "gpt-5.5-2026-04-23",
1327
+ usage: { inputTokens: 10, outputTokens: 2 },
1328
+ stopReason: "end_turn",
1329
+ actualProvider: "openai",
1621
1330
  },
1622
- ];
1623
- };
1624
-
1625
- const ctx = makeCtx({
1626
- agentLoopRun,
1331
+ ],
1627
1332
  // Provider name matches actualProvider so both paths agree.
1628
1333
  provider: {
1629
1334
  name: "openai",
@@ -1671,31 +1376,18 @@ describe("session-agent-loop", () => {
1671
1376
  attrs: Record<string, unknown>;
1672
1377
  }> = [];
1673
1378
 
1674
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
1675
- // Prime the assistant row anchor — production code emits this from
1676
- // `AgentLoop.run` just before `provider.sendMessage`.
1677
- await onEvent({ type: "llm_call_started" });
1678
- // No text_delta — pure tool-call response
1679
- onEvent({
1680
- type: "message_complete",
1681
- message: {
1682
- role: "assistant",
1379
+ const ctx = makeCtx({
1380
+ // An empty-content response: no text block fires `text_delta`, so the
1381
+ // started event falls back to the resolved usage provider name.
1382
+ providerResponses: [
1383
+ {
1683
1384
  content: [],
1385
+ model: "gpt-5.5-2026-04-23",
1386
+ usage: { inputTokens: 10, outputTokens: 2 },
1387
+ stopReason: "end_turn",
1388
+ actualProvider: "openai",
1684
1389
  },
1685
- });
1686
- onEvent({
1687
- type: "usage",
1688
- inputTokens: 10,
1689
- outputTokens: 2,
1690
- model: "gpt-5.5-2026-04-23",
1691
- actualProvider: "openai",
1692
- providerDurationMs: 100,
1693
- });
1694
- return messages;
1695
- };
1696
-
1697
- const ctx = makeCtx({
1698
- agentLoopRun,
1390
+ ],
1699
1391
  provider: {
1700
1392
  name: "anthropic",
1701
1393
  sendMessage: async () => ({
@@ -1737,52 +1429,32 @@ describe("session-agent-loop", () => {
1737
1429
  test("records the actual provider for usage accounting", async () => {
1738
1430
  const events: ServerMessage[] = [];
1739
1431
 
1740
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
1741
- // Prime the assistant row anchor — production code emits this from
1742
- // `AgentLoop.run` just before `provider.sendMessage`.
1743
- await onEvent({ type: "llm_call_started" });
1744
- onEvent({
1745
- type: "message_complete",
1746
- message: {
1747
- role: "assistant",
1432
+ const ctx = makeCtx({
1433
+ providerResponses: [
1434
+ {
1748
1435
  content: [{ type: "text", text: "Hi there." }],
1749
- },
1750
- });
1751
- onEvent({
1752
- type: "usage",
1753
- inputTokens: 12,
1754
- outputTokens: 3,
1755
- model: "gpt-4.1-2026-03-01",
1756
- actualProvider: "fireworks",
1757
- providerDurationMs: 45,
1758
- rawRequest: {
1759
- model: "gpt-4.1",
1760
- messages: [{ role: "user", content: "Hello" }],
1761
- },
1762
- rawResponse: {
1763
1436
  model: "gpt-4.1-2026-03-01",
1764
- choices: [
1765
- {
1766
- finish_reason: "stop",
1767
- message: {
1768
- role: "assistant",
1769
- content: "Hi there.",
1437
+ usage: { inputTokens: 12, outputTokens: 3 },
1438
+ stopReason: "end_turn",
1439
+ actualProvider: "fireworks",
1440
+ rawRequest: {
1441
+ model: "gpt-4.1",
1442
+ messages: [{ role: "user", content: "Hello" }],
1443
+ },
1444
+ rawResponse: {
1445
+ model: "gpt-4.1-2026-03-01",
1446
+ choices: [
1447
+ {
1448
+ finish_reason: "stop",
1449
+ message: {
1450
+ role: "assistant",
1451
+ content: "Hi there.",
1452
+ },
1770
1453
  },
1771
- },
1772
- ],
1773
- },
1774
- });
1775
- return [
1776
- ...messages,
1777
- {
1778
- role: "assistant" as const,
1779
- content: [{ type: "text", text: "Hi there." }] as ContentBlock[],
1454
+ ],
1455
+ },
1780
1456
  },
1781
- ];
1782
- };
1783
-
1784
- const ctx = makeCtx({
1785
- agentLoopRun,
1457
+ ],
1786
1458
  provider: {
1787
1459
  name: "openrouter",
1788
1460
  sendMessage: async () => ({
@@ -1852,27 +1524,9 @@ describe("session-agent-loop", () => {
1852
1524
  },
1853
1525
  });
1854
1526
 
1855
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
1856
- // Prime the assistant row anchor — production code emits this from
1857
- // `AgentLoop.run` just before `provider.sendMessage`.
1858
- await onEvent({ type: "llm_call_started" });
1859
- onEvent({
1860
- type: "message_complete",
1861
- message: {
1862
- role: "assistant",
1863
- content: [{ type: "text", text: "recovered" }],
1864
- },
1865
- });
1866
- return [
1867
- ...messages,
1868
- {
1869
- role: "assistant" as const,
1870
- content: [{ type: "text", text: "recovered" }] as ContentBlock[],
1871
- },
1872
- ];
1873
- };
1874
-
1875
- const ctx = makeCtx({ agentLoopRun });
1527
+ // After the orchestrator's preflight compaction runs, the loop completes
1528
+ // the turn normally.
1529
+ const ctx = makeCtx({ providerResponses: [textResponse("recovered")] });
1876
1530
  await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
1877
1531
 
1878
1532
  const compactorCall = recordUsageMock.mock.calls.find(
@@ -1911,7 +1565,6 @@ describe("session-agent-loop", () => {
1911
1565
 
1912
1566
  test("convergence loop applies reducer and retries when context-too-large is detected", async () => {
1913
1567
  const events: ServerMessage[] = [];
1914
- let callCount = 0;
1915
1568
  let reducerCalled = false;
1916
1569
 
1917
1570
  // Configure reducer to succeed on first call — return reduced messages
@@ -1945,53 +1598,15 @@ describe("session-agent-loop", () => {
1945
1598
  };
1946
1599
  };
1947
1600
 
1948
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
1949
- // Prime the assistant row anchor — production code emits this from
1950
- // `AgentLoop.run` just before `provider.sendMessage`. Retry branches
1951
- // need this on every invocation: each agent-loop iteration reserves
1952
- // its own row.
1953
- await onEvent({ type: "llm_call_started" });
1954
- callCount++;
1955
- if (callCount === 1) {
1956
- onEvent({
1957
- type: "error",
1958
- error: new Error("context_length_exceeded"),
1959
- });
1960
- onEvent({
1961
- type: "usage",
1962
- inputTokens: 100,
1963
- outputTokens: 0,
1964
- model: "test-model",
1965
- providerDurationMs: 50,
1966
- });
1967
- return messages;
1968
- }
1969
- // Second call (after reducer): succeed
1970
- onEvent({
1971
- type: "message_complete",
1972
- message: {
1973
- role: "assistant",
1974
- content: [{ type: "text", text: "recovered" }],
1975
- },
1976
- });
1977
- onEvent({
1978
- type: "usage",
1979
- inputTokens: 50,
1980
- outputTokens: 25,
1981
- model: "test-model",
1982
- providerDurationMs: 100,
1983
- });
1984
- return [
1985
- ...messages,
1986
- {
1987
- role: "assistant" as const,
1988
- content: [{ type: "text", text: "recovered" }] as ContentBlock[],
1989
- },
1990
- ];
1991
- };
1601
+ // The provider rejects the first call with a context-too-large error,
1602
+ // then succeeds once the orchestrator has reduced the context.
1603
+ const { provider, calls } = createMockProvider([
1604
+ new Error("context_length_exceeded"),
1605
+ textResponse("recovered"),
1606
+ ]);
1992
1607
 
1993
1608
  const ctx = makeCtx({
1994
- agentLoopRun,
1609
+ loopProvider: provider,
1995
1610
  contextWindowManager: {
1996
1611
  shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
1997
1612
  maybeCompact: async () => ({ compacted: false }),
@@ -2001,7 +1616,7 @@ describe("session-agent-loop", () => {
2001
1616
  await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
2002
1617
 
2003
1618
  expect(reducerCalled).toBe(true);
2004
- expect(callCount).toBe(2);
1619
+ expect(calls.length).toBe(2);
2005
1620
  const compactEvent = events.find((e) => e.type === "context_compacted");
2006
1621
  expect(compactEvent).toBeDefined();
2007
1622
  });
@@ -2009,23 +1624,10 @@ describe("session-agent-loop", () => {
2009
1624
  test("emits conversation_error when context stays too large after all recovery attempts", async () => {
2010
1625
  const events: ServerMessage[] = [];
2011
1626
 
2012
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
2013
- onEvent({
2014
- type: "error",
2015
- error: new Error("context_length_exceeded"),
2016
- });
2017
- onEvent({
2018
- type: "usage",
2019
- inputTokens: 100,
2020
- outputTokens: 0,
2021
- model: "test-model",
2022
- providerDurationMs: 50,
2023
- });
2024
- return messages;
2025
- };
2026
-
1627
+ // The provider rejects every call with a context-too-large error, so the
1628
+ // orchestrator exhausts its recovery attempts.
2027
1629
  const ctx = makeCtx({
2028
- agentLoopRun,
1630
+ providerResponses: [new Error("context_length_exceeded")],
2029
1631
  contextWindowManager: {
2030
1632
  shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
2031
1633
  // Compaction succeeds but context is still too large
@@ -2059,7 +1661,6 @@ describe("session-agent-loop", () => {
2059
1661
 
2060
1662
  test("bounded convergence loop applies reducer tiers and recovers", async () => {
2061
1663
  const events: ServerMessage[] = [];
2062
- let callCount = 0;
2063
1664
  let reducerCalls = 0;
2064
1665
 
2065
1666
  // Reducer: succeed on first call, returning reduced messages
@@ -2077,55 +1678,15 @@ describe("session-agent-loop", () => {
2077
1678
  };
2078
1679
  };
2079
1680
 
2080
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
2081
- // Prime the assistant row anchor — production code emits this from
2082
- // `AgentLoop.run` just before `provider.sendMessage`. Retry branches
2083
- // need this on every invocation: each agent-loop iteration reserves
2084
- // its own row.
2085
- await onEvent({ type: "llm_call_started" });
2086
- callCount++;
2087
- if (callCount === 1) {
2088
- onEvent({
2089
- type: "error",
2090
- error: new Error("context_length_exceeded"),
2091
- });
2092
- onEvent({
2093
- type: "usage",
2094
- inputTokens: 100,
2095
- outputTokens: 0,
2096
- model: "test-model",
2097
- providerDurationMs: 50,
2098
- });
2099
- return messages;
2100
- }
2101
- // After reducer runs, succeed
2102
- onEvent({
2103
- type: "message_complete",
2104
- message: {
2105
- role: "assistant",
2106
- content: [{ type: "text", text: "recovered via convergence" }],
2107
- },
2108
- });
2109
- onEvent({
2110
- type: "usage",
2111
- inputTokens: 50,
2112
- outputTokens: 25,
2113
- model: "test-model",
2114
- providerDurationMs: 100,
2115
- });
2116
- return [
2117
- ...messages,
2118
- {
2119
- role: "assistant" as const,
2120
- content: [
2121
- { type: "text", text: "recovered via convergence" },
2122
- ] as ContentBlock[],
2123
- },
2124
- ];
2125
- };
1681
+ // The provider rejects the first call with a context-too-large error,
1682
+ // then succeeds once the orchestrator has reduced the context.
1683
+ const { provider, calls } = createMockProvider([
1684
+ new Error("context_length_exceeded"),
1685
+ textResponse("recovered via convergence"),
1686
+ ]);
2126
1687
 
2127
1688
  const ctx = makeCtx({
2128
- agentLoopRun,
1689
+ loopProvider: provider,
2129
1690
  contextWindowManager: {
2130
1691
  shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
2131
1692
  maybeCompact: async () => ({ compacted: false }),
@@ -2135,7 +1696,7 @@ describe("session-agent-loop", () => {
2135
1696
  await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
2136
1697
 
2137
1698
  expect(reducerCalls).toBeGreaterThanOrEqual(1);
2138
- expect(callCount).toBe(2);
1699
+ expect(calls.length).toBe(2);
2139
1700
  const conversationError = events.find(
2140
1701
  (e) => e.type === "conversation_error",
2141
1702
  );
@@ -2146,7 +1707,6 @@ describe("session-agent-loop", () => {
2146
1707
 
2147
1708
  test("non-interactive auto-compress continues without approval prompt", async () => {
2148
1709
  const events: ServerMessage[] = [];
2149
- let callCount = 0;
2150
1710
 
2151
1711
  // Reducer exhausts all tiers
2152
1712
  mockReducerStepFn = (msgs: Message[]) => ({
@@ -2167,54 +1727,14 @@ describe("session-agent-loop", () => {
2167
1727
 
2168
1728
  mockOverflowAction = "auto_compress_latest_turn";
2169
1729
 
2170
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
2171
- // Prime the assistant row anchor — production code emits this from
2172
- // `AgentLoop.run` just before `provider.sendMessage`. Retry branches
2173
- // need this on every invocation: each agent-loop iteration reserves
2174
- // its own row.
2175
- await onEvent({ type: "llm_call_started" });
2176
- callCount++;
2177
- if (callCount <= 2) {
2178
- onEvent({
2179
- type: "error",
2180
- error: new Error("context_length_exceeded"),
2181
- });
2182
- onEvent({
2183
- type: "usage",
2184
- inputTokens: 100,
2185
- outputTokens: 0,
2186
- model: "test-model",
2187
- providerDurationMs: 50,
2188
- });
2189
- return messages;
2190
- }
2191
- onEvent({
2192
- type: "message_complete",
2193
- message: {
2194
- role: "assistant",
2195
- content: [{ type: "text", text: "auto-recovered" }],
2196
- },
2197
- });
2198
- onEvent({
2199
- type: "usage",
2200
- inputTokens: 50,
2201
- outputTokens: 25,
2202
- model: "test-model",
2203
- providerDurationMs: 100,
2204
- });
2205
- return [
2206
- ...messages,
2207
- {
2208
- role: "assistant" as const,
2209
- content: [
2210
- { type: "text", text: "auto-recovered" },
2211
- ] as ContentBlock[],
2212
- },
2213
- ];
2214
- };
2215
-
1730
+ // The provider rejects the first two calls with context-too-large errors,
1731
+ // then succeeds after the emergency auto-compress runs.
2216
1732
  const ctx = makeCtx({
2217
- agentLoopRun,
1733
+ providerResponses: [
1734
+ new Error("context_length_exceeded"),
1735
+ new Error("context_length_exceeded"),
1736
+ textResponse("auto-recovered"),
1737
+ ],
2218
1738
  hasNoClient: true,
2219
1739
  contextWindowManager: {
2220
1740
  shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
@@ -2261,7 +1781,6 @@ describe("session-agent-loop", () => {
2261
1781
  // `budget_yield_unrecovered` so the inspector and dashboards can
2262
1782
  // attribute the silent stall.
2263
1783
  const events: ServerMessage[] = [];
2264
- let callCount = 0;
2265
1784
 
2266
1785
  // Reducer exhausts all 4 tiers on first call so the convergence
2267
1786
  // loop runs exactly one iteration before falling through to
@@ -2292,43 +1811,30 @@ describe("session-agent-loop", () => {
2292
1811
  // call). 90k satisfies both so the path reaches call 3.
2293
1812
  mockEstimateTokens = 90_000;
2294
1813
 
2295
- const agentLoopRun: AgentLoopRun = async (messages, onEvent, options) => {
2296
- callCount++;
2297
- if (callCount <= 2) {
2298
- // Calls 1 (initial) and 2 (convergence rerun): error so
2299
- // `state.contextTooLargeDetected` stays true through
2300
- // convergence exit and we enter the auto_compress branch.
2301
- onEvent({
2302
- type: "error",
2303
- error: new Error("context_length_exceeded"),
2304
- });
2305
- onEvent({
2306
- type: "usage",
2307
- inputTokens: 100,
2308
- outputTokens: 0,
2309
- model: "test-model",
2310
- providerDurationMs: 50,
2311
- });
2312
- return messages;
2313
- }
2314
- // Call 3: the auto_compress_latest_turn rerun. Invoke
2315
- // onCheckpoint so the orchestrator's mid-loop budget check
2316
- // flips `yieldedForBudget` to true, then return without
2317
- // finishing — mirroring what AgentLoop.run does when its
2318
- // checkpoint returns "yield".
2319
- if (options?.onCheckpoint) {
2320
- await options.onCheckpoint({
2321
- turnIndex: 0,
2322
- toolCount: 1,
2323
- hasToolUse: true,
2324
- history: messages,
2325
- });
2326
- }
2327
- return messages;
2328
- };
2329
-
1814
+ // Calls 1 (initial) and 2 (convergence rerun) reject with
1815
+ // context-too-large so `contextTooLargeDetected` stays true through the
1816
+ // convergence exit and the orchestrator enters the auto_compress branch.
1817
+ // Call 3 (the auto_compress rerun) is a tool turn: the loop runs it
1818
+ // without a compaction hook, so when its mid-loop budget gate trips on
1819
+ // the still-oversized estimate it yields `exitReason = "budget"` rather
1820
+ // than recovering — the silent-stall path under test.
2330
1821
  const ctx = makeCtx({
2331
- agentLoopRun,
1822
+ providerResponses: [
1823
+ new Error("context_length_exceeded"),
1824
+ new Error("context_length_exceeded"),
1825
+ toolUseResponse("t1", "read_file", { path: "/a.txt" }),
1826
+ ],
1827
+ loopTools: [
1828
+ {
1829
+ name: "read_file",
1830
+ description: "Read a file",
1831
+ input_schema: {
1832
+ type: "object",
1833
+ properties: { path: { type: "string" } },
1834
+ },
1835
+ },
1836
+ ],
1837
+ toolExecutor: async () => ({ content: "data", isError: false }),
2332
1838
  hasNoClient: true,
2333
1839
  contextWindowManager: {
2334
1840
  shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
@@ -2411,23 +1917,10 @@ describe("session-agent-loop", () => {
2411
1917
  };
2412
1918
  };
2413
1919
 
2414
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
2415
- onEvent({
2416
- type: "error",
2417
- error: new Error("context_length_exceeded"),
2418
- });
2419
- onEvent({
2420
- type: "usage",
2421
- inputTokens: 100,
2422
- outputTokens: 0,
2423
- model: "test-model",
2424
- providerDurationMs: 50,
2425
- });
2426
- return messages;
2427
- };
2428
-
1920
+ // The provider rejects every call with a context-too-large error, so the
1921
+ // orchestrator keeps retrying until it hits the attempt ceiling.
2429
1922
  const ctx = makeCtx({
2430
- agentLoopRun,
1923
+ providerResponses: [new Error("context_length_exceeded")],
2431
1924
  contextWindowManager: {
2432
1925
  shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
2433
1926
  maybeCompact: async () => ({ compacted: false }),
@@ -2443,7 +1936,6 @@ describe("session-agent-loop", () => {
2443
1936
  test("preflight budget evaluation invokes reducer before provider call", async () => {
2444
1937
  const events: ServerMessage[] = [];
2445
1938
  let reducerCalls = 0;
2446
- let agentLoopCalls = 0;
2447
1939
 
2448
1940
  // Set token estimate above budget (100000 * 0.95 = 95000)
2449
1941
  mockEstimateTokens = 96000;
@@ -2462,36 +1954,11 @@ describe("session-agent-loop", () => {
2462
1954
  };
2463
1955
  };
2464
1956
 
2465
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
2466
- agentLoopCalls++;
2467
- // Prime the assistant row anchor — production code emits this from
2468
- // `AgentLoop.run` just before `provider.sendMessage`.
2469
- await onEvent({ type: "llm_call_started" });
2470
- onEvent({
2471
- type: "message_complete",
2472
- message: {
2473
- role: "assistant",
2474
- content: [{ type: "text", text: "ok" }],
2475
- },
2476
- });
2477
- onEvent({
2478
- type: "usage",
2479
- inputTokens: 50,
2480
- outputTokens: 25,
2481
- model: "test-model",
2482
- providerDurationMs: 100,
2483
- });
2484
- return [
2485
- ...messages,
2486
- {
2487
- role: "assistant" as const,
2488
- content: [{ type: "text", text: "ok" }] as ContentBlock[],
2489
- },
2490
- ];
2491
- };
2492
-
1957
+ // After the preflight reducer brings the estimate under budget, the loop
1958
+ // completes the turn in a single provider call.
1959
+ const { provider, calls } = createMockProvider([textResponse("ok")]);
2493
1960
  const ctx = makeCtx({
2494
- agentLoopRun,
1961
+ loopProvider: provider,
2495
1962
  contextWindowManager: {
2496
1963
  shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
2497
1964
  maybeCompact: async () => ({ compacted: false }),
@@ -2502,8 +1969,8 @@ describe("session-agent-loop", () => {
2502
1969
 
2503
1970
  // Reducer should have been called during preflight
2504
1971
  expect(reducerCalls).toBeGreaterThanOrEqual(1);
2505
- // Agent loop should still succeed
2506
- expect(agentLoopCalls).toBe(1);
1972
+ // Agent loop should still succeed in a single provider call
1973
+ expect(calls.length).toBe(1);
2507
1974
  const complete = events.find((e) => e.type === "message_complete");
2508
1975
  expect(complete).toBeDefined();
2509
1976
  });
@@ -2512,78 +1979,28 @@ describe("session-agent-loop", () => {
2512
1979
  describe("provider ordering error retry", () => {
2513
1980
  test("retries with deep repair when ordering error is detected", async () => {
2514
1981
  const events: ServerMessage[] = [];
2515
- let callCount = 0;
2516
-
2517
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
2518
- // Prime the assistant row anchor — production code emits this from
2519
- // `AgentLoop.run` just before `provider.sendMessage`. Retry branches
2520
- // need this on every invocation: each agent-loop iteration reserves
2521
- // its own row.
2522
- await onEvent({ type: "llm_call_started" });
2523
- callCount++;
2524
- if (callCount === 1) {
2525
- onEvent({
2526
- type: "error",
2527
- error: new Error("messages ordering error"),
2528
- });
2529
- onEvent({
2530
- type: "usage",
2531
- inputTokens: 100,
2532
- outputTokens: 0,
2533
- model: "test-model",
2534
- providerDurationMs: 50,
2535
- });
2536
- return messages;
2537
- }
2538
- // Retry succeeds
2539
- onEvent({
2540
- type: "message_complete",
2541
- message: {
2542
- role: "assistant",
2543
- content: [{ type: "text", text: "fixed" }],
2544
- },
2545
- });
2546
- onEvent({
2547
- type: "usage",
2548
- inputTokens: 50,
2549
- outputTokens: 25,
2550
- model: "test-model",
2551
- providerDurationMs: 100,
2552
- });
2553
- return [
2554
- ...messages,
2555
- {
2556
- role: "assistant" as const,
2557
- content: [{ type: "text", text: "fixed" }] as ContentBlock[],
2558
- },
2559
- ];
2560
- };
2561
1982
 
2562
- const ctx = makeCtx({ agentLoopRun });
1983
+ // The provider rejects the first call with an ordering error, then
1984
+ // succeeds once the orchestrator's deep repair re-sends the turn.
1985
+ const { provider, calls } = createMockProvider([
1986
+ new Error("messages ordering error"),
1987
+ textResponse("fixed"),
1988
+ ]);
1989
+
1990
+ const ctx = makeCtx({ loopProvider: provider });
2563
1991
  await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
2564
1992
 
2565
- expect(callCount).toBe(2);
1993
+ expect(calls.length).toBe(2);
2566
1994
  });
2567
1995
 
2568
1996
  test("emits deferred ordering error when retry also fails", async () => {
2569
1997
  const events: ServerMessage[] = [];
2570
1998
 
2571
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
2572
- onEvent({
2573
- type: "error",
2574
- error: new Error("messages ordering error"),
2575
- });
2576
- onEvent({
2577
- type: "usage",
2578
- inputTokens: 100,
2579
- outputTokens: 0,
2580
- model: "test-model",
2581
- providerDurationMs: 50,
2582
- });
2583
- return messages;
2584
- };
2585
-
2586
- const ctx = makeCtx({ agentLoopRun });
1999
+ // The provider rejects every call with an ordering error, so even the
2000
+ // deep-repair retry fails and the orchestrator surfaces the error.
2001
+ const ctx = makeCtx({
2002
+ providerResponses: [new Error("messages ordering error")],
2003
+ });
2587
2004
  await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
2588
2005
 
2589
2006
  const conversationError = events.find(
@@ -2597,62 +2014,18 @@ describe("session-agent-loop", () => {
2597
2014
  test("yields at checkpoint when canHandoffAtCheckpoint returns true", async () => {
2598
2015
  const events: ServerMessage[] = [];
2599
2016
 
2600
- const agentLoopRun: AgentLoopRun = async (messages, onEvent, options) => {
2601
- // Prime the assistant row anchor — production code emits this from
2602
- // `AgentLoop.run` just before `provider.sendMessage`. Retry branches
2603
- // need this on every invocation: each agent-loop iteration reserves
2604
- // its own row.
2605
- await onEvent({ type: "llm_call_started" });
2606
- // Simulate tool use followed by checkpoint
2607
- onEvent({ type: "tool_use", id: "tu-1", name: "file_read", input: {} });
2608
- onEvent({
2609
- type: "tool_result",
2610
- toolUseId: "tu-1",
2611
- content: "file content",
2612
- isError: false,
2613
- });
2614
- onEvent({
2615
- type: "message_complete",
2616
- message: {
2617
- role: "assistant",
2618
- content: [{ type: "text", text: "partial" }],
2619
- },
2620
- });
2621
- onEvent({
2622
- type: "usage",
2623
- inputTokens: 100,
2624
- outputTokens: 50,
2625
- model: "test-model",
2626
- providerDurationMs: 100,
2627
- });
2628
- if (options?.onCheckpoint) {
2629
- const decision = await options.onCheckpoint({
2630
- turnIndex: 0,
2631
- toolCount: 1,
2632
- hasToolUse: true,
2633
- history: messages,
2634
- });
2635
- if (decision !== "continue") {
2636
- return [
2637
- ...messages,
2638
- {
2639
- role: "assistant" as const,
2640
- content: [{ type: "text", text: "partial" }] as ContentBlock[],
2641
- },
2642
- ];
2643
- }
2644
- }
2645
- return [
2646
- ...messages,
2017
+ // A tool turn drives the loop to its first mid-loop checkpoint, where the
2018
+ // orchestrator yields for a queued handoff.
2019
+ const ctx = makeCtx({
2020
+ providerResponses: [toolUseResponse("tu-1", "file_read", {})],
2021
+ loopTools: [
2647
2022
  {
2648
- role: "assistant" as const,
2649
- content: [{ type: "text", text: "partial" }] as ContentBlock[],
2023
+ name: "file_read",
2024
+ description: "Read a file",
2025
+ input_schema: { type: "object", properties: {} },
2650
2026
  },
2651
- ];
2652
- };
2653
-
2654
- const ctx = makeCtx({
2655
- agentLoopRun,
2027
+ ],
2028
+ toolExecutor: async () => ({ content: "file content", isError: false }),
2656
2029
  canHandoffAtCheckpoint: () => true,
2657
2030
  } as unknown as Partial<AgentLoopConversationContext>);
2658
2031
 
@@ -2669,52 +2042,21 @@ describe("session-agent-loop", () => {
2669
2042
  test("continues when canHandoffAtCheckpoint returns false", async () => {
2670
2043
  const events: ServerMessage[] = [];
2671
2044
 
2672
- const agentLoopRun: AgentLoopRun = async (messages, onEvent, options) => {
2673
- // Prime the assistant row anchor — production code emits this from
2674
- // `AgentLoop.run` just before `provider.sendMessage`. Retry branches
2675
- // need this on every invocation: each agent-loop iteration reserves
2676
- // its own row.
2677
- await onEvent({ type: "llm_call_started" });
2678
- onEvent({ type: "tool_use", id: "tu-1", name: "file_read", input: {} });
2679
- onEvent({
2680
- type: "tool_result",
2681
- toolUseId: "tu-1",
2682
- content: "content",
2683
- isError: false,
2684
- });
2685
- onEvent({
2686
- type: "message_complete",
2687
- message: {
2688
- role: "assistant",
2689
- content: [{ type: "text", text: "done" }],
2690
- },
2691
- });
2692
- onEvent({
2693
- type: "usage",
2694
- inputTokens: 100,
2695
- outputTokens: 50,
2696
- model: "test-model",
2697
- providerDurationMs: 100,
2698
- });
2699
- if (options?.onCheckpoint) {
2700
- await options.onCheckpoint({
2701
- turnIndex: 0,
2702
- toolCount: 1,
2703
- hasToolUse: true,
2704
- history: messages,
2705
- });
2706
- }
2707
- return [
2708
- ...messages,
2045
+ // The tool turn reaches a checkpoint, but with handoff disabled the loop
2046
+ // continues to the next turn and completes normally.
2047
+ const ctx = makeCtx({
2048
+ providerResponses: [
2049
+ toolUseResponse("tu-1", "file_read", {}),
2050
+ textResponse("done"),
2051
+ ],
2052
+ loopTools: [
2709
2053
  {
2710
- role: "assistant" as const,
2711
- content: [{ type: "text", text: "done" }] as ContentBlock[],
2054
+ name: "file_read",
2055
+ description: "Read a file",
2056
+ input_schema: { type: "object", properties: {} },
2712
2057
  },
2713
- ];
2714
- };
2715
-
2716
- const ctx = makeCtx({
2717
- agentLoopRun,
2058
+ ],
2059
+ toolExecutor: async () => ({ content: "content", isError: false }),
2718
2060
  canHandoffAtCheckpoint: () => false,
2719
2061
  } as unknown as Partial<AgentLoopConversationContext>);
2720
2062
 
@@ -2736,36 +2078,18 @@ describe("session-agent-loop", () => {
2736
2078
  const events: ServerMessage[] = [];
2737
2079
  const abortController = new AbortController();
2738
2080
 
2739
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
2740
- // Prime the assistant row anchor — production code emits this from
2741
- // `AgentLoop.run` just before `provider.sendMessage`.
2742
- await onEvent({ type: "llm_call_started" });
2743
- onEvent({
2744
- type: "message_complete",
2745
- message: {
2746
- role: "assistant",
2747
- content: [{ type: "text", text: "partial" }],
2748
- },
2749
- });
2750
- onEvent({
2751
- type: "usage",
2752
- inputTokens: 100,
2753
- outputTokens: 50,
2754
- model: "test-model",
2755
- providerDurationMs: 100,
2756
- });
2757
- // Simulate abort after processing
2758
- abortController.abort();
2759
- return [
2760
- ...messages,
2761
- {
2762
- role: "assistant" as const,
2763
- content: [{ type: "text", text: "partial" }] as ContentBlock[],
2764
- },
2765
- ];
2081
+ // The provider completes its response but the user cancels mid-turn, so
2082
+ // the orchestrator observes the aborted signal once the loop returns.
2083
+ const provider: Provider = {
2084
+ name: "mock",
2085
+ async sendMessage(_messages, options) {
2086
+ options?.onEvent?.({ type: "text_delta", text: "partial" });
2087
+ abortController.abort();
2088
+ return textResponse("partial");
2089
+ },
2766
2090
  };
2767
2091
 
2768
- const ctx = makeCtx({ agentLoopRun, abortController });
2092
+ const ctx = makeCtx({ loopProvider: provider, abortController });
2769
2093
  await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
2770
2094
 
2771
2095
  const cancelled = events.find((e) => e.type === "generation_cancelled");
@@ -2776,13 +2100,16 @@ describe("session-agent-loop", () => {
2776
2100
  const events: ServerMessage[] = [];
2777
2101
  const abortController = new AbortController();
2778
2102
 
2779
- const agentLoopRun: AgentLoopRun = async () => {
2780
- abortController.abort();
2781
- const err = new DOMException("The operation was aborted", "AbortError");
2782
- throw err;
2103
+ // The provider rejects with an AbortError after the user cancels.
2104
+ const provider: Provider = {
2105
+ name: "mock",
2106
+ async sendMessage() {
2107
+ abortController.abort();
2108
+ throw new DOMException("The operation was aborted", "AbortError");
2109
+ },
2783
2110
  };
2784
2111
 
2785
- const ctx = makeCtx({ agentLoopRun, abortController });
2112
+ const ctx = makeCtx({ loopProvider: provider, abortController });
2786
2113
  await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
2787
2114
 
2788
2115
  const cancelled = events.find((e) => e.type === "generation_cancelled");
@@ -2799,36 +2126,17 @@ describe("session-agent-loop", () => {
2799
2126
  const abortController = new AbortController();
2800
2127
  resolveAssistantAttachmentsMock.mockClear();
2801
2128
 
2802
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
2803
- // Prime the assistant row anchor — production code emits this from
2804
- // `AgentLoop.run` just before `provider.sendMessage`.
2805
- await onEvent({ type: "llm_call_started" });
2806
- onEvent({
2807
- type: "message_complete",
2808
- message: {
2809
- role: "assistant",
2810
- content: [{ type: "text", text: "partial" }],
2811
- },
2812
- });
2813
- onEvent({
2814
- type: "usage",
2815
- inputTokens: 100,
2816
- outputTokens: 50,
2817
- model: "test-model",
2818
- providerDurationMs: 100,
2819
- });
2820
- // Simulate abort after processing
2821
- abortController.abort();
2822
- return [
2823
- ...messages,
2824
- {
2825
- role: "assistant" as const,
2826
- content: [{ type: "text", text: "partial" }] as ContentBlock[],
2827
- },
2828
- ];
2129
+ // The provider completes its response but the user cancels mid-turn.
2130
+ const provider: Provider = {
2131
+ name: "mock",
2132
+ async sendMessage(_messages, options) {
2133
+ options?.onEvent?.({ type: "text_delta", text: "partial" });
2134
+ abortController.abort();
2135
+ return textResponse("partial");
2136
+ },
2829
2137
  };
2830
2138
 
2831
- const ctx = makeCtx({ agentLoopRun, abortController });
2139
+ const ctx = makeCtx({ loopProvider: provider, abortController });
2832
2140
  await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
2833
2141
 
2834
2142
  const cancelled = events.find((e) => e.type === "generation_cancelled");
@@ -2840,96 +2148,50 @@ describe("session-agent-loop", () => {
2840
2148
 
2841
2149
  describe("finally block cleanup", () => {
2842
2150
  test("increments turnCount after successful run", async () => {
2843
- const ctx = makeCtx({
2844
- agentLoopRun: async (messages, onEvent) => {
2845
- // Prime the assistant row anchor — production code emits this from
2846
- // `AgentLoop.run` just before `provider.sendMessage`.
2847
- await onEvent({ type: "llm_call_started" });
2848
- onEvent({
2849
- type: "message_complete",
2850
- message: {
2851
- role: "assistant",
2852
- content: [{ type: "text", text: "hi" }],
2853
- },
2854
- });
2855
- onEvent({
2856
- type: "usage",
2857
- inputTokens: 10,
2858
- outputTokens: 5,
2859
- model: "test",
2860
- providerDurationMs: 50,
2861
- });
2862
- return [
2863
- ...messages,
2864
- {
2865
- role: "assistant" as const,
2866
- content: [{ type: "text", text: "hi" }] as ContentBlock[],
2867
- },
2868
- ];
2869
- },
2870
- });
2151
+ // GIVEN a real loop that answers in a single text turn
2152
+ const ctx = makeCtx({ providerResponses: [textResponse("hi")] });
2871
2153
  expect(ctx.turnCount).toBe(0);
2872
2154
 
2155
+ // WHEN the orchestrator runs the turn to completion
2873
2156
  await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
2874
2157
 
2158
+ // THEN the finally block increments the turn count
2875
2159
  expect(ctx.turnCount).toBe(1);
2876
2160
  });
2877
2161
 
2878
2162
  test("clears processing state and abort controller", async () => {
2879
- const ctx = makeCtx({
2880
- agentLoopRun: async (messages, onEvent) => {
2881
- // Prime the assistant row anchor — production code emits this from
2882
- // `AgentLoop.run` just before `provider.sendMessage`.
2883
- await onEvent({ type: "llm_call_started" });
2884
- onEvent({
2885
- type: "message_complete",
2886
- message: {
2887
- role: "assistant",
2888
- content: [{ type: "text", text: "hi" }],
2889
- },
2890
- });
2891
- onEvent({
2892
- type: "usage",
2893
- inputTokens: 10,
2894
- outputTokens: 5,
2895
- model: "test",
2896
- providerDurationMs: 50,
2897
- });
2898
- return [
2899
- ...messages,
2900
- {
2901
- role: "assistant" as const,
2902
- content: [{ type: "text", text: "hi" }] as ContentBlock[],
2903
- },
2904
- ];
2905
- },
2906
- });
2163
+ // GIVEN a real loop that answers in a single text turn
2164
+ const ctx = makeCtx({ providerResponses: [textResponse("hi")] });
2907
2165
 
2166
+ // WHEN the orchestrator runs the turn to completion
2908
2167
  await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
2909
2168
 
2910
- expect(ctx.processing).toBe(false);
2169
+ // THEN the finally block clears all per-turn processing state
2170
+ expect(ctx.isProcessing()).toBe(false);
2911
2171
  expect(ctx.abortController).toBeNull();
2912
2172
  expect(ctx.currentRequestId).toBeUndefined();
2913
2173
  expect(ctx.commandIntent).toBeUndefined();
2914
2174
  });
2915
2175
 
2916
- test("clears state even when agent loop throws", async () => {
2176
+ test("clears state and surfaces a processing error when the provider call fails", async () => {
2177
+ // GIVEN a real loop whose provider rejects with an unexpected error
2917
2178
  const events: ServerMessage[] = [];
2918
2179
  const ctx = makeCtx({
2919
- agentLoopRun: async () => {
2920
- throw new Error("unexpected crash");
2921
- },
2180
+ loopProvider: {
2181
+ name: "mock-provider",
2182
+ async sendMessage() {
2183
+ throw new Error("unexpected crash");
2184
+ },
2185
+ } as unknown as Provider,
2922
2186
  });
2923
2187
 
2188
+ // WHEN the orchestrator runs the turn
2924
2189
  await runAgentLoopImpl(ctx, "hi", "msg-1", (msg) => events.push(msg));
2925
2190
 
2926
- expect(ctx.processing).toBe(false);
2191
+ // THEN the finally block clears per-turn state and the failure is
2192
+ // surfaced as a processing-failed conversation error
2193
+ expect(ctx.isProcessing()).toBe(false);
2927
2194
  expect(ctx.abortController).toBeNull();
2928
- expect(events.find((event) => event.type === "error")).toMatchObject({
2929
- type: "error",
2930
- code: "CONVERSATION_PROCESSING_FAILED",
2931
- errorCategory: "processing_failed",
2932
- });
2933
2195
  expect(
2934
2196
  events.find((event) => event.type === "conversation_error"),
2935
2197
  ).toMatchObject({
@@ -2940,46 +2202,19 @@ describe("session-agent-loop", () => {
2940
2202
  });
2941
2203
 
2942
2204
  test("drains queue after completion", async () => {
2205
+ // GIVEN a real loop that answers in a single text turn
2943
2206
  let drainReason: string | undefined;
2944
2207
  const ctx = makeCtx({
2945
- agentLoopRun: async (
2946
- messages: Message[],
2947
- onEvent: (event: AgentEvent) => void | Promise<void>,
2948
- ) => {
2949
- // Prime the assistant row anchor — production code emits this from
2950
- // `AgentLoop.run` just before `provider.sendMessage`. Must be
2951
- // awaited so the assistant row is reserved before message_complete
2952
- // tries to write into it.
2953
- await onEvent({ type: "llm_call_started" });
2954
- onEvent({
2955
- type: "message_complete",
2956
- message: {
2957
- role: "assistant",
2958
- content: [{ type: "text", text: "ok" }],
2959
- },
2960
- });
2961
- onEvent({
2962
- type: "usage",
2963
- inputTokens: 10,
2964
- outputTokens: 5,
2965
- model: "test",
2966
- providerDurationMs: 50,
2967
- });
2968
- return [
2969
- ...messages,
2970
- {
2971
- role: "assistant" as const,
2972
- content: [{ type: "text", text: "ok" }] as ContentBlock[],
2973
- },
2974
- ];
2975
- },
2208
+ providerResponses: [textResponse("ok")],
2976
2209
  drainQueue: (reason: string) => {
2977
2210
  drainReason = reason;
2978
2211
  },
2979
2212
  } as unknown as Partial<AgentLoopConversationContext>);
2980
2213
 
2214
+ // WHEN the orchestrator runs the turn to completion
2981
2215
  await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
2982
2216
 
2217
+ // THEN the queue is drained with the loop-complete reason
2983
2218
  expect(drainReason).toBe("loop_complete");
2984
2219
  });
2985
2220
  });
@@ -3098,7 +2333,7 @@ describe("session-agent-loop", () => {
3098
2333
  isUserMessage: true,
3099
2334
  });
3100
2335
 
3101
- expect(ctx.processing).toBe(false);
2336
+ expect(ctx.isProcessing()).toBe(false);
3102
2337
  expect(ctx.abortController).toBeNull();
3103
2338
  expect(ctx.currentRequestId).toBeUndefined();
3104
2339
  });
@@ -3208,24 +2443,17 @@ describe("session-agent-loop", () => {
3208
2443
  test("synthesizes error assistant message when provider returns no response", async () => {
3209
2444
  const events: ServerMessage[] = [];
3210
2445
 
3211
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
3212
- // Emit a non-ordering, non-context-too-large error that sets providerErrorUserMessage
3213
- onEvent({
3214
- type: "error",
3215
- error: new Error("Internal processing failure"),
3216
- });
3217
- onEvent({
3218
- type: "usage",
3219
- inputTokens: 100,
3220
- outputTokens: 0,
3221
- model: "test-model",
3222
- providerDurationMs: 50,
3223
- });
3224
- // Return same messages (no assistant message appended)
3225
- return messages;
3226
- };
3227
-
3228
- const ctx = makeCtx({ agentLoopRun });
2446
+ // GIVEN a real loop whose provider rejects with a generic error
2447
+ // (non-ordering, non-context-too-large) so the loop emits `error` and
2448
+ // the orchestrator sets `providerErrorUserMessage`.
2449
+ const ctx = makeCtx({
2450
+ loopProvider: {
2451
+ name: "mock-provider",
2452
+ async sendMessage() {
2453
+ throw new Error("Internal processing failure");
2454
+ },
2455
+ } as unknown as Provider,
2456
+ });
3229
2457
  await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
3230
2458
 
3231
2459
  // The error should be sent as a conversation_error (not as an
@@ -3249,26 +2477,19 @@ describe("session-agent-loop", () => {
3249
2477
  // sweep would wrong-attach this row to the wrong assistant message.
3250
2478
  const events: ServerMessage[] = [];
3251
2479
 
3252
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
3253
- // 1) handleProviderError -> writes an `llm_request_logs` row with
3254
- // messageId=null (the orphan we are trying to link).
3255
- onEvent({
3256
- type: "provider_error",
3257
- error: new Error("upstream 500"),
3258
- rawRequest: { model: "gpt-4.1", messages: [] },
3259
- actualProvider: "openai",
3260
- });
3261
- // 2) handleError -> sets `state.providerErrorUserMessage`, which
3262
- // activates the synthetic-message branch below the loop.
3263
- onEvent({
3264
- type: "error",
3265
- error: new Error("upstream 500"),
3266
- });
3267
- // Provider returned no assistant content — same messages back.
3268
- return messages;
3269
- };
3270
-
3271
- const ctx = makeCtx({ agentLoopRun });
2480
+ // GIVEN a real loop whose provider rejects: the loop emits
2481
+ // `provider_error` (writing an `llm_request_logs` row with
2482
+ // messageId=null — the orphan we link) then `error` (which sets
2483
+ // `state.providerErrorUserMessage`, activating the synthetic-message
2484
+ // branch below the loop).
2485
+ const ctx = makeCtx({
2486
+ loopProvider: {
2487
+ name: "mock-provider",
2488
+ async sendMessage() {
2489
+ throw new Error("upstream 500");
2490
+ },
2491
+ } as unknown as Provider,
2492
+ });
3272
2493
  await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
3273
2494
 
3274
2495
  // The orphan was written with messageId=undefined.
@@ -3315,39 +2536,10 @@ describe("session-agent-loop", () => {
3315
2536
  // observe the sync-invalidation publish path on the same turn.
3316
2537
  projectAssistantMessageMock.mockImplementationOnce(() => true);
3317
2538
 
3318
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
3319
- await onEvent({ type: "llm_call_started" });
3320
- // `message_complete` is awaited so `handleMessageComplete` (and its
3321
- // async indexer + projector chain) completes before the next event
3322
- // or before the loop returns. Without the await the projector's
3323
- // synchronous call still races against the test's assertion phase
3324
- // because the indexer's `await` yields microtasks.
3325
- await onEvent({
3326
- type: "message_complete",
3327
- message: {
3328
- role: "assistant",
3329
- content: [{ type: "text", text: "indexed reply" }],
3330
- },
3331
- });
3332
- onEvent({
3333
- type: "usage",
3334
- inputTokens: 10,
3335
- outputTokens: 5,
3336
- model: "test",
3337
- providerDurationMs: 50,
3338
- });
3339
- return [
3340
- ...messages,
3341
- {
3342
- role: "assistant" as const,
3343
- content: [
3344
- { type: "text", text: "indexed reply" },
3345
- ] as ContentBlock[],
3346
- },
3347
- ];
3348
- };
3349
-
3350
- const ctx = makeCtx({ agentLoopRun });
2539
+ // GIVEN a real loop that answers with a single finalized assistant turn
2540
+ const ctx = makeCtx({
2541
+ providerResponses: [textResponse("indexed reply")],
2542
+ });
3351
2543
  await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
3352
2544
 
3353
2545
  // Indexer fired with the reserved row's id + the finalized content.
@@ -3410,34 +2602,8 @@ describe("session-agent-loop", () => {
3410
2602
  metadata: null,
3411
2603
  };
3412
2604
 
3413
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
3414
- await onEvent({ type: "llm_call_started" });
3415
- // See sibling test — `message_complete` must be awaited so the
3416
- // projector call lands before the assertion phase.
3417
- await onEvent({
3418
- type: "message_complete",
3419
- message: {
3420
- role: "assistant",
3421
- content: [{ type: "text", text: "quiet" }],
3422
- },
3423
- });
3424
- onEvent({
3425
- type: "usage",
3426
- inputTokens: 1,
3427
- outputTokens: 1,
3428
- model: "test",
3429
- providerDurationMs: 1,
3430
- });
3431
- return [
3432
- ...messages,
3433
- {
3434
- role: "assistant" as const,
3435
- content: [{ type: "text", text: "quiet" }] as ContentBlock[],
3436
- },
3437
- ];
3438
- };
3439
-
3440
- const ctx = makeCtx({ agentLoopRun });
2605
+ // GIVEN a real loop that answers with a single finalized assistant turn
2606
+ const ctx = makeCtx({ providerResponses: [textResponse("quiet")] });
3441
2607
  await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
3442
2608
 
3443
2609
  expect(projectAssistantMessageMock).toHaveBeenCalledTimes(1);
@@ -3462,40 +2628,33 @@ describe("session-agent-loop", () => {
3462
2628
  // Indexer/projector mocks default to no-op; no finalized row in this
3463
2629
  // test, so `mockMessageById` stays null.
3464
2630
 
3465
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
3466
- // First LLM call: reserve msg-strand-A, never finalize.
3467
- await onEvent({ type: "llm_call_started" });
3468
- // Second LLM call: should delete msg-strand-A before reserving
3469
- // msg-strand-B.
3470
- await onEvent({ type: "llm_call_started" });
3471
- // Finalize the second one so the loop has a valid assistant message
3472
- // and exits cleanly.
3473
- onEvent({
3474
- type: "message_complete",
3475
- message: {
3476
- role: "assistant",
3477
- content: [{ type: "text", text: "retry succeeded" }],
3478
- },
3479
- });
3480
- onEvent({
3481
- type: "usage",
3482
- inputTokens: 5,
3483
- outputTokens: 3,
3484
- model: "test",
3485
- providerDurationMs: 25,
3486
- });
3487
- return [
3488
- ...messages,
3489
- {
3490
- role: "assistant" as const,
3491
- content: [
3492
- { type: "text", text: "retry succeeded" },
3493
- ] as ContentBlock[],
3494
- },
3495
- ];
3496
- };
2631
+ // A single reducer tier converges the oversized context so the
2632
+ // orchestrator re-enters the loop after the first call fails.
2633
+ mockReducerStepFn = (msgs: Message[]) => ({
2634
+ messages: msgs,
2635
+ tier: "forced_compaction",
2636
+ state: {
2637
+ appliedTiers: ["forced_compaction"],
2638
+ injectionMode: "full",
2639
+ exhausted: false,
2640
+ },
2641
+ estimatedTokens: 5000,
2642
+ });
3497
2643
 
3498
- const ctx = makeCtx({ agentLoopRun });
2644
+ // GIVEN a real loop whose first call rejects with context-too-large
2645
+ // (reserving msg-strand-A but never finalizing it), then recovers via
2646
+ // convergence on re-entry. The re-entry's `llm_call_started` must
2647
+ // delete the stranded msg-strand-A before reserving msg-strand-B.
2648
+ const ctx = makeCtx({
2649
+ providerResponses: [
2650
+ new Error("context_length_exceeded"),
2651
+ textResponse("retry succeeded"),
2652
+ ],
2653
+ contextWindowManager: {
2654
+ shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
2655
+ maybeCompact: async () => ({ compacted: false }),
2656
+ } as unknown as AgentLoopConversationContext["contextWindowManager"],
2657
+ });
3499
2658
  await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
3500
2659
 
3501
2660
  // Exactly one delete fires — for msg-strand-A, before the second
@@ -3523,27 +2682,20 @@ describe("session-agent-loop", () => {
3523
2682
  id: "msg-orphaned-reservation",
3524
2683
  }));
3525
2684
 
3526
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
3527
- // Reserve the orphan.
3528
- await onEvent({ type: "llm_call_started" });
3529
- // Provider rejects — writes the llm_request_log row and arms
3530
- // `state.providerErrorUserMessage` via `handleError`.
3531
- onEvent({
3532
- type: "provider_error",
3533
- error: new Error("upstream 500"),
3534
- rawRequest: { model: "gpt-4.1", messages: [] },
3535
- actualProvider: "openai",
3536
- });
3537
- onEvent({
3538
- type: "error",
3539
- error: new Error("upstream 500"),
3540
- });
3541
- // No assistant message in the result — the synthetic-error branch
3542
- // below the agent loop fires.
3543
- return messages;
3544
- };
3545
-
3546
- const ctx = makeCtx({ agentLoopRun });
2685
+ // GIVEN a real loop that reserves an assistant row at
2686
+ // `llm_call_started`, then whose provider rejects: the loop emits
2687
+ // `provider_error` (writing the llm_request_log row) and `error`
2688
+ // (arming `state.providerErrorUserMessage`), exiting with no
2689
+ // `message_complete` so the synthetic-error branch below the loop
2690
+ // fires.
2691
+ const ctx = makeCtx({
2692
+ loopProvider: {
2693
+ name: "mock-provider",
2694
+ async sendMessage() {
2695
+ throw new Error("upstream 500");
2696
+ },
2697
+ } as unknown as Provider,
2698
+ });
3547
2699
  await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
3548
2700
 
3549
2701
  // The orphan was deleted exactly once, before the synthetic error
@@ -3599,40 +2751,23 @@ describe("session-agent-loop", () => {
3599
2751
  metadata: null,
3600
2752
  };
3601
2753
 
3602
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
3603
- await onEvent({ type: "llm_call_started" });
3604
- // Two small deltas well under the 1024-char size gate — should
3605
- // schedule a single debounced flush.
3606
- onEvent({ type: "text_delta", text: "Hello, " });
3607
- onEvent({ type: "text_delta", text: "world." });
3608
- // Wait long enough for the 250ms debounce to fire.
3609
- await new Promise((resolve) => setTimeout(resolve, 1100));
3610
- await onEvent({
3611
- type: "message_complete",
3612
- message: {
3613
- role: "assistant",
3614
- content: [{ type: "text", text: "Hello, world." }],
3615
- },
3616
- });
3617
- onEvent({
3618
- type: "usage",
3619
- inputTokens: 10,
3620
- outputTokens: 5,
3621
- model: "test",
3622
- providerDurationMs: 50,
3623
- });
3624
- return [
3625
- ...messages,
3626
- {
3627
- role: "assistant" as const,
3628
- content: [
3629
- { type: "text", text: "Hello, world." },
3630
- ] as ContentBlock[],
2754
+ // GIVEN a real loop whose provider streams two small deltas (each under
2755
+ // the 1024-char size gate) then holds the turn open past the 250ms
2756
+ // debounce window before completing, so a single debounced partial
2757
+ // flush lands before `message_complete`.
2758
+ const ctx = makeCtx({
2759
+ loopProvider: {
2760
+ name: "mock-provider",
2761
+ async sendMessage(_messages, options) {
2762
+ options?.onEvent?.({ type: "text_delta", text: "Hello, " });
2763
+ options?.onEvent?.({ type: "text_delta", text: "world." });
2764
+ await new Promise((resolve) => setTimeout(resolve, 1100));
2765
+ return textResponse("Hello, world.");
3631
2766
  },
3632
- ];
3633
- };
2767
+ },
2768
+ });
3634
2769
 
3635
- const ctx = makeCtx({ agentLoopRun });
2770
+ // WHEN the orchestrator runs the turn to completion
3636
2771
  await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
3637
2772
 
3638
2773
  // Exactly two `updateContent` calls land:
@@ -3668,70 +2803,38 @@ describe("session-agent-loop", () => {
3668
2803
  metadata: null,
3669
2804
  };
3670
2805
 
3671
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
3672
- await onEvent({ type: "llm_call_started" });
3673
- // No text delta — only a tool_use. If `handleToolUse` were
3674
- // flushing, this would land a partial write before
3675
- // `message_complete`.
3676
- onEvent({
3677
- type: "tool_use",
3678
- id: "tu-no-flush",
3679
- name: "file_read",
3680
- input: { path: "/foo" },
3681
- });
3682
- // Yield a microtask so any (incorrectly) fire-and-forget
3683
- // pipeline call has a chance to land before message_complete.
3684
- await new Promise((resolve) => setImmediate(resolve));
3685
- onEvent({
3686
- type: "tool_result",
3687
- toolUseId: "tu-no-flush",
3688
- content: "ok",
3689
- isError: false,
3690
- });
3691
- await onEvent({
3692
- type: "message_complete",
3693
- message: {
3694
- role: "assistant",
3695
- content: [
3696
- {
3697
- type: "tool_use",
3698
- id: "tu-no-flush",
3699
- name: "file_read",
3700
- input: { path: "/foo" },
3701
- },
3702
- ],
3703
- },
3704
- });
3705
- onEvent({
3706
- type: "usage",
3707
- inputTokens: 10,
3708
- outputTokens: 5,
3709
- model: "test",
3710
- providerDurationMs: 50,
3711
- });
3712
- return [
3713
- ...messages,
2806
+ // GIVEN a real loop that runs one tool turn — the loop emits `tool_use`
2807
+ // strictly AFTER `message_complete` — and then answers with a final
2808
+ // text turn. The tool executor returns immediately.
2809
+ const ctx = makeCtx({
2810
+ providerResponses: [
2811
+ toolUseResponse("tu-no-flush", "file_read", { path: "/foo" }),
2812
+ textResponse("done"),
2813
+ ],
2814
+ loopTools: [
3714
2815
  {
3715
- role: "assistant" as const,
3716
- content: [
3717
- {
3718
- type: "tool_use",
3719
- id: "tu-no-flush",
3720
- name: "file_read",
3721
- input: { path: "/foo" },
3722
- },
3723
- ] as ContentBlock[],
2816
+ name: "file_read",
2817
+ description: "Read a file",
2818
+ input_schema: {
2819
+ type: "object",
2820
+ properties: { path: { type: "string" } },
2821
+ },
3724
2822
  },
3725
- ];
3726
- };
2823
+ ],
2824
+ toolExecutor: async () => ({ content: "ok", isError: false }),
2825
+ });
3727
2826
 
3728
- const ctx = makeCtx({ agentLoopRun });
2827
+ // WHEN the orchestrator runs the turn to completion
3729
2828
  await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
3730
2829
 
3731
- // Only the authoritative final flush from `handleMessageComplete`
3732
- // lands. A partial flush from `handleToolUse` would have made this
3733
- // 2; that's the regression this test guards against.
3734
- expect(updateMessageContentMock).toHaveBeenCalledTimes(1);
2830
+ // Four authoritative writes land and no stray partial flush:
2831
+ // - one final flush per `message_complete` (the tool turn and the final
2832
+ // text turn), plus
2833
+ // - two grouped tool-result user-row writes (persist-on-arrival and the
2834
+ // turn-boundary finalize).
2835
+ // `handleToolUse` contributes no partial flush of its own; one would make
2836
+ // this 5. That stray flush is the regression this test guards against.
2837
+ expect(updateMessageContentMock).toHaveBeenCalledTimes(4);
3735
2838
  });
3736
2839
 
3737
2840
  test("handleMessageComplete clears any pending debounce timer before the final flush", async () => {
@@ -3744,45 +2847,53 @@ describe("session-agent-loop", () => {
3744
2847
  metadata: null,
3745
2848
  };
3746
2849
 
3747
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
3748
- await onEvent({ type: "llm_call_started" });
3749
- // Short delta schedules a debounce timer but does NOT trip the
3750
- // size gate. message_complete then arrives immediately after,
3751
- // before the 250ms timer can fire.
3752
- onEvent({ type: "text_delta", text: "Quick reply." });
3753
- await onEvent({
3754
- type: "message_complete",
3755
- message: {
3756
- role: "assistant",
3757
- content: [{ type: "text", text: "Quick reply." }],
2850
+ // GIVEN a real loop whose first turn streams a short delta (scheduling a
2851
+ // debounce timer) and completes as a tool turn — so `message_complete`
2852
+ // arrives before the 250ms timer and clears it. The tool executor then
2853
+ // holds the loop open well past the original debounce window, proving a
2854
+ // late timer does NOT fire a stray partial flush, before a final text
2855
+ // turn ends the run.
2856
+ const ctx = makeCtx({
2857
+ providerResponses: [
2858
+ {
2859
+ content: [
2860
+ { type: "text", text: "Quick reply." },
2861
+ {
2862
+ type: "tool_use",
2863
+ id: "tu-keep-alive",
2864
+ name: "file_read",
2865
+ input: {},
2866
+ },
2867
+ ],
2868
+ model: "mock-model",
2869
+ usage: { inputTokens: 10, outputTokens: 5 },
2870
+ stopReason: "tool_use",
3758
2871
  },
3759
- });
3760
- onEvent({
3761
- type: "usage",
3762
- inputTokens: 10,
3763
- outputTokens: 5,
3764
- model: "test",
3765
- providerDurationMs: 50,
3766
- });
3767
- // Wait past the original debounce window to prove a late timer
3768
- // does NOT fire a stray partial flush.
3769
- await new Promise((resolve) => setTimeout(resolve, 1100));
3770
- return [
3771
- ...messages,
2872
+ textResponse("done"),
2873
+ ],
2874
+ loopTools: [
3772
2875
  {
3773
- role: "assistant" as const,
3774
- content: [{ type: "text", text: "Quick reply." }] as ContentBlock[],
2876
+ name: "file_read",
2877
+ description: "Read a file",
2878
+ input_schema: { type: "object", properties: {} },
3775
2879
  },
3776
- ];
3777
- };
2880
+ ],
2881
+ toolExecutor: async () => {
2882
+ await new Promise((resolve) => setTimeout(resolve, 1100));
2883
+ return { content: "ok", isError: false };
2884
+ },
2885
+ });
3778
2886
 
3779
- const ctx = makeCtx({ agentLoopRun });
2887
+ // WHEN the orchestrator runs the turn to completion
3780
2888
  await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
3781
2889
 
3782
- // Only the final flush from `handleMessageComplete` lands. The
3783
- // debounced partial would have fired around T+250ms; the timer-clear
3784
- // at the top of `handleMessageComplete` cancels it.
3785
- expect(updateMessageContentMock).toHaveBeenCalledTimes(1);
2890
+ // Four authoritative writes land: one final flush per `message_complete`
2891
+ // (the tool turn and the final text turn) plus two grouped tool-result
2892
+ // user-row writes (persist-on-arrival and the turn-boundary finalize).
2893
+ // The debounced partial would have fired around T+250ms — during the tool
2894
+ // executor's hold — but the timer-clear at the top of
2895
+ // `handleMessageComplete` cancels it, so no stray fifth flush appears.
2896
+ expect(updateMessageContentMock).toHaveBeenCalledTimes(4);
3786
2897
  });
3787
2898
 
3788
2899
  test("partial flushes never trigger the indexer or attention projector", async () => {
@@ -3795,54 +2906,29 @@ describe("session-agent-loop", () => {
3795
2906
  metadata: null,
3796
2907
  };
3797
2908
 
3798
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
3799
- await onEvent({ type: "llm_call_started" });
3800
- onEvent({ type: "text_delta", text: "hello world" });
3801
- // Wait past the 250ms debounce so the partial flush definitely
3802
- // lands BEFORE message_complete fires.
3803
- await new Promise((resolve) => setTimeout(resolve, 1100));
3804
- // Snapshot the indexer/projector call counts AFTER the partial
3805
- // flush has run but BEFORE message_complete. They must be zero.
3806
- const indexerCallsBeforeComplete =
3807
- indexMessageNowMock.mock.calls.length;
3808
- const projectorCallsBeforeComplete =
3809
- projectAssistantMessageMock.mock.calls.length;
3810
- // Stash on a side channel the assertion phase can read.
3811
- (
3812
- ctx as unknown as { __partialSnapshot?: [number, number] }
3813
- ).__partialSnapshot = [
3814
- indexerCallsBeforeComplete,
3815
- projectorCallsBeforeComplete,
3816
- ];
3817
- await onEvent({
3818
- type: "message_complete",
3819
- message: {
3820
- role: "assistant",
3821
- content: [{ type: "text", text: "hello world" }],
3822
- },
3823
- });
3824
- onEvent({
3825
- type: "usage",
3826
- inputTokens: 10,
3827
- outputTokens: 5,
3828
- model: "test",
3829
- providerDurationMs: 50,
3830
- });
3831
- return [
3832
- ...messages,
3833
- {
3834
- role: "assistant" as const,
3835
- content: [{ type: "text", text: "hello world" }] as ContentBlock[],
2909
+ // GIVEN a real loop whose provider streams a delta then holds the turn
2910
+ // open past the 250ms debounce window so the partial flush lands BEFORE
2911
+ // `message_complete`. The indexer/projector counts are snapshotted at
2912
+ // that mid-turn point (after the partial flush, before completion).
2913
+ let snapshot: [number, number] | undefined;
2914
+ const ctx = makeCtx({
2915
+ loopProvider: {
2916
+ name: "mock-provider",
2917
+ async sendMessage(_messages, options) {
2918
+ options?.onEvent?.({ type: "text_delta", text: "hello world" });
2919
+ await new Promise((resolve) => setTimeout(resolve, 1100));
2920
+ snapshot = [
2921
+ indexMessageNowMock.mock.calls.length,
2922
+ projectAssistantMessageMock.mock.calls.length,
2923
+ ];
2924
+ return textResponse("hello world");
3836
2925
  },
3837
- ];
3838
- };
2926
+ },
2927
+ });
3839
2928
 
3840
- const ctx = makeCtx({ agentLoopRun });
2929
+ // WHEN the orchestrator runs the turn to completion
3841
2930
  await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
3842
2931
 
3843
- const snapshot = (
3844
- ctx as unknown as { __partialSnapshot?: [number, number] }
3845
- ).__partialSnapshot;
3846
2932
  expect(snapshot).toBeDefined();
3847
2933
  // Indexer + projector were both ZERO during the mid-turn partial
3848
2934
  // flush — they only fire from `handleMessageComplete` after the
@@ -3870,35 +2956,21 @@ describe("session-agent-loop", () => {
3870
2956
  const ghToken = "ghp_" + "a".repeat(36);
3871
2957
  const payload = "Here's the key: " + ghToken + " enjoy.";
3872
2958
 
3873
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
3874
- await onEvent({ type: "llm_call_started" });
3875
- onEvent({ type: "text_delta", text: payload });
3876
- // Wait past the 250ms debounce so the partial flush lands.
3877
- await new Promise((resolve) => setTimeout(resolve, 1100));
3878
- await onEvent({
3879
- type: "message_complete",
3880
- message: {
3881
- role: "assistant",
3882
- content: [{ type: "text", text: payload }],
3883
- },
3884
- });
3885
- onEvent({
3886
- type: "usage",
3887
- inputTokens: 10,
3888
- outputTokens: 5,
3889
- model: "test",
3890
- providerDurationMs: 50,
3891
- });
3892
- return [
3893
- ...messages,
3894
- {
3895
- role: "assistant" as const,
3896
- content: [{ type: "text", text: payload }] as ContentBlock[],
2959
+ // GIVEN a real loop whose provider streams the PAT-bearing payload as a
2960
+ // delta then holds the turn open past the 250ms debounce window so the
2961
+ // partial flush lands before `message_complete`.
2962
+ const ctx = makeCtx({
2963
+ loopProvider: {
2964
+ name: "mock-provider",
2965
+ async sendMessage(_messages, options) {
2966
+ options?.onEvent?.({ type: "text_delta", text: payload });
2967
+ await new Promise((resolve) => setTimeout(resolve, 1100));
2968
+ return textResponse(payload);
3897
2969
  },
3898
- ];
3899
- };
2970
+ },
2971
+ });
3900
2972
 
3901
- const ctx = makeCtx({ agentLoopRun });
2973
+ // WHEN the orchestrator runs the turn to completion
3902
2974
  await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
3903
2975
 
3904
2976
  expect(updateMessageContentMock).toHaveBeenCalledTimes(2);
@@ -3922,26 +2994,21 @@ describe("session-agent-loop", () => {
3922
2994
  id: "msg-orphan-with-partial",
3923
2995
  }));
3924
2996
 
3925
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
3926
- await onEvent({ type: "llm_call_started" });
3927
- // A debounced delta lands a partial flush BEFORE the provider
3928
- // error fires.
3929
- onEvent({ type: "text_delta", text: "hello world" });
3930
- await new Promise((resolve) => setTimeout(resolve, 1100));
3931
- onEvent({
3932
- type: "provider_error",
3933
- error: new Error("upstream 500"),
3934
- rawRequest: { model: "gpt-4.1", messages: [] },
3935
- actualProvider: "openai",
3936
- });
3937
- onEvent({
3938
- type: "error",
3939
- error: new Error("upstream 500"),
3940
- });
3941
- return messages;
3942
- };
2997
+ // GIVEN a real loop whose provider streams a delta — landing a debounced
2998
+ // partial flush on the reserved row — then rejects, so the loop emits
2999
+ // `provider_error` and `error` and exits with no `message_complete`.
3000
+ const ctx = makeCtx({
3001
+ loopProvider: {
3002
+ name: "mock-provider",
3003
+ async sendMessage(_messages, options) {
3004
+ options?.onEvent?.({ type: "text_delta", text: "hello world" });
3005
+ await new Promise((resolve) => setTimeout(resolve, 1100));
3006
+ throw new Error("upstream 500");
3007
+ },
3008
+ },
3009
+ });
3943
3010
 
3944
- const ctx = makeCtx({ agentLoopRun });
3011
+ // WHEN the orchestrator runs the turn
3945
3012
  await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
3946
3013
 
3947
3014
  // Partial flush fired exactly once (before the provider error).
@@ -4442,51 +3509,32 @@ describe("session-agent-loop", () => {
4442
3509
  compactableStartIndex: 0,
4443
3510
  };
4444
3511
 
4445
- const rawMidLoopBasis: Message[] = [
4446
- {
4447
- role: "user",
4448
- content: [{ type: "text", text: "fresh DB basis user row" }],
4449
- },
4450
- {
4451
- role: "assistant",
4452
- content: [{ type: "text", text: "partial assistant response" }],
4453
- },
4454
- ];
4455
3512
  const maybeCompactInputs: Message[][] = [];
4456
- let runCount = 0;
4457
- const agentLoopRun: AgentLoopRun = async (
4458
- messages,
4459
- _onEvent,
4460
- options,
4461
- ) => {
4462
- runCount++;
4463
- if (runCount === 1) {
4464
- // The loop reaches its mid-loop budget checkpoint with the raw
4465
- // persistent basis as its in-loop history; the wrapped onCheckpoint
4466
- // trips the gate and runs inline compaction over that basis.
4467
- mockEstimateTokens = 90_000;
4468
- const decision = await options?.onCheckpoint?.({
4469
- turnIndex: 0,
4470
- toolCount: 1,
4471
- hasToolUse: true,
4472
- history: rawMidLoopBasis,
4473
- });
4474
- mockEstimateTokens = 1000;
4475
- if (decision !== "continue") {
4476
- return rawMidLoopBasis;
4477
- }
4478
- }
4479
- return [
4480
- ...messages,
4481
- {
4482
- role: "assistant" as const,
4483
- content: [{ type: "text" as const, text: "final response" }],
4484
- },
4485
- ];
4486
- };
4487
3513
 
3514
+ // AND a real loop that runs one tool turn and then a final text turn.
3515
+ // The tool executor raises the token estimate above the mid-loop budget
3516
+ // threshold so the loop compacts in place at the post-tool checkpoint —
3517
+ // over its own in-loop history, which does not match the loaded Slack
3518
+ // rows.
4488
3519
  const ctx = makeCtx({
4489
- agentLoopRun,
3520
+ providerResponses: [
3521
+ toolUseResponse("tu-mid-loop", "file_read", { path: "/foo" }),
3522
+ textResponse("final response"),
3523
+ ],
3524
+ loopTools: [
3525
+ {
3526
+ name: "file_read",
3527
+ description: "Read a file",
3528
+ input_schema: {
3529
+ type: "object",
3530
+ properties: { path: { type: "string" } },
3531
+ },
3532
+ },
3533
+ ],
3534
+ toolExecutor: async () => {
3535
+ mockEstimateTokens = 90_000;
3536
+ return { content: "ok", isError: false };
3537
+ },
4490
3538
  channelCapabilities: {
4491
3539
  channel: "slack",
4492
3540
  dashboardCapable: false,
@@ -4523,6 +3571,9 @@ describe("session-agent-loop", () => {
4523
3571
  summaryText: "",
4524
3572
  };
4525
3573
  }
3574
+ // The mid-loop gate compacted its in-loop basis; drop the estimate
3575
+ // back under budget so the post-compaction provider call proceeds.
3576
+ mockEstimateTokens = 1000;
4526
3577
  return {
4527
3578
  compacted: true,
4528
3579
  messages: [
@@ -4551,7 +3602,9 @@ describe("session-agent-loop", () => {
4551
3602
  await runAgentLoopImpl(ctx, "next reply", "user-msg-mid-loop", () => {});
4552
3603
 
4553
3604
  expect(maybeCompactInputs[0]).toBe(renderedSlackMessages);
4554
- expect(maybeCompactInputs[1]).toBe(rawMidLoopBasis);
3605
+ // The mid-loop gate compacts the loop's own in-loop history, never the
3606
+ // loaded Slack rows — the mismatch this test guards against.
3607
+ expect(maybeCompactInputs[1]).not.toBe(renderedSlackMessages);
4555
3608
  expect(getSlackCompactionWatermarkForPrefixMock).toHaveBeenCalledWith(
4556
3609
  null,
4557
3610
  2,
@@ -4824,67 +3877,32 @@ describe("session-agent-loop", () => {
4824
3877
  estimatedTokens: 5000,
4825
3878
  });
4826
3879
 
4827
- let callCount = 0;
4828
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
4829
- callCount++;
4830
- // Prime the assistant row anchor — production code emits this from
4831
- // `AgentLoop.run` just before `provider.sendMessage`. Retry branches
4832
- // need this on every invocation: each agent-loop iteration reserves
4833
- // its own row.
4834
- await onEvent({ type: "llm_call_started" });
4835
- if (callCount === 1) {
4836
- // Trigger convergence path: error + appended assistant message so
4837
- // updatedHistory.length > preRunHistoryLength at the strip site.
4838
- onEvent({
4839
- type: "error",
4840
- error: new Error("context_length_exceeded"),
4841
- });
4842
- onEvent({
4843
- type: "usage",
4844
- inputTokens: 100,
4845
- outputTokens: 0,
4846
- model: "test-model",
4847
- providerDurationMs: 50,
4848
- });
4849
- return [
4850
- ...messages,
4851
- {
4852
- role: "assistant" as const,
4853
- content: [{ type: "text", text: "partial" }] as ContentBlock[],
4854
- },
4855
- ];
4856
- }
4857
- onEvent({
4858
- type: "message_complete",
4859
- message: {
4860
- role: "assistant",
4861
- content: [{ type: "text", text: "recovered" }],
4862
- },
4863
- });
4864
- onEvent({
4865
- type: "usage",
4866
- inputTokens: 50,
4867
- outputTokens: 25,
4868
- model: "test-model",
4869
- providerDurationMs: 100,
4870
- });
4871
- return [
4872
- ...messages,
3880
+ // GIVEN a real loop that appends a tool turn (so the run reports
3881
+ // `appendedNewMessages`) and then rejects with a context-too-large
3882
+ // error on the following call — the orchestrator strips that appended
3883
+ // history during its bounded convergence path before a final call
3884
+ // recovers.
3885
+ const ctx = makeCtx({
3886
+ providerResponses: [
3887
+ toolUseResponse("t1", "file_read", {}),
3888
+ new Error("context_length_exceeded"),
3889
+ textResponse("recovered"),
3890
+ ],
3891
+ loopTools: [
4873
3892
  {
4874
- role: "assistant" as const,
4875
- content: [{ type: "text", text: "recovered" }] as ContentBlock[],
3893
+ name: "file_read",
3894
+ description: "Read a file",
3895
+ input_schema: { type: "object", properties: {} },
4876
3896
  },
4877
- ];
4878
- };
4879
-
4880
- const ctx = makeCtx({
4881
- agentLoopRun,
3897
+ ],
3898
+ toolExecutor: async () => ({ content: "ok", isError: false }),
4882
3899
  contextWindowManager: {
4883
3900
  shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
4884
3901
  maybeCompact: async () => ({ compacted: false }),
4885
3902
  } as unknown as AgentLoopConversationContext["contextWindowManager"],
4886
3903
  });
4887
3904
 
3905
+ // WHEN the orchestrator runs the turn to completion
4888
3906
  await runAgentLoopImpl(ctx, "hello", "msg-1", () => {});
4889
3907
 
4890
3908
  const stripCalls = setConversationHistoryStrippedAtMock.mock.calls.filter(
@@ -4909,59 +3927,24 @@ describe("session-agent-loop", () => {
4909
3927
  estimatedTokens: 5000,
4910
3928
  });
4911
3929
 
4912
- let callCount = 0;
4913
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
4914
- callCount++;
4915
- // Prime the assistant row anchor — production code emits this from
4916
- // `AgentLoop.run` just before `provider.sendMessage`. Retry branches
4917
- // need this on every invocation: each agent-loop iteration reserves
4918
- // its own row.
4919
- await onEvent({ type: "llm_call_started" });
4920
- if (callCount === 1) {
4921
- onEvent({
4922
- type: "error",
4923
- error: new Error("context_length_exceeded"),
4924
- });
4925
- onEvent({
4926
- type: "usage",
4927
- inputTokens: 100,
4928
- outputTokens: 0,
4929
- model: "test-model",
4930
- providerDurationMs: 50,
4931
- });
4932
- return [
4933
- ...messages,
4934
- {
4935
- role: "assistant" as const,
4936
- content: [{ type: "text", text: "partial" }] as ContentBlock[],
4937
- },
4938
- ];
4939
- }
4940
- onEvent({
4941
- type: "message_complete",
4942
- message: {
4943
- role: "assistant",
4944
- content: [{ type: "text", text: "recovered" }],
4945
- },
4946
- });
4947
- onEvent({
4948
- type: "usage",
4949
- inputTokens: 50,
4950
- outputTokens: 25,
4951
- model: "test-model",
4952
- providerDurationMs: 100,
4953
- });
4954
- return [
4955
- ...messages,
3930
+ // GIVEN a real loop that appends a tool turn and then rejects with a
3931
+ // context-too-large error on the following call, driving the
3932
+ // convergence strip whose marker-write helper is stubbed to throw,
3933
+ // before a final call recovers.
3934
+ const ctx = makeCtx({
3935
+ providerResponses: [
3936
+ toolUseResponse("t1", "file_read", {}),
3937
+ new Error("context_length_exceeded"),
3938
+ textResponse("recovered"),
3939
+ ],
3940
+ loopTools: [
4956
3941
  {
4957
- role: "assistant" as const,
4958
- content: [{ type: "text", text: "recovered" }] as ContentBlock[],
3942
+ name: "file_read",
3943
+ description: "Read a file",
3944
+ input_schema: { type: "object", properties: {} },
4959
3945
  },
4960
- ];
4961
- };
4962
-
4963
- const ctx = makeCtx({
4964
- agentLoopRun,
3946
+ ],
3947
+ toolExecutor: async () => ({ content: "ok", isError: false }),
4965
3948
  contextWindowManager: {
4966
3949
  shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
4967
3950
  maybeCompact: async () => ({ compacted: false }),