@vellumai/assistant 0.8.7 → 0.8.8-dev.202606052332.17fc8ea

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (570)
  1. package/Dockerfile +20 -4
  2. package/bun.lock +2 -2
  3. package/docker-entrypoint.sh +4 -2
  4. package/docker-init-apt-root.sh +3 -1
  5. package/docker-kata-apt-env.sh +3 -1
  6. package/docker-kata-runtime-family.sh +12 -0
  7. package/docs/architecture/memory.md +1 -1
  8. package/examples/plugins/echo/README.md +61 -66
  9. package/examples/plugins/echo/hooks/post-tool-use.ts +18 -0
  10. package/examples/plugins/echo/hooks/stop.ts +16 -0
  11. package/examples/plugins/echo/hooks/user-prompt-submit.ts +18 -0
  12. package/examples/plugins/echo/package.json +1 -2
  13. package/examples/plugins/echo/src/emit.ts +19 -0
  14. package/node_modules/@vellumai/skill-host-contracts/src/server-message.ts +3 -3
  15. package/node_modules/@vellumai/skill-host-contracts/src/skill-host.ts +7 -6
  16. package/openapi.yaml +3378 -335
  17. package/package.json +2 -2
  18. package/scripts/generate-openapi.ts +68 -41
  19. package/src/__tests__/agent-loop-exit-reason.test.ts +35 -93
  20. package/src/__tests__/agent-loop-provider-error-recording.test.ts +1 -1
  21. package/src/__tests__/agent-loop.test.ts +37 -87
  22. package/src/__tests__/agent-wake-disk-pressure-callsite.test.ts +2 -0
  23. package/src/__tests__/annotate-activity-metadata.test.ts +262 -0
  24. package/src/__tests__/annotate-risk-options.test.ts +2 -3
  25. package/src/__tests__/anthropic-provider.test.ts +95 -2
  26. package/src/__tests__/app-control-flow.test.ts +1 -1
  27. package/src/__tests__/app-dir-path-guard.test.ts +1 -0
  28. package/src/__tests__/approval-routes-http.test.ts +4 -1
  29. package/src/__tests__/assistant-event-hub.test.ts +25 -0
  30. package/src/__tests__/assistant-events-sse-shed.test.ts +8 -0
  31. package/src/__tests__/{conversation-stream-state.test.ts → assistant-stream-state.test.ts} +252 -91
  32. package/src/__tests__/auth-fallback-events-store.test.ts +116 -0
  33. package/src/__tests__/background-workers-disk-pressure.test.ts +6 -0
  34. package/src/__tests__/btw-routes.test.ts +62 -3
  35. package/src/__tests__/build-persisted-content.test.ts +184 -0
  36. package/src/__tests__/catalog-files.test.ts +1 -1
  37. package/src/__tests__/channel-approval-routes.test.ts +1 -1
  38. package/src/__tests__/channel-approvals.test.ts +1 -1
  39. package/src/__tests__/clawhub-files.test.ts +1 -1
  40. package/src/__tests__/compaction-circuit.test.ts +258 -0
  41. package/src/__tests__/compaction-direct.test.ts +132 -0
  42. package/src/__tests__/compaction.benchmark.test.ts +0 -30
  43. package/src/__tests__/config-watcher.test.ts +1 -1
  44. package/src/__tests__/conversation-abort-tool-results.test.ts +57 -19
  45. package/src/__tests__/conversation-agent-loop-disk-pressure.test.ts +6 -5
  46. package/src/__tests__/conversation-agent-loop-inference-profile.test.ts +10 -7
  47. package/src/__tests__/conversation-agent-loop-overflow.test.ts +316 -1143
  48. package/src/__tests__/conversation-agent-loop.test.ts +638 -1655
  49. package/src/__tests__/conversation-analysis-routes.test.ts +6 -0
  50. package/src/__tests__/conversation-clean-command.test.ts +5 -2
  51. package/src/__tests__/conversation-history-web-search.test.ts +11 -1
  52. package/src/__tests__/conversation-pairing.test.ts +4 -31
  53. package/src/__tests__/conversation-process-app-control-preactivation.test.ts +6 -0
  54. package/src/__tests__/conversation-provider-retry-repair.test.ts +30 -10
  55. package/src/__tests__/conversation-queue.test.ts +2 -0
  56. package/src/__tests__/conversation-routes-disk-view.test.ts +3 -0
  57. package/src/__tests__/conversation-routes-slash-commands.test.ts +6 -5
  58. package/src/__tests__/conversation-runtime-assembly.test.ts +310 -300
  59. package/src/__tests__/conversation-runtime-workspace.test.ts +105 -45
  60. package/src/__tests__/conversation-slash-commands.test.ts +8 -42
  61. package/src/__tests__/conversation-slash-queue.test.ts +6 -1
  62. package/src/__tests__/conversation-starter-routes.test.ts +14 -6
  63. package/src/__tests__/conversation-surfaces-action-delivery.test.ts +84 -0
  64. package/src/__tests__/conversation-sync-tags.test.ts +27 -15
  65. package/src/__tests__/conversation-title-service.test.ts +135 -2
  66. package/src/__tests__/conversation-workspace-cache-state.test.ts +17 -16
  67. package/src/__tests__/conversation-workspace-injection.test.ts +67 -2
  68. package/src/__tests__/conversation-workspace-tool-tracking.test.ts +7 -6
  69. package/src/__tests__/conversations-import-system-filter.test.ts +101 -0
  70. package/src/__tests__/cross-provider-web-search.test.ts +214 -1
  71. package/src/__tests__/db-acp-history.test.ts +101 -0
  72. package/src/__tests__/db-schedule-syntax-migration.test.ts +5 -0
  73. package/src/__tests__/dm-persistence.test.ts +5 -1
  74. package/src/__tests__/dynamic-page-surface.test.ts +31 -0
  75. package/src/__tests__/empty-response-hook.test.ts +304 -0
  76. package/src/__tests__/feature-flag-test-helpers.ts +2 -2
  77. package/src/__tests__/file-write-tool.test.ts +63 -0
  78. package/src/__tests__/gateway-only-guard.test.ts +12 -2
  79. package/src/__tests__/gemini-image-service.test.ts +13 -0
  80. package/src/__tests__/guardian-grant-minting.test.ts +1 -1
  81. package/src/__tests__/guardian-routing-invariants.test.ts +2 -4
  82. package/src/__tests__/handlers-user-message-approval-consumption.test.ts +1 -1
  83. package/src/__tests__/heartbeat-disk-pressure.test.ts +1 -0
  84. package/src/__tests__/heartbeat-service.test.ts +1 -0
  85. package/src/__tests__/helpers/mock-provider.ts +110 -0
  86. package/src/__tests__/helpers/native-web-search-harness.ts +129 -0
  87. package/src/__tests__/history-repair-hook.test.ts +1 -0
  88. package/src/__tests__/host-app-control-routes.test.ts +1 -1
  89. package/src/__tests__/host-cu-routes-targeted.test.ts +3 -3
  90. package/src/__tests__/identity-intro-cache.test.ts +12 -100
  91. package/src/__tests__/identity-routes.test.ts +248 -7
  92. package/src/__tests__/inbound-slack-persistence.test.ts +5 -1
  93. package/src/__tests__/injector-background-turn.test.ts +3 -9
  94. package/src/__tests__/injector-chain.test.ts +139 -275
  95. package/src/__tests__/injector-disk-pressure.test.ts +75 -41
  96. package/src/__tests__/injector-document-comments.test.ts +3 -3
  97. package/src/__tests__/injector-pkb-v2-silenced.test.ts +30 -22
  98. package/src/__tests__/injector-v3-suppression.test.ts +31 -37
  99. package/src/__tests__/internal-telemetry-routes.test.ts +109 -0
  100. package/src/__tests__/list-messages-hidden-metadata.test.ts +38 -0
  101. package/src/__tests__/list-messages-page-latest.test.ts +60 -0
  102. package/src/__tests__/list-messages-tool-merge.test.ts +20 -0
  103. package/src/__tests__/llm-usage-store.test.ts +223 -1
  104. package/src/__tests__/memory-retrieval-hook.test.ts +297 -0
  105. package/src/__tests__/memory-v2-static-injector.test.ts +103 -35
  106. package/src/__tests__/native-web-search.test.ts +191 -0
  107. package/src/__tests__/onboarding-template-contract.test.ts +2 -0
  108. package/src/__tests__/openai-image-service.test.ts +17 -0
  109. package/src/__tests__/openai-provider.test.ts +31 -1
  110. package/src/__tests__/{overflow-reduce-pipeline.test.ts → overflow-reduction-loop.test.ts} +64 -284
  111. package/src/__tests__/persist-unsendable-image.test.ts +215 -0
  112. package/src/__tests__/persistence-secret-redaction.test.ts +1 -0
  113. package/src/__tests__/pkb-autoinject.test.ts +2 -5
  114. package/src/__tests__/plugin-api-shim.test.ts +3 -6
  115. package/src/__tests__/plugin-bootstrap.test.ts +14 -40
  116. package/src/__tests__/plugin-registry.test.ts +3 -76
  117. package/src/__tests__/plugin-types.test.ts +0 -193
  118. package/src/__tests__/process-message-display-content.test.ts +6 -2
  119. package/src/__tests__/reaction-persistence.test.ts +1 -1
  120. package/src/__tests__/regenerate-fire-and-forget-trace.test.ts +5 -1
  121. package/src/__tests__/resolve-trust-class.test.ts +4 -4
  122. package/src/__tests__/runtime-events-sse-reconnect.test.ts +60 -23
  123. package/src/__tests__/schedule-routes.test.ts +603 -2
  124. package/src/__tests__/schedule-store.test.ts +41 -0
  125. package/src/__tests__/schedule-tools.test.ts +35 -0
  126. package/src/__tests__/send-endpoint-busy.test.ts +4 -1
  127. package/src/__tests__/server-history-render.test.ts +314 -1
  128. package/src/__tests__/skill-feature-flags-integration.test.ts +33 -0
  129. package/src/__tests__/skillssh-files.test.ts +1 -1
  130. package/src/__tests__/subagent-call-site-routing.test.ts +1 -1
  131. package/src/__tests__/subagent-fork-notifications.test.ts +1 -3
  132. package/src/__tests__/subagent-fork-spawn.test.ts +1 -1
  133. package/src/__tests__/subagent-manager-notify.test.ts +1 -3
  134. package/src/__tests__/subagent-notify-parent.test.ts +1 -3
  135. package/src/__tests__/subagent-spawn-tool-fork.test.ts +1 -1
  136. package/src/__tests__/system-prompt.test.ts +20 -0
  137. package/src/__tests__/task-scheduler.test.ts +162 -1
  138. package/src/__tests__/terminal-tools.test.ts +6 -1
  139. package/src/__tests__/title-generate-hook.test.ts +319 -0
  140. package/src/__tests__/tool-error-hook.test.ts +278 -0
  141. package/src/__tests__/tool-preview-lifecycle.test.ts +468 -5
  142. package/src/__tests__/tool-result-metadata-plumbing.test.ts +1 -0
  143. package/src/__tests__/tool-result-truncate-hook.test.ts +127 -0
  144. package/src/__tests__/tool-result-truncation.test.ts +0 -2
  145. package/src/__tests__/ui-choice-copy-surfaces.test.ts +254 -0
  146. package/src/__tests__/ui-work-result-surface.test.ts +159 -0
  147. package/src/__tests__/usage-routes.test.ts +285 -1
  148. package/src/__tests__/user-plugin-loader.test.ts +54 -286
  149. package/src/__tests__/voice-session-bridge.test.ts +6 -3
  150. package/src/__tests__/web-search-backend-failure.test.ts +166 -0
  151. package/src/acp/__tests__/agent-process.test.ts +161 -0
  152. package/src/acp/__tests__/client-handler.test.ts +40 -0
  153. package/src/acp/__tests__/helpers/acp-history-db.ts +82 -0
  154. package/src/acp/__tests__/helpers/exec-file-stub.ts +101 -0
  155. package/src/acp/__tests__/prepare-agent-env.test.ts +137 -0
  156. package/src/acp/__tests__/session-manager-persistence.test.ts +95 -28
  157. package/src/acp/__tests__/session-manager-resume.test.ts +736 -0
  158. package/src/acp/agent-process.ts +61 -1
  159. package/src/acp/auto-install.test.ts +196 -0
  160. package/src/acp/auto-install.ts +177 -0
  161. package/src/acp/client-handler.ts +31 -0
  162. package/src/acp/feature-gate.test.ts +48 -0
  163. package/src/acp/feature-gate.ts +34 -0
  164. package/src/acp/prepare-agent-env.ts +83 -29
  165. package/src/acp/resolve-agent.test.ts +320 -7
  166. package/src/acp/resolve-agent.ts +182 -18
  167. package/src/acp/resume-hint.ts +25 -0
  168. package/src/acp/session-manager.ts +495 -73
  169. package/src/acp/types.ts +8 -0
  170. package/src/agent/compaction-circuit.ts +60 -102
  171. package/src/agent/loop.ts +362 -485
  172. package/src/api/events/assistant-thinking-delta.ts +33 -0
  173. package/src/api/events/tool-output-chunk.ts +45 -0
  174. package/src/api/events/tool-use-preview-start.ts +32 -0
  175. package/src/api/events/trace-event.ts +69 -0
  176. package/src/api/index.ts +48 -13
  177. package/src/api/responses/conversation-message.ts +374 -0
  178. package/src/approvals/guardian-request-resolvers.ts +1 -1
  179. package/src/avatar/__tests__/avatar-store.test.ts +34 -29
  180. package/src/background-wake/next-wake.ts +1 -0
  181. package/src/cli/commands/__tests__/notifications.test.ts +58 -14
  182. package/src/cli/commands/notifications.ts +112 -60
  183. package/src/config/__tests__/feature-flag-registry-guard.test.ts +2 -2
  184. package/src/config/acp-defaults.test.ts +10 -0
  185. package/src/config/acp-defaults.ts +6 -0
  186. package/src/config/assistant-feature-flags.ts +22 -11
  187. package/src/config/bundled-skills/acp/SKILL.md +83 -31
  188. package/src/config/bundled-skills/acp/TOOLS.json +4 -4
  189. package/src/config/bundled-skills/app-builder/SKILL.md +224 -398
  190. package/src/config/bundled-skills/app-builder/TOOLS.json +29 -0
  191. package/src/config/bundled-skills/app-builder/references/DESIGN_SYSTEM.md +48 -0
  192. package/src/config/bundled-skills/app-builder/references/RESPONSIVE.md +57 -0
  193. package/src/config/bundled-skills/app-builder/references/SLIDES.md +38 -0
  194. package/src/config/bundled-skills/app-builder/references/examples/README.md +17 -0
  195. package/src/config/bundled-skills/app-builder/references/examples/expense-tracker.md +515 -0
  196. package/src/config/bundled-skills/app-builder/references/examples/focus-timer.md +342 -0
  197. package/src/config/bundled-skills/app-builder/references/examples/habit-tracker.md +490 -0
  198. package/src/config/bundled-skills/app-builder/tools/app-list.ts +62 -0
  199. package/src/config/bundled-skills/document-editor/SKILL.md +28 -23
  200. package/src/config/bundled-skills/document-editor/TOOLS.json +1 -1
  201. package/src/config/bundled-skills/messaging/SKILL.md +0 -7
  202. package/src/config/bundled-tool-registry.ts +2 -0
  203. package/src/config/feature-flag-cache.ts +3 -3
  204. package/src/config/feature-flag-registry.json +48 -7
  205. package/src/config/schemas/__tests__/memory-v2.test.ts +1 -0
  206. package/src/config/schemas/__tests__/memory-v3.test.ts +25 -0
  207. package/src/config/schemas/heartbeat.ts +9 -0
  208. package/src/config/schemas/llm.ts +1 -0
  209. package/src/config/schemas/memory-v2.ts +8 -0
  210. package/src/config/schemas/memory-v3.ts +8 -0
  211. package/src/config/schemas/platform.ts +8 -0
  212. package/src/config/seed-inference-profiles.ts +2 -2
  213. package/src/config/skills.ts +13 -0
  214. package/src/context/compactor.ts +1 -1
  215. package/src/context/strip-injections.ts +128 -0
  216. package/src/context/token-estimator.ts +23 -0
  217. package/src/context/tool-result-truncation.ts +0 -23
  218. package/src/context/window-manager.ts +5 -7
  219. package/src/credential-execution/executable-discovery.ts +16 -0
  220. package/src/daemon/__tests__/conversation-lifecycle-auto-analyze.test.ts +6 -0
  221. package/src/daemon/__tests__/inference-profile-notification.test.ts +153 -0
  222. package/src/daemon/__tests__/native-web-search-metadata.test.ts +10 -8
  223. package/src/daemon/assistant-attachments.ts +1 -1
  224. package/src/daemon/config-watcher.ts +2 -2
  225. package/src/daemon/context-overflow-reducer.ts +0 -1
  226. package/src/daemon/conversation-agent-loop-handlers.ts +594 -153
  227. package/src/daemon/conversation-agent-loop.ts +301 -997
  228. package/src/daemon/conversation-history.ts +5 -4
  229. package/src/daemon/conversation-lifecycle.ts +3 -4
  230. package/src/daemon/conversation-messaging.ts +7 -6
  231. package/src/daemon/conversation-process.ts +11 -16
  232. package/src/daemon/conversation-registry.ts +159 -0
  233. package/src/daemon/conversation-runtime-assembly.ts +218 -398
  234. package/src/daemon/conversation-slash.ts +6 -25
  235. package/src/daemon/conversation-store.ts +9 -90
  236. package/src/daemon/conversation-surfaces.ts +222 -4
  237. package/src/daemon/conversation-tool-setup.ts +2 -29
  238. package/src/daemon/conversation-workspace.ts +17 -0
  239. package/src/daemon/conversation.ts +32 -20
  240. package/src/daemon/external-plugins-bootstrap.ts +17 -18
  241. package/src/daemon/handlers/config-a2a.ts +51 -36
  242. package/src/daemon/handlers/config-slack-channel.ts +20 -14
  243. package/src/daemon/handlers/config-telegram.ts +16 -2
  244. package/src/daemon/handlers/conversations.ts +3 -1
  245. package/src/daemon/handlers/shared.ts +156 -84
  246. package/src/daemon/handlers/skills.ts +42 -10
  247. package/src/daemon/lifecycle.ts +25 -0
  248. package/src/daemon/message-types/apps.ts +1 -29
  249. package/src/daemon/message-types/messages.ts +9 -57
  250. package/src/daemon/message-types/skills.ts +2 -0
  251. package/src/daemon/message-types/surfaces.ts +136 -3
  252. package/src/daemon/now-scratchpad.ts +21 -0
  253. package/src/daemon/orphan-reaper.test.ts +210 -0
  254. package/src/daemon/orphan-reaper.ts +240 -0
  255. package/src/daemon/overflow-reduction-loop.ts +230 -0
  256. package/src/daemon/persist-unsendable-image.ts +117 -0
  257. package/src/daemon/process-message.ts +1 -3
  258. package/src/daemon/server.ts +2 -0
  259. package/src/daemon/trace-emitter.ts +6 -4
  260. package/src/daemon/trust-context.ts +19 -0
  261. package/src/daemon/wake-target-adapter.ts +3 -1
  262. package/src/heartbeat/__tests__/heartbeat-service.test.ts +3 -0
  263. package/src/heartbeat/heartbeat-run-store.ts +23 -1
  264. package/src/heartbeat/heartbeat-service.ts +26 -0
  265. package/src/home/home-greeting-cache.ts +24 -1
  266. package/src/ipc/__tests__/browser-ipc.test.ts +1 -1
  267. package/src/ipc/__tests__/ui-request-route.test.ts +3 -3
  268. package/src/ipc/gateway-client.test.ts +2 -2
  269. package/src/ipc/gateway-client.ts +3 -3
  270. package/src/ipc/skill-routes/__tests__/memory.test.ts +15 -0
  271. package/src/ipc/skill-routes/memory.ts +4 -2
  272. package/src/media/gemini-image-service.ts +15 -0
  273. package/src/media/openai-image-service.ts +14 -0
  274. package/src/media/types.ts +34 -0
  275. package/src/memory/__tests__/jobs-worker-v2-schedule.test.ts +56 -0
  276. package/src/memory/auth-fallback-events-store.ts +94 -0
  277. package/src/memory/conversation-starter-checkpoints.ts +1 -0
  278. package/src/memory/conversation-title-service.ts +65 -41
  279. package/src/memory/db-init.ts +6 -0
  280. package/src/memory/graph/__tests__/conversation-graph-memory-registry.test.ts +119 -0
  281. package/src/memory/graph/conversation-graph-memory.ts +65 -0
  282. package/src/memory/job-handlers/conversation-starters.ts +13 -2
  283. package/src/memory/jobs-store.ts +33 -0
  284. package/src/memory/jobs-worker.ts +32 -5
  285. package/src/memory/llm-usage-store.ts +224 -50
  286. package/src/memory/migrations/222-strip-placeholder-sentinels-from-messages.ts +6 -5
  287. package/src/memory/migrations/270-schedule-source-conversation.ts +13 -0
  288. package/src/memory/migrations/271-create-auth-fallback-events.ts +21 -0
  289. package/src/memory/migrations/272-acp-session-history-cwd.ts +36 -0
  290. package/src/memory/migrations/index.ts +3 -0
  291. package/src/memory/pkb/autoinject.ts +61 -0
  292. package/src/memory/pkb/context.ts +50 -0
  293. package/src/memory/pkb/types.ts +14 -0
  294. package/src/memory/schedule-attribution-sql.ts +104 -0
  295. package/src/memory/schema/acp.ts +4 -0
  296. package/src/memory/schema/infrastructure.ts +16 -0
  297. package/src/memory/usage-grouped-buckets.ts +6 -1
  298. package/src/memory/v2/__tests__/consolidation-job.test.ts +4 -4
  299. package/src/memory/v2/consolidation-job.ts +14 -5
  300. package/src/notifications/conversation-pairing.ts +8 -15
  301. package/src/notifications/decision-engine.ts +6 -3
  302. package/src/notifications/home-feed-side-effect.ts +12 -1
  303. package/src/permissions/prompter.ts +4 -0
  304. package/src/plugin-api/constants.ts +4 -0
  305. package/src/plugin-api/index.ts +7 -5
  306. package/src/plugin-api/types.ts +151 -1
  307. package/src/plugins/defaults/compaction/compact.ts +59 -0
  308. package/src/plugins/defaults/compaction/package.json +1 -1
  309. package/src/plugins/defaults/compaction/register.ts +8 -19
  310. package/src/plugins/defaults/empty-response/hooks/stop.ts +126 -0
  311. package/src/plugins/defaults/empty-response/register.ts +8 -13
  312. package/src/plugins/defaults/index.ts +2 -18
  313. package/src/plugins/defaults/memory-retrieval/hooks/post-compact.ts +95 -0
  314. package/src/plugins/defaults/memory-retrieval/hooks/user-prompt-submit-temp.ts +216 -0
  315. package/src/plugins/defaults/memory-retrieval/injector-chain.ts +35 -0
  316. package/src/plugins/defaults/{injectors/register.ts → memory-retrieval/injectors.ts} +288 -81
  317. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/assign.test.ts +4 -4
  318. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/health.test.ts +16 -0
  319. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/live-integration.test.ts +4 -4
  320. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/maintain-job.test.ts +5 -5
  321. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/orchestrate.test.ts +48 -12
  322. package/src/plugins/defaults/memory-v3-shadow/__tests__/provider-blocks.test.ts +13 -0
  323. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/reconcile.test.ts +2 -2
  324. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/render-injection.test.ts +1 -1
  325. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/router.test.ts +104 -32
  326. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/selection-log-store.test.ts +8 -8
  327. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/selector.test.ts +96 -30
  328. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/shadow-plugin.test.ts +34 -16
  329. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/assign.ts +5 -5
  330. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/capabilities.ts +2 -2
  331. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/health.ts +0 -0
  332. package/src/plugins/defaults/memory-v3-shadow/hooks/post-compact.ts +14 -0
  333. package/src/plugins/defaults/memory-v3-shadow/hooks/user-prompt-submit.ts +19 -0
  334. package/src/plugins/defaults/memory-v3-shadow/injector.ts +75 -0
  335. package/src/plugins/defaults/memory-v3-shadow/llm-retry.ts +32 -0
  336. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/maintain-job.ts +8 -8
  337. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/orchestrate.ts +26 -14
  338. package/src/plugins/defaults/{llm-call → memory-v3-shadow}/package.json +2 -2
  339. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/page-content.ts +2 -2
  340. package/src/plugins/defaults/memory-v3-shadow/provider-blocks.ts +26 -0
  341. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/reconcile.ts +3 -3
  342. package/src/plugins/defaults/memory-v3-shadow/register.ts +26 -0
  343. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/render-injection.ts +1 -1
  344. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/router.ts +51 -45
  345. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/selection-log-store.ts +4 -4
  346. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/selector.ts +61 -46
  347. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/shadow-plugin.ts +69 -99
  348. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/tree.ts +1 -1
  349. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/types.ts +8 -0
  350. package/src/plugins/defaults/title-generate/hooks/stop.ts +75 -0
  351. package/src/plugins/defaults/title-generate/hooks/user-prompt-submit.ts +35 -0
  352. package/src/plugins/defaults/title-generate/package.json +1 -1
  353. package/src/plugins/defaults/title-generate/register.ts +18 -18
  354. package/src/plugins/defaults/tool-error/hooks/post-tool-use.ts +118 -0
  355. package/src/plugins/defaults/tool-error/package.json +1 -1
  356. package/src/plugins/defaults/tool-error/register.ts +9 -21
  357. package/src/plugins/defaults/tool-result-truncate/hooks/post-tool-use.ts +32 -0
  358. package/src/plugins/defaults/tool-result-truncate/register.ts +10 -21
  359. package/src/plugins/defaults/tool-result-truncate/terminal.ts +37 -18
  360. package/src/plugins/external-api.ts +2 -2
  361. package/src/plugins/pipeline.ts +6 -305
  362. package/src/plugins/registry.ts +10 -55
  363. package/src/plugins/types.ts +62 -797
  364. package/src/plugins/user-loader.ts +30 -127
  365. package/src/proactive-artifact/aux-message-injector.ts +4 -4
  366. package/src/proactive-artifact/job.test.ts +8 -13
  367. package/src/prompts/__tests__/system-prompt.test.ts +42 -0
  368. package/src/prompts/templates/BOOTSTRAP-ACTIVATION-RAIL.md +64 -0
  369. package/src/prompts/templates/BOOTSTRAP.md +2 -2
  370. package/src/prompts/templates/system-sections.ts +15 -0
  371. package/src/providers/anthropic/client.ts +37 -29
  372. package/src/providers/openai/__tests__/chat-completions-provider-reasoning.test.ts +112 -0
  373. package/src/providers/openai/chat-completions-provider.ts +44 -0
  374. package/src/providers/openrouter/client.ts +1 -0
  375. package/src/providers/placeholder-sentinels.ts +35 -0
  376. package/src/runtime/__tests__/agent-wake.test.ts +10 -6
  377. package/src/runtime/__tests__/interactive-ui.test.ts +1 -1
  378. package/src/runtime/agent-wake.ts +2 -5
  379. package/src/runtime/assistant-event-hub.ts +37 -7
  380. package/src/runtime/{conversation-stream-state.ts → assistant-stream-state.ts} +132 -58
  381. package/src/runtime/channel-approvals.ts +1 -1
  382. package/src/runtime/http-router.ts +16 -21
  383. package/src/runtime/http-types.ts +16 -70
  384. package/src/runtime/interactive-ui.ts +1 -1
  385. package/src/runtime/pending-interactions.ts +1 -0
  386. package/src/runtime/routes/__tests__/acp-routes.test.ts +283 -55
  387. package/src/runtime/routes/__tests__/consolidation-routes.test.ts +265 -2
  388. package/src/runtime/routes/__tests__/conversation-list-routes.test.ts +1 -1
  389. package/src/runtime/routes/__tests__/conversation-query-routes.test.ts +31 -1
  390. package/src/runtime/routes/__tests__/memory-v2-routes.test.ts +6 -2
  391. package/src/runtime/routes/__tests__/surface-action-routes.test.ts +5 -4
  392. package/src/runtime/routes/__tests__/surface-content-routes.test.ts +4 -1
  393. package/src/runtime/routes/__tests__/tts-routes.test.ts +6 -2
  394. package/src/runtime/routes/acp-routes.test.ts +89 -25
  395. package/src/runtime/routes/acp-routes.ts +81 -29
  396. package/src/runtime/routes/app-management-routes.ts +6 -117
  397. package/src/runtime/routes/app-routes.ts +13 -15
  398. package/src/runtime/routes/approval-routes.ts +1 -1
  399. package/src/runtime/routes/attachment-routes.ts +26 -15
  400. package/src/runtime/routes/avatar-routes.ts +26 -0
  401. package/src/runtime/routes/browser-routes.ts +1 -1
  402. package/src/runtime/routes/browser-tabs-routes.ts +6 -10
  403. package/src/runtime/routes/btw-routes.ts +29 -23
  404. package/src/runtime/routes/consolidation-routes.ts +120 -20
  405. package/src/runtime/routes/conversation-cli-routes.ts +1 -1
  406. package/src/runtime/routes/conversation-list-routes.ts +1 -1
  407. package/src/runtime/routes/conversation-query-routes.ts +3 -1
  408. package/src/runtime/routes/conversation-routes.ts +372 -185
  409. package/src/runtime/routes/conversation-starter-routes.ts +13 -7
  410. package/src/runtime/routes/conversations-import-routes.ts +24 -7
  411. package/src/runtime/routes/documents-routes.ts +4 -0
  412. package/src/runtime/routes/domain-routes.ts +51 -37
  413. package/src/runtime/routes/epoch-millis-range.ts +34 -0
  414. package/src/runtime/routes/events-routes.ts +28 -34
  415. package/src/runtime/routes/gateway-log-routes.ts +26 -4
  416. package/src/runtime/routes/heartbeat-routes.ts +32 -12
  417. package/src/runtime/routes/host-app-control-routes.ts +1 -1
  418. package/src/runtime/routes/host-cu-routes.ts +1 -1
  419. package/src/runtime/routes/identity-intro-cache.ts +11 -34
  420. package/src/runtime/routes/identity-routes.ts +224 -18
  421. package/src/runtime/routes/image-generation-routes.ts +40 -2
  422. package/src/runtime/routes/inbound-message-handler.ts +1 -1
  423. package/src/runtime/routes/index.ts +2 -0
  424. package/src/runtime/routes/integrations/a2a.ts +12 -10
  425. package/src/runtime/routes/integrations/slack/__tests__/channel.test.ts +16 -0
  426. package/src/runtime/routes/integrations/slack/channel.ts +4 -0
  427. package/src/runtime/routes/integrations/slack/share.ts +27 -6
  428. package/src/runtime/routes/integrations/telegram.ts +6 -0
  429. package/src/runtime/routes/integrations/twilio.ts +42 -0
  430. package/src/runtime/routes/internal-telemetry-routes.ts +88 -0
  431. package/src/runtime/routes/log-export-routes.ts +8 -0
  432. package/src/runtime/routes/memory-v2-routes.ts +15 -8
  433. package/src/runtime/routes/memory-v3-routes.ts +66 -34
  434. package/src/runtime/routes/oauth-apps.ts +66 -12
  435. package/src/runtime/routes/oauth-providers.ts +44 -5
  436. package/src/runtime/routes/platform-routes.ts +81 -5
  437. package/src/runtime/routes/playground/__tests__/force-compact.test.ts +6 -4
  438. package/src/runtime/routes/playground/force-compact.ts +1 -1
  439. package/src/runtime/routes/playground/helpers.ts +1 -1
  440. package/src/runtime/routes/rename-conversation-routes.ts +5 -0
  441. package/src/runtime/routes/schedule-routes.ts +152 -42
  442. package/src/runtime/routes/secret-routes.ts +14 -2
  443. package/src/runtime/routes/skills-routes.ts +43 -14
  444. package/src/runtime/routes/surface-conversation-resolver.ts +4 -3
  445. package/src/runtime/routes/tool-call-confirmation-enrichment.test.ts +161 -0
  446. package/src/runtime/routes/tool-call-confirmation-enrichment.ts +107 -0
  447. package/src/runtime/routes/trust-rules-routes.ts +26 -2
  448. package/src/runtime/routes/tts-routes.ts +35 -0
  449. package/src/runtime/routes/types.ts +66 -8
  450. package/src/runtime/routes/usage-routes.ts +47 -39
  451. package/src/runtime/routes/webhook-routes.ts +41 -2
  452. package/src/runtime/routes/work-items-routes.ts +2 -4
  453. package/src/runtime/routes/workspace-routes.ts +4 -0
  454. package/src/runtime/services/__tests__/analyze-conversation.test.ts +6 -0
  455. package/src/runtime/services/analyze-conversation.ts +2 -2
  456. package/src/runtime/services/conversation-serializer.ts +1 -1
  457. package/src/schedule/schedule-store.ts +20 -1
  458. package/src/schedule/schedule-usage-store.ts +83 -0
  459. package/src/schedule/scheduler.ts +12 -5
  460. package/src/signals/cancel.ts +2 -4
  461. package/src/skills/catalog-files.ts +2 -2
  462. package/src/skills/catalog-install.ts +3 -0
  463. package/src/skills/categories-cache.ts +118 -0
  464. package/src/skills/clawhub-files.ts +1 -2
  465. package/src/skills/skillssh-files.ts +1 -2
  466. package/src/subagent/manager.ts +17 -5
  467. package/src/telemetry/types.ts +29 -1
  468. package/src/telemetry/usage-telemetry-reporter.test.ts +112 -3
  469. package/src/telemetry/usage-telemetry-reporter.ts +57 -2
  470. package/src/tools/acp/context.ts +20 -0
  471. package/src/tools/acp/list-agents.test.ts +7 -1
  472. package/src/tools/acp/spawn.test.ts +158 -55
  473. package/src/tools/acp/spawn.ts +47 -72
  474. package/src/tools/acp/steer.test.ts +105 -8
  475. package/src/tools/acp/steer.ts +48 -17
  476. package/src/tools/apps/executors.ts +13 -8
  477. package/src/tools/executor.ts +1 -53
  478. package/src/tools/filesystem/write.ts +34 -0
  479. package/src/tools/network/__tests__/web-search-metadata.test.ts +7 -1
  480. package/src/tools/network/__tests__/web-search.test.ts +11 -3
  481. package/src/tools/network/web-search-error.test.ts +248 -0
  482. package/src/tools/network/web-search-error.ts +267 -0
  483. package/src/tools/network/web-search.ts +207 -48
  484. package/src/tools/schedule/create.ts +2 -0
  485. package/src/tools/subagent/spawn.ts +2 -4
  486. package/src/tools/terminal/safe-env.ts +10 -1
  487. package/src/tools/ui-surface/definitions.ts +34 -5
  488. package/src/tts/__tests__/provider-catalog-consistency.test.ts +85 -1
  489. package/src/tts/provider-catalog.ts +76 -1
  490. package/src/util/mutex.ts +47 -0
  491. package/src/workspace/git-service.ts +1 -42
  492. package/src/workspace/migrations/051-seed-conversation-summarization-callsite.ts +4 -5
  493. package/src/workspace/migrations/095-bump-heartbeat-interval-30m-to-60m.ts +51 -0
  494. package/src/workspace/migrations/096-reduce-quality-profile-effort.ts +72 -0
  495. package/src/workspace/migrations/097-enable-adaptive-thinking-managed-profiles.ts +117 -0
  496. package/src/workspace/migrations/registry.ts +6 -0
  497. package/docs/plugins.md +0 -836
  498. package/examples/plugins/echo/register.ts +0 -184
  499. package/src/__tests__/bootstrap-turn-cleanup.test.ts +0 -44
  500. package/src/__tests__/circuit-breaker-pipeline.test.ts +0 -405
  501. package/src/__tests__/compaction-pipeline.test.ts +0 -210
  502. package/src/__tests__/compaction-timeout-recovery.test.ts +0 -251
  503. package/src/__tests__/empty-response-pipeline.test.ts +0 -423
  504. package/src/__tests__/llm-call-pipeline.test.ts +0 -287
  505. package/src/__tests__/memory-retrieval-pipeline.test.ts +0 -418
  506. package/src/__tests__/persistence-pipeline.test.ts +0 -503
  507. package/src/__tests__/pipeline-runner.test.ts +0 -564
  508. package/src/__tests__/title-generate-pipeline.test.ts +0 -211
  509. package/src/__tests__/token-estimate-pipeline.test.ts +0 -479
  510. package/src/__tests__/tool-error-pipeline.test.ts +0 -241
  511. package/src/__tests__/tool-execute-pipeline.test.ts +0 -417
  512. package/src/__tests__/tool-result-truncate-pipeline.test.ts +0 -341
  513. package/src/daemon/bootstrap-turn-cleanup.ts +0 -45
  514. package/src/gallery/default-gallery.ts +0 -1359
  515. package/src/gallery/gallery-manifest.ts +0 -28
  516. package/src/home/feature-gate.ts +0 -22
  517. package/src/memory/v3/provider-blocks.ts +0 -16
  518. package/src/plugins/defaults/circuit-breaker/middlewares/circuitBreaker.ts +0 -93
  519. package/src/plugins/defaults/circuit-breaker/package.json +0 -15
  520. package/src/plugins/defaults/circuit-breaker/register.ts +0 -39
  521. package/src/plugins/defaults/compaction/middlewares/compaction.ts +0 -25
  522. package/src/plugins/defaults/compaction/terminal.ts +0 -73
  523. package/src/plugins/defaults/empty-response/middlewares/emptyResponse.ts +0 -22
  524. package/src/plugins/defaults/empty-response/terminal.ts +0 -106
  525. package/src/plugins/defaults/injectors/package.json +0 -15
  526. package/src/plugins/defaults/llm-call/middlewares/llmCall.ts +0 -17
  527. package/src/plugins/defaults/llm-call/register.ts +0 -45
  528. package/src/plugins/defaults/memory-retrieval/middlewares/memoryRetrieval.ts +0 -17
  529. package/src/plugins/defaults/memory-retrieval/package.json +0 -15
  530. package/src/plugins/defaults/memory-retrieval/register.ts +0 -181
  531. package/src/plugins/defaults/overflow-reduce/middlewares/overflowReduce.ts +0 -126
  532. package/src/plugins/defaults/overflow-reduce/package.json +0 -15
  533. package/src/plugins/defaults/overflow-reduce/register.ts +0 -42
  534. package/src/plugins/defaults/persistence/middlewares/persistence.ts +0 -19
  535. package/src/plugins/defaults/persistence/package.json +0 -15
  536. package/src/plugins/defaults/persistence/register.ts +0 -38
  537. package/src/plugins/defaults/persistence/terminal.ts +0 -83
  538. package/src/plugins/defaults/title-generate/terminal.ts +0 -31
  539. package/src/plugins/defaults/token-estimate/middlewares/tokenEstimate.ts +0 -23
  540. package/src/plugins/defaults/token-estimate/package.json +0 -15
  541. package/src/plugins/defaults/token-estimate/register.ts +0 -34
  542. package/src/plugins/defaults/token-estimate/terminal.ts +0 -40
  543. package/src/plugins/defaults/tool-error/middlewares/toolError.ts +0 -21
  544. package/src/plugins/defaults/tool-error/terminal.ts +0 -47
  545. package/src/plugins/defaults/tool-execute/middlewares/toolExecute.ts +0 -23
  546. package/src/plugins/defaults/tool-execute/package.json +0 -15
  547. package/src/plugins/defaults/tool-execute/register.ts +0 -49
  548. package/src/plugins/defaults/tool-result-truncate/middlewares/toolResultTruncate.ts +0 -23
  549. package/src/plugins/defaults/tool-result-truncate/types.ts +0 -22
  550. package/src/skills/category-inference.ts +0 -111
  551. /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/capabilities.test.ts +0 -0
  552. /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/core.test.ts +0 -0
  553. /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/fixtures/eval-turns.json +0 -0
  554. /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/fixtures/live-turns.json +0 -0
  555. /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/needle.test.ts +0 -0
  556. /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/snapshot.test.ts +0 -0
  557. /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/tree.test.ts +0 -0
  558. /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/types.test.ts +0 -0
  559. /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/working-set-eviction.test.ts +0 -0
  560. /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/working-set-skeleton.test.ts +0 -0
  561. /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/core.ts +0 -0
  562. /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/data/README.md +0 -0
  563. /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/data/assignments.json +0 -0
  564. /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/data/core.json +0 -0
  565. /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/data/leaves/domain-a/topic-x.md +0 -0
  566. /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/data/leaves/domain-a/topic-y.md +0 -0
  567. /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/data/leaves/domain-b/topic-z.md +0 -0
  568. /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/needle.ts +0 -0
  569. /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/snapshot.ts +0 -0
  570. /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/working-set.ts +0 -0
@@ -14,27 +14,11 @@
14
14
  import { createRequire } from "node:module";
15
15
  import { afterAll, beforeEach, describe, expect, mock, test } from "bun:test";
16
16
 
17
- import { CompactionCircuit } from "../agent/compaction-circuit.js";
18
- import type {
19
- AgentEvent,
20
- AgentLoopRunOptions,
21
- AgentLoopRunResult,
22
- MidLoopCompaction,
23
- } from "../agent/loop.js";
17
+ import type { LoopToolExecutor } from "../agent/loop.js";
24
18
  import type { LLMConfig } from "../config/schemas/llm.js";
25
- import type { ContextWindowResult } from "../context/window-manager.js";
26
19
  import type { ServerMessage } from "../daemon/message-protocol.js";
27
- import { defaultCompactionTerminal } from "../plugins/defaults/compaction/terminal.js";
28
20
  import { resetPluginRegistryAndRegisterDefaults } from "../plugins/defaults/index.js";
29
- import { DEFAULT_TIMEOUTS, runPipeline } from "../plugins/pipeline.js";
30
- import { getMiddlewaresFor } from "../plugins/registry.js";
31
- import type {
32
- CompactionArgs,
33
- CompactionResult,
34
- TurnContext,
35
- } from "../plugins/types.js";
36
- import { PluginTimeoutError } from "../plugins/types.js";
37
- import type { ContentBlock, Message } from "../providers/types.js";
21
+ import type { Message, Provider, ToolDefinition } from "../providers/types.js";
38
22
 
39
23
  const conversationCrudRealSnapshot = {
40
24
  ...(createRequire(import.meta.url)(
@@ -103,6 +87,7 @@ mock.module("../config/loader.js", () => ({
103
87
  memory: { retrieval: { scratchpadInjection: { enabled: true } } },
104
88
  ui: {},
105
89
  compaction: { enabled: true, autoThreshold: 0.7 },
90
+ conversations: { skipAutoRetitling: true },
106
91
  }),
107
92
  loadRawConfig: () => ({}),
108
93
  saveRawConfig: () => {},
@@ -114,10 +99,10 @@ mock.module("../config/loader.js", () => ({
114
99
  // Token estimator — controllable per-test via mockEstimateTokens.
115
100
  // Can be a number (constant), a no-arg function, or a function that
116
101
  // receives the messages array for dynamic behavior based on content.
117
- // Both the calibrated entry point (`estimatePromptTokens`, used in the
118
- // convergence path) and the raw entry point (`estimatePromptTokensRaw`,
119
- // used by the default `tokenEstimate` plugin pipeline for preflight/mid-
120
- // loop) are stubbed so either call site can drive the test.
102
+ // Both the calibrated entry point (`estimatePromptTokens`, which backs the
103
+ // preflight overflow gate and the convergence path) and the raw entry point
104
+ // (`estimatePromptTokensRaw`, used by the pre-send calibration capture) are
105
+ // stubbed so either call site can drive the test.
121
106
  let mockEstimateTokens: number | ((msgs?: Message[]) => number) = 1000;
122
107
  mock.module("../context/token-estimator.js", () => ({
123
108
  estimatePromptTokens: (msgs: Message[]) =>
@@ -128,8 +113,16 @@ mock.module("../context/token-estimator.js", () => ({
128
113
  typeof mockEstimateTokens === "function"
129
114
  ? mockEstimateTokens(msgs)
130
115
  : mockEstimateTokens,
131
- // Default plugin multiplies-in tool tokens via this helper; 0 keeps the
132
- // stubbed raw value unchanged.
116
+ // The preflight overflow gate calls this calibrated wrapper directly, so it
117
+ // must honor `mockEstimateTokens` too — otherwise the real implementation
118
+ // (which sums tool tokens onto the real calibrated estimate) ignores the
119
+ // per-test value and the overflow scenarios below never trigger.
120
+ estimatePromptTokensWithTools: (history: Message[]) =>
121
+ typeof mockEstimateTokens === "function"
122
+ ? mockEstimateTokens(history)
123
+ : mockEstimateTokens,
124
+ // `estimatePromptTokensWithTools` folds tool tokens in via this helper; 0
125
+ // keeps the stubbed value unchanged.
133
126
  estimateToolsTokens: () => 0,
134
127
  // Conversation agent loop now calls this helper to canonicalize the
135
128
  // provider key shared with the calibration system. The tests here
@@ -281,15 +274,6 @@ mock.module("../daemon/conversation-runtime-assembly.js", () => ({
281
274
  blocks: {},
282
275
  }),
283
276
  stripInjectionsForCompaction: (msgs: Message[]) => msgs,
284
- findLastInjectedNowContent: () => null,
285
- readNowScratchpad: () => null,
286
- readPkbContext: () => null,
287
- getPkbAutoInjectList: () => [
288
- "INDEX.md",
289
- "essentials.md",
290
- "threads.md",
291
- "buffer.md",
292
- ],
293
277
  isSlackChannelConversation: () => false,
294
278
  getSlackCompactionWatermarkForPrefix: () => null,
295
279
  loadSlackChronologicalContext: () => null,
@@ -437,179 +421,55 @@ mock.module("../memory/archive-store.js", () => ({
437
421
 
438
422
  // ── Imports (after mocks) ────────────────────────────────────────────
439
423
 
424
+ import { AgentLoop } from "../agent/loop.js";
440
425
  import {
441
426
  type AgentLoopConversationContext,
442
427
  runAgentLoopImpl,
443
428
  } from "../daemon/conversation-agent-loop.js";
429
+ import {
430
+ createMockProvider,
431
+ type ScriptedResponse,
432
+ textResponse,
433
+ toolUseResponse,
434
+ } from "./helpers/mock-provider.js";
444
435
 
445
436
  // ── Test helpers ─────────────────────────────────────────────────────
446
437
 
447
- type AgentLoopRun = (
448
- messages: Message[],
449
- onEvent: (event: AgentEvent) => void,
450
- options?: AgentLoopRunOptions,
451
- ) => Promise<Message[]>;
452
-
453
- /**
454
- * Faithful re-implementation of `AgentLoop.compact()` for the mock loop: run
455
- * the compaction pipeline against the supplied turn context (which carries the
456
- * test's `contextWindowManager`), invoke the orchestrator-supplied hooks, and
457
- * return the continuation history — or `null` on timeout/exhaustion so the
458
- * caller yields "budget".
459
- */
460
- async function simulateInlineCompaction(
461
- compaction: MidLoopCompaction,
462
- history: Message[],
463
- turnContext: TurnContext | undefined,
464
- signal: AbortSignal | undefined,
465
- onEvent: (event: AgentEvent) => void | Promise<void>,
466
- compactionCircuit: CompactionCircuit,
467
- ): Promise<Message[] | null> {
468
- await onEvent({ type: "context_compacting" });
469
- const { rawHistory, options } = compaction.prepare(history);
470
- let result: CompactionResult;
471
- try {
472
- result = await runPipeline<CompactionArgs, CompactionResult>(
473
- "compaction",
474
- getMiddlewaresFor("compaction"),
475
- (args) => defaultCompactionTerminal(args, turnContext as TurnContext),
476
- { messages: rawHistory, signal, options },
477
- turnContext as TurnContext,
478
- DEFAULT_TIMEOUTS.compaction,
479
- );
480
- } catch (error) {
481
- if (error instanceof PluginTimeoutError) {
482
- await compactionCircuit.recordOutcome(
483
- {
484
- currentRequestId: turnContext?.requestId,
485
- currentTurnTrustContext: turnContext?.trust,
486
- turnCount: turnContext?.turnIndex ?? 0,
487
- },
488
- true,
489
- onEvent,
490
- );
491
- return null;
492
- }
493
- throw error;
494
- }
495
- const compactResult = result as ContextWindowResult;
496
- if (compactResult.summaryFailed !== undefined) {
497
- await compactionCircuit.recordOutcome(
498
- {
499
- currentRequestId: turnContext?.requestId,
500
- currentTurnTrustContext: turnContext?.trust,
501
- turnCount: turnContext?.turnIndex ?? 0,
502
- },
503
- compactResult.summaryFailed,
504
- onEvent,
505
- );
506
- }
507
- if (compactResult.compacted) {
508
- await compaction.applyResult(compactResult, rawHistory);
509
- }
510
- if (compactResult.exhausted ?? false) {
511
- return null;
512
- }
513
- return compaction.reinject();
514
- }
515
-
516
- /**
517
- * Adapt a `Message[]`-returning mock loop body into `run()`'s real result
518
- * shape. Mirrors the production loop: the pause-reason carried back is
519
- * whatever the most recent `onCheckpoint` call yielded with (null when it
520
- * never yielded), so the orchestrator derives its yield bookkeeping the same
521
- * way it does against the real loop.
522
- */
523
- const asAgentLoopRun = (
524
- fn: AgentLoopRun,
525
- compactionCircuit: CompactionCircuit,
526
- ): ((
527
- messages: Message[],
528
- onEvent: (event: AgentEvent) => void | Promise<void>,
529
- options?: AgentLoopRunOptions,
530
- ) => Promise<AgentLoopRunResult>) => {
531
- return async (messages, onEvent, options) => {
532
- let exitReason: AgentLoopRunResult["exitReason"] = null;
533
- let wrapped = options;
534
- if (options?.onCheckpoint) {
535
- const inner = options.onCheckpoint;
536
- wrapped = {
537
- ...options,
538
- onCheckpoint: async (info) => {
539
- // Handoff is offered first, mirroring the loop's ordering.
540
- const decision = await inner(info);
541
- if (decision !== "continue") {
542
- exitReason = decision;
543
- return decision;
544
- }
545
- // The mid-loop budget gate and inline compaction both live inside
546
- // `AgentLoop.run`. Replicate them here — same formula, stubbed
547
- // estimator, and the loop's own `compact()` ceremony — so these
548
- // orchestrator tests drive the real escalation path now that the
549
- // orchestrator's `onCheckpoint` is handoff-only and compaction
550
- // runs inline rather than via an orchestrator re-entry loop.
551
- const contextWindow = options.resolveContextWindow?.();
552
- if (contextWindow?.overflowRecovery.enabled) {
553
- const { maxInputTokens, overflowRecovery } = contextWindow;
554
- const safetyMargin =
555
- info.history.length > 50
556
- ? Math.max(overflowRecovery.safetyMarginRatio, 0.15)
557
- : overflowRecovery.safetyMarginRatio;
558
- const preflightBudget = Math.floor(
559
- maxInputTokens * (1 - safetyMargin),
560
- );
561
- const estimated =
562
- typeof mockEstimateTokens === "function"
563
- ? mockEstimateTokens(info.history)
564
- : mockEstimateTokens;
565
- if (estimated > preflightBudget * 0.85) {
566
- // Mirror `AgentLoop.compact()`: when a compaction path is
567
- // supplied, run it in place and continue; on timeout or
568
- // exhaustion it returns null, so the loop yields "budget".
569
- const compacted = options.compaction
570
- ? await simulateInlineCompaction(
571
- options.compaction,
572
- info.history,
573
- options.turnContext,
574
- options.signal,
575
- onEvent,
576
- compactionCircuit,
577
- )
578
- : null;
579
- if (compacted) {
580
- exitReason = null;
581
- return "continue";
582
- }
583
- exitReason = "budget";
584
- return "budget";
585
- }
586
- }
587
- exitReason = null;
588
- return "continue";
589
- },
590
- };
591
- }
592
- const history = await fn(messages, onEvent, wrapped);
593
- return { history, exitReason };
594
- };
595
- };
596
-
597
438
  function makeCtx(
598
439
  overrides?: Partial<AgentLoopConversationContext> & {
599
- agentLoopRun?: AgentLoopRun;
440
+ providerResponses?: ScriptedResponse[];
441
+ loopProvider?: Provider;
442
+ loopTools?: ToolDefinition[];
443
+ toolExecutor?: LoopToolExecutor;
600
444
  },
601
445
  ): AgentLoopConversationContext {
602
- const agentLoopRun =
603
- overrides?.agentLoopRun ??
604
- (async (messages: Message[]) => [
605
- ...messages,
606
- {
607
- role: "assistant" as const,
608
- content: [{ type: "text" as const, text: "response" }],
609
- },
610
- ]);
611
-
612
- const compactionCircuit = new CompactionCircuit("test-conv");
446
+ const {
447
+ providerResponses,
448
+ loopProvider,
449
+ loopTools,
450
+ toolExecutor,
451
+ ...ctxOverrides
452
+ } = overrides ?? {};
453
+ const conversationId = ctxOverrides.conversationId ?? "test-conv";
454
+
455
+ // Drive the real `AgentLoop` against a scripted provider, mocking only the
456
+ // provider HTTP boundary. The loop owns its mid-loop budget gate, inline
457
+ // compaction, and event emission, so these overflow tests exercise the real
458
+ // escalation/persistence path.
459
+ const loopProviderName =
460
+ (ctxOverrides.provider as { name?: string } | undefined)?.name ??
461
+ "mock-provider";
462
+ const provider =
463
+ loopProvider ??
464
+ createMockProvider(
465
+ providerResponses ?? [textResponse("response")],
466
+ loopProviderName,
467
+ ).provider;
468
+ const agentLoop = new AgentLoop(provider, "system prompt", {
469
+ conversationId,
470
+ tools: loopTools ?? [],
471
+ toolExecutor,
472
+ });
613
473
 
614
474
  return {
615
475
  conversationId: "test-conv",
@@ -617,19 +477,16 @@ function makeCtx(
617
477
  { role: "user", content: [{ type: "text", text: "Hello" }] },
618
478
  ] as Message[],
619
479
  processing: true,
480
+ isProcessing(this: { processing: boolean }) {
481
+ return this.processing;
482
+ },
483
+ setProcessing(this: { processing: boolean }, value: boolean) {
484
+ this.processing = value;
485
+ },
620
486
  abortController: new AbortController(),
621
487
  currentRequestId: "test-req",
622
488
 
623
- agentLoop: {
624
- run: asAgentLoopRun(agentLoopRun, compactionCircuit),
625
- getToolTokenBudget: () => 0,
626
- getResolvedTools: () => [],
627
- // Tests in this file don't exercise calibration, so returning
628
- // undefined is fine — the estimator falls back to the per-provider
629
- // aggregate key.
630
- getActiveModel: () => undefined,
631
- compactionCircuit,
632
- } as unknown as AgentLoopConversationContext["agentLoop"],
489
+ agentLoop,
633
490
  provider: {
634
491
  name: "mock-provider",
635
492
  sendMessage: async () => ({
@@ -658,8 +515,6 @@ function makeCtx(
658
515
  currentTurnSurfaces: [],
659
516
 
660
517
  workingDir: "/tmp",
661
- workspaceTopLevelContext: null,
662
- workspaceTopLevelDirty: false,
663
518
  channelCapabilities: undefined,
664
519
  commandIntent: undefined,
665
520
  trustContext: undefined,
@@ -696,7 +551,6 @@ function makeCtx(
696
551
  getWorkspaceGitService: () => ({ ensureInitialized: async () => {} }),
697
552
  commitTurnChanges: async () => {},
698
553
 
699
- refreshWorkspaceTopLevelContextIfNeeded: () => {},
700
554
  markWorkspaceTopLevelDirty: () => {},
701
555
  emitActivityState: () => {},
702
556
  getQueueDepth: () => 0,
@@ -722,9 +576,10 @@ function makeCtx(
722
576
  injectedTokens: 0,
723
577
  }),
724
578
  retrackCachedNodes: () => {},
579
+ recordPkbQueryVectors: () => {},
725
580
  } as unknown as AgentLoopConversationContext["graphMemory"],
726
581
 
727
- ...overrides,
582
+ ...ctxOverrides,
728
583
  } as AgentLoopConversationContext;
729
584
  }
730
585
 
@@ -793,15 +648,15 @@ beforeEach(() => {
793
648
  recordUsageMock.mockClear();
794
649
  setAgentLoopExitReasonOnLatestLogMock.mockClear();
795
650
  addMessageMock.mockClear();
796
- // Reset the plugin registry and re-register every default so the
797
- // orchestrator's pipelines (`overflowReduce`, `persistence`, …) dispatch to
798
- // the default middleware, which in turn hits the mocked collaborators
799
- // (`reduceContextOverflow`, `syncMessageToDisk`, …) these tests install.
651
+ // Reset the plugin registry and re-register every default so the compaction
652
+ // pipeline dispatches to the default middleware, which in turn hits the
653
+ // mocked collaborators (`syncMessageToDisk`, …) these tests install.
800
654
  resetPluginRegistryAndRegisterDefaults();
801
655
  });
802
656
 
803
657
  describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
804
658
  test("usage update context max follows active main-agent profile budget", async () => {
659
+ // GIVEN an active main-agent profile that narrows the context budget
805
660
  mockLlmConfig = {
806
661
  ...structuredClone(defaultLlmConfig),
807
662
  activeProfile: "short-context",
@@ -813,27 +668,22 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
813
668
  },
814
669
  };
815
670
 
671
+ // AND a provider turn that reports 12k input tokens of usage
816
672
  const ctx = makeCtx({
817
- agentLoopRun: async (messages, onEvent) => {
818
- onEvent({
819
- type: "usage",
820
- inputTokens: 12_000,
821
- outputTokens: 300,
673
+ providerResponses: [
674
+ {
675
+ content: [{ type: "text", text: "response" }],
822
676
  model: "mock-model",
823
- providerDurationMs: 25,
824
- });
825
- return [
826
- ...messages,
827
- {
828
- role: "assistant" as const,
829
- content: [{ type: "text" as const, text: "response" }],
830
- },
831
- ];
832
- },
677
+ usage: { inputTokens: 12_000, outputTokens: 300 },
678
+ stopReason: "end_turn",
679
+ },
680
+ ],
833
681
  });
834
682
 
683
+ // WHEN the turn runs to completion
835
684
  await runAgentLoopImpl(ctx, "hello", "msg-1", () => {});
836
685
 
686
+ // THEN the recorded main-agent usage carries the profile's max budget
837
687
  const mainAgentUsageCall = recordUsageMock.mock.calls.find(
838
688
  (call) => call[5] === "main_agent",
839
689
  );
@@ -846,10 +696,9 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
846
696
 
847
697
  // ── Test 1 ────────────────────────────────────────────────────────
848
698
  // BUG: When the agent loop makes progress (adds messages to history)
849
- // before hitting context_too_large, the convergence loop at line 864
850
- // checks `updatedHistory.length === preRunHistoryLength` which is
851
- // false when progress was made. This means the reducer is never
852
- // invoked — the error is surfaced immediately at line 1163-1175
699
+ // before hitting context_too_large, the convergence loop's progress
700
+ // check must recognize that the loop appended messages. If it fails to,
701
+ // the reducer is never invoked — the error is surfaced immediately
853
702
  // without any compaction attempt.
854
703
  //
855
704
  // Expected behavior (PR 2 fix): After progress + context_too_large,
@@ -889,125 +738,31 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
889
738
  };
890
739
  };
891
740
 
892
- let agentLoopCallCount = 0;
893
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
894
- // Prime the assistant row anchor — production code emits this from
895
- // `AgentLoop.run` just before `provider.sendMessage`.
896
- await onEvent({ type: "llm_call_started" });
897
- agentLoopCallCount++;
898
- if (agentLoopCallCount === 1) {
899
- // Simulate: agent makes progress (tool calls + results added)
900
- // then hits context_too_large on next LLM call
901
- const progressMessages: Message[] = [
902
- ...messages,
903
- {
904
- role: "assistant" as const,
905
- content: [
906
- { type: "text", text: "Let me check that." },
907
- {
908
- type: "tool_use",
909
- id: "tu-progress",
910
- name: "bash",
911
- input: { command: "ls" },
912
- },
913
- ] as ContentBlock[],
914
- },
915
- {
916
- role: "user" as const,
917
- content: [
918
- {
919
- type: "tool_result",
920
- tool_use_id: "tu-progress",
921
- content: "file1.ts\nfile2.ts",
922
- is_error: false,
923
- },
924
- ] as ContentBlock[],
925
- },
926
- ];
741
+ // Run 1 makes progress (a tool turn) then the following provider call
742
+ // rejects with a context_too_large error; after the convergence reducer
743
+ // compacts, the rerun recovers with plain text.
744
+ const { provider } = createMockProvider([
745
+ toolUseResponse("tu-progress", "bash", { command: "ls" }),
746
+ new Error("prompt is too long: 242201 tokens > 200000 maximum"),
747
+ textResponse("recovered after compaction"),
748
+ ]);
927
749
 
928
- // Emit events for the progress that was made
929
- onEvent({
930
- type: "tool_use",
931
- id: "tu-progress",
750
+ const ctx = makeCtx({
751
+ loopProvider: provider,
752
+ loopTools: [
753
+ {
932
754
  name: "bash",
933
- input: { command: "ls" },
934
- });
935
- onEvent({
936
- type: "tool_result",
937
- toolUseId: "tu-progress",
938
- content: "file1.ts\nfile2.ts",
939
- isError: false,
940
- });
941
- onEvent({
942
- type: "message_complete",
943
- message: {
944
- role: "assistant",
945
- content: [
946
- { type: "text", text: "Let me check that." },
947
- {
948
- type: "tool_use",
949
- id: "tu-progress",
950
- name: "bash",
951
- input: { command: "ls" },
952
- },
953
- ],
755
+ description: "Run a shell command",
756
+ input_schema: {
757
+ type: "object",
758
+ properties: { command: { type: "string" } },
954
759
  },
955
- });
956
- onEvent({
957
- type: "usage",
958
- inputTokens: 100,
959
- outputTokens: 50,
960
- model: "test-model",
961
- providerDurationMs: 100,
962
- });
963
-
964
- // Then context_too_large error occurs on the *next* LLM call
965
- onEvent({
966
- type: "error",
967
- error: new Error(
968
- "prompt is too long: 242201 tokens > 200000 maximum",
969
- ),
970
- });
971
- onEvent({
972
- type: "usage",
973
- inputTokens: 0,
974
- outputTokens: 0,
975
- model: "test-model",
976
- providerDurationMs: 10,
977
- });
978
-
979
- // Return the history WITH progress (more messages than input)
980
- return progressMessages;
981
- }
982
-
983
- // Second call (after compaction): succeed
984
- onEvent({
985
- type: "message_complete",
986
- message: {
987
- role: "assistant",
988
- content: [{ type: "text", text: "recovered after compaction" }],
989
- },
990
- });
991
- onEvent({
992
- type: "usage",
993
- inputTokens: 50,
994
- outputTokens: 25,
995
- model: "test-model",
996
- providerDurationMs: 100,
997
- });
998
- return [
999
- ...messages,
1000
- {
1001
- role: "assistant" as const,
1002
- content: [
1003
- { type: "text", text: "recovered after compaction" },
1004
- ] as ContentBlock[],
1005
760
  },
1006
- ];
1007
- };
1008
-
1009
- const ctx = makeCtx({
1010
- agentLoopRun,
761
+ ],
762
+ toolExecutor: async () => ({
763
+ content: "file1.ts\nfile2.ts",
764
+ isError: false,
765
+ }),
1011
766
  contextWindowManager: {
1012
767
  shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
1013
768
  maybeCompact: async () => ({ compacted: false }),
@@ -1036,13 +791,14 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
1036
791
  // This test should PASS against current code (when no progress is made).
1037
792
  test("overflow recovery compacts below limit even when estimation underestimates", async () => {
1038
793
  const events: ServerMessage[] = [];
1039
- let callCount = 0;
1040
794
  let reducerCalled = false;
1041
795
 
1042
- // Estimator says 185k (below 190k budget = 200k * 0.95)
796
+ // GIVEN the estimator reports 185k under the 190k preflight budget
797
+ // (200k * 0.95), so the turn proceeds to the provider rather than
798
+ // compacting up front.
1043
799
  mockEstimateTokens = 185_000;
1044
800
 
1045
- // Reducer successfully compacts
801
+ // AND the post-run convergence reducer successfully compacts
1046
802
  mockReducerStepFn = (msgs: Message[]) => {
1047
803
  reducerCalled = true;
1048
804
  return {
@@ -1072,96 +828,46 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
1072
828
  };
1073
829
  };
1074
830
 
1075
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
1076
- // Prime the assistant row anchor — production code emits this from
1077
- // `AgentLoop.run` just before `provider.sendMessage`.
1078
- await onEvent({ type: "llm_call_started" });
1079
- callCount++;
1080
- if (callCount === 1) {
1081
- // Provider rejects with "prompt is too long: 242201 tokens > 200000"
1082
- // even though estimator said 185k
1083
- onEvent({
1084
- type: "error",
1085
- error: new Error(
1086
- "prompt is too long: 242201 tokens > 200000 maximum",
1087
- ),
1088
- });
1089
- onEvent({
1090
- type: "usage",
1091
- inputTokens: 0,
1092
- outputTokens: 0,
1093
- model: "test-model",
1094
- providerDurationMs: 10,
1095
- });
1096
- // No progress — return same messages
1097
- return messages;
1098
- }
1099
- // Second call succeeds
1100
- onEvent({
1101
- type: "message_complete",
1102
- message: {
1103
- role: "assistant",
1104
- content: [{ type: "text", text: "recovered" }],
1105
- },
1106
- });
1107
- onEvent({
1108
- type: "usage",
1109
- inputTokens: 80_000,
1110
- outputTokens: 200,
1111
- model: "test-model",
1112
- providerDurationMs: 500,
1113
- });
1114
- return [
1115
- ...messages,
1116
- {
1117
- role: "assistant" as const,
1118
- content: [{ type: "text", text: "recovered" }] as ContentBlock[],
1119
- },
1120
- ];
1121
- };
831
+ // AND a provider that rejects the first call as too long (revealing the
832
+ // real 242k count the estimator missed), then succeeds on the rerun.
833
+ const { provider, calls } = createMockProvider([
834
+ new Error("prompt is too long: 242201 tokens > 200000 maximum"),
835
+ textResponse("recovered"),
836
+ ]);
1122
837
 
1123
838
  const ctx = makeCtx({
1124
- agentLoopRun,
839
+ loopProvider: provider,
1125
840
  contextWindowManager: {
1126
841
  shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
1127
842
  maybeCompact: async () => ({ compacted: false }),
1128
843
  } as unknown as AgentLoopConversationContext["contextWindowManager"],
1129
844
  });
1130
845
 
846
+ // WHEN the turn runs
1131
847
  await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
1132
848
 
1133
- // The reducer should be called in the convergence loop
849
+ // THEN the convergence reducer ran and the rerun recovered without a
850
+ // user-facing conversation_error.
1134
851
  expect(reducerCalled).toBe(true);
1135
- // Should recover without conversation_error
1136
852
  const conversationError = events.find(
1137
853
  (e) => e.type === "conversation_error",
1138
854
  );
1139
855
  expect(conversationError).toBeUndefined();
1140
- expect(callCount).toBe(2);
856
+ expect(calls.length).toBe(2);
1141
857
  });
1142
858
 
1143
859
  // ── Test 3 ────────────────────────────────────────────────────────
1144
- // BUG: When the provider rejection reveals actual token count (e.g.,
1145
- // "242201 tokens > 200000"), the reducer should target a budget below
1146
- // the actual limit (not below the estimator's inaccurate budget).
1147
- // Currently the reducer always uses `preflightBudget` (190k) as the
1148
- // target, but the actual tokens were 242k so 190k is already too
1149
- // high relative to the real count. The target should be adjusted
1150
- // downward based on the observed mismatch.
1151
- //
1152
- // Expected behavior (PR 4 fix): `targetInputTokensOverride` should
1153
- // be adjusted based on the ratio between estimated and actual tokens.
1154
- // BUG: The targetTokens passed to the reducer is preflightBudget = 190k.
1155
- // But when the actual token count is 242k (1.31x the estimate of 185k),
1156
- // the target should be adjusted downward to account for the estimation
1157
- // inaccuracy. For example: 190k / 1.31 ≈ 145k.
1158
- // Planned fix: targetInputTokensOverride should be adjusted based on
1159
- // the ratio between estimated and actual tokens.
860
+ // When the provider rejection reveals the actual token count (e.g.,
861
+ // "242201 tokens > 200000"), the overflow reducer's `targetTokens`
862
+ // should be a budget below the actual limit, not below the estimator's
863
+ // inaccurate budget. With a preflightBudget of 190k but an actual count
864
+ // of 242k (1.31x the estimate of 185k), the target is adjusted downward
865
+ // based on the observed mismatch (190k / 1.31 ≈ 145k) so the reducer
866
+ // converges toward the real ceiling rather than the optimistic estimate.
1160
867
  test.todo(
1161
868
  "forced compaction targets a lower budget when estimation has been inaccurate",
1162
869
  async () => {
1163
870
  const events: ServerMessage[] = [];
1164
- let callCount = 0;
1165
871
  let capturedTargetTokens: number | undefined;
1166
872
 
1167
873
  // Estimator says 185k (below 190k budget = 200k * 0.95)
@@ -1197,55 +903,16 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
1197
903
  };
1198
904
  };
1199
905
 
1200
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
1201
- // Prime the assistant row anchor — production code emits this from
1202
- // `AgentLoop.run` just before `provider.sendMessage`.
1203
- await onEvent({ type: "llm_call_started" });
1204
- callCount++;
1205
- if (callCount === 1) {
1206
- // Provider rejects: actual tokens 242201, way above estimate of 185k
1207
- onEvent({
1208
- type: "error",
1209
- error: new Error(
1210
- "prompt is too long: 242201 tokens > 200000 maximum",
1211
- ),
1212
- });
1213
- onEvent({
1214
- type: "usage",
1215
- inputTokens: 0,
1216
- outputTokens: 0,
1217
- model: "test-model",
1218
- providerDurationMs: 10,
1219
- });
1220
- // No progress — return same messages
1221
- return messages;
1222
- }
1223
- // Second call succeeds after compaction
1224
- onEvent({
1225
- type: "message_complete",
1226
- message: {
1227
- role: "assistant",
1228
- content: [{ type: "text", text: "recovered" }],
1229
- },
1230
- });
1231
- onEvent({
1232
- type: "usage",
1233
- inputTokens: 80_000,
1234
- outputTokens: 200,
1235
- model: "test-model",
1236
- providerDurationMs: 500,
1237
- });
1238
- return [
1239
- ...messages,
1240
- {
1241
- role: "assistant" as const,
1242
- content: [{ type: "text", text: "recovered" }] as ContentBlock[],
1243
- },
1244
- ];
1245
- };
906
+ // The provider rejects the first call with a context_too_large error
907
+ // (actual tokens 242201, far above the 185k estimate); after forced
908
+ // compaction re-targets a lower budget, the rerun recovers with text.
909
+ const { provider, calls } = createMockProvider([
910
+ new Error("prompt is too long: 242201 tokens > 200000 maximum"),
911
+ textResponse("recovered"),
912
+ ]);
1246
913
 
1247
914
  const ctx = makeCtx({
1248
- agentLoopRun,
915
+ loopProvider: provider,
1249
916
  contextWindowManager: {
1250
917
  shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
1251
918
  maybeCompact: async () => ({ compacted: false }),
@@ -1275,7 +942,7 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
1275
942
  (e) => e.type === "conversation_error",
1276
943
  );
1277
944
  expect(conversationError).toBeUndefined();
1278
- expect(callCount).toBe(2);
945
+ expect(calls.length).toBe(2);
1279
946
  },
1280
947
  );
1281
948
 
@@ -1289,7 +956,6 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
1289
956
  async () => {
1290
957
  const events: ServerMessage[] = [];
1291
958
  const longHistory = buildLongConversation(75);
1292
- let callCount = 0;
1293
959
  let reducerCalled = false;
1294
960
 
1295
961
  // Estimator says ~195k — just above budget so preflight reducer runs
@@ -1325,38 +991,14 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
1325
991
  };
1326
992
  };
1327
993
 
1328
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
1329
- // Prime the assistant row anchor — production code emits this from
1330
- // `AgentLoop.run` just before `provider.sendMessage`.
1331
- await onEvent({ type: "llm_call_started" });
1332
- callCount++;
1333
- onEvent({
1334
- type: "message_complete",
1335
- message: {
1336
- role: "assistant",
1337
- content: [{ type: "text", text: "Here's the analysis..." }],
1338
- },
1339
- });
1340
- onEvent({
1341
- type: "usage",
1342
- inputTokens: 50_000,
1343
- outputTokens: 300,
1344
- model: "test-model",
1345
- providerDurationMs: 800,
1346
- });
1347
- return [
1348
- ...messages,
1349
- {
1350
- role: "assistant" as const,
1351
- content: [
1352
- { type: "text", text: "Here's the analysis..." },
1353
- ] as ContentBlock[],
1354
- },
1355
- ];
1356
- };
994
+ // After the preflight reducer compacts the long history under budget,
995
+ // a single provider call completes the turn with plain text.
996
+ const { provider, calls } = createMockProvider([
997
+ textResponse("Here's the analysis..."),
998
+ ]);
1357
999
 
1358
1000
  const ctx = makeCtx({
1359
- agentLoopRun,
1001
+ loopProvider: provider,
1360
1002
  messages: longHistory,
1361
1003
  contextWindowManager: {
1362
1004
  shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
@@ -1371,7 +1013,7 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
1371
1013
  // Preflight should trigger the reducer since 195k > 190k budget
1372
1014
  expect(reducerCalled).toBe(true);
1373
1015
  // Should succeed
1374
- expect(callCount).toBe(1);
1016
+ expect(calls.length).toBe(1);
1375
1017
  const conversationError = events.find(
1376
1018
  (e) => e.type === "conversation_error",
1377
1019
  );
@@ -1415,118 +1057,31 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
1415
1057
  };
1416
1058
  };
1417
1059
 
1418
- let agentLoopCallCount = 0;
1419
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
1420
- // Prime the assistant row anchor — production code emits this from
1421
- // `AgentLoop.run` just before `provider.sendMessage`.
1422
- await onEvent({ type: "llm_call_started" });
1423
- agentLoopCallCount++;
1424
- if (agentLoopCallCount === 1) {
1425
- // Agent makes progress (tool calls succeed, messages grow)
1426
- const progressMessages: Message[] = [
1427
- ...messages,
1428
- {
1429
- role: "assistant" as const,
1430
- content: [
1431
- { type: "text", text: "Running analysis..." },
1432
- {
1433
- type: "tool_use",
1434
- id: "tu-1",
1435
- name: "bash",
1436
- input: { command: "find . -name '*.ts'" },
1437
- },
1438
- ] as ContentBlock[],
1439
- },
1440
- {
1441
- role: "user" as const,
1442
- content: [
1443
- {
1444
- type: "tool_result",
1445
- tool_use_id: "tu-1",
1446
- content: "file1.ts\nfile2.ts\nfile3.ts",
1447
- is_error: false,
1448
- },
1449
- ] as ContentBlock[],
1450
- },
1451
- ];
1060
+ // Run 1 makes progress (a tool turn) then the following provider call
1061
+ // rejects with context_too_large; after emergency compaction the rerun
1062
+ // recovers with plain text.
1063
+ const { provider } = createMockProvider([
1064
+ toolUseResponse("tu-1", "bash", { command: "find . -name '*.ts'" }),
1065
+ new Error("context_length_exceeded"),
1066
+ textResponse("recovered"),
1067
+ ]);
1452
1068
 
1453
- onEvent({
1454
- type: "tool_use",
1455
- id: "tu-1",
1069
+ const ctx = makeCtx({
1070
+ loopProvider: provider,
1071
+ loopTools: [
1072
+ {
1456
1073
  name: "bash",
1457
- input: { command: "find . -name '*.ts'" },
1458
- });
1459
- onEvent({
1460
- type: "tool_result",
1461
- toolUseId: "tu-1",
1462
- content: "file1.ts\nfile2.ts\nfile3.ts",
1463
- isError: false,
1464
- });
1465
- onEvent({
1466
- type: "message_complete",
1467
- message: {
1468
- role: "assistant",
1469
- content: [
1470
- { type: "text", text: "Running analysis..." },
1471
- {
1472
- type: "tool_use",
1473
- id: "tu-1",
1474
- name: "bash",
1475
- input: { command: "find . -name '*.ts'" },
1476
- },
1477
- ],
1074
+ description: "Run a shell command",
1075
+ input_schema: {
1076
+ type: "object",
1077
+ properties: { command: { type: "string" } },
1478
1078
  },
1479
- });
1480
- onEvent({
1481
- type: "usage",
1482
- inputTokens: 190_000,
1483
- outputTokens: 100,
1484
- model: "test-model",
1485
- providerDurationMs: 200,
1486
- });
1487
-
1488
- // Then context_too_large on the next LLM call within the loop
1489
- onEvent({
1490
- type: "error",
1491
- error: new Error("context_length_exceeded"),
1492
- });
1493
- onEvent({
1494
- type: "usage",
1495
- inputTokens: 0,
1496
- outputTokens: 0,
1497
- model: "test-model",
1498
- providerDurationMs: 10,
1499
- });
1500
-
1501
- return progressMessages;
1502
- }
1503
-
1504
- // After emergency compaction, succeed
1505
- onEvent({
1506
- type: "message_complete",
1507
- message: {
1508
- role: "assistant",
1509
- content: [{ type: "text", text: "recovered" }],
1510
1079
  },
1511
- });
1512
- onEvent({
1513
- type: "usage",
1514
- inputTokens: 50_000,
1515
- outputTokens: 100,
1516
- model: "test-model",
1517
- providerDurationMs: 200,
1518
- });
1519
- return [
1520
- ...messages,
1521
- {
1522
- role: "assistant" as const,
1523
- content: [{ type: "text", text: "recovered" }] as ContentBlock[],
1524
- },
1525
- ];
1526
- };
1527
-
1528
- const ctx = makeCtx({
1529
- agentLoopRun,
1080
+ ],
1081
+ toolExecutor: async () => ({
1082
+ content: "file1.ts\nfile2.ts\nfile3.ts",
1083
+ isError: false,
1084
+ }),
1530
1085
  contextWindowManager: {
1531
1086
  shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
1532
1087
  maybeCompact: async (
@@ -1603,111 +1158,30 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
1603
1158
  return 170_000;
1604
1159
  };
1605
1160
 
1606
- let agentLoopCallCount = 0;
1607
- const agentLoopRun: AgentLoopRun = async (messages, onEvent, options) => {
1608
- // Prime the assistant row anchor — production code emits this from
1609
- // `AgentLoop.run` just before `provider.sendMessage`.
1610
- await onEvent({ type: "llm_call_started" });
1611
- agentLoopCallCount++;
1612
-
1613
- if (agentLoopCallCount === 1) {
1614
- // Simulate a tool round: assistant calls a tool, results come back
1615
- const withProgress: Message[] = [
1616
- ...messages,
1617
- {
1618
- role: "assistant" as const,
1619
- content: [
1620
- { type: "text", text: "Let me check." },
1621
- {
1622
- type: "tool_use",
1623
- id: "tu-1",
1624
- name: "bash",
1625
- input: { command: "ls" },
1626
- },
1627
- ] as ContentBlock[],
1628
- },
1629
- {
1630
- role: "user" as const,
1631
- content: [
1632
- {
1633
- type: "tool_result",
1634
- tool_use_id: "tu-1",
1635
- content: "file1.ts\nfile2.ts",
1636
- is_error: false,
1637
- },
1638
- ] as ContentBlock[],
1639
- },
1640
- ];
1641
-
1642
- onEvent({
1643
- type: "message_complete",
1644
- message: {
1645
- role: "assistant",
1646
- content: [
1647
- { type: "text", text: "Let me check." },
1648
- {
1649
- type: "tool_use",
1650
- id: "tu-1",
1651
- name: "bash",
1652
- input: { command: "ls" },
1653
- },
1654
- ],
1655
- },
1656
- });
1657
- onEvent({
1658
- type: "usage",
1659
- inputTokens: 100,
1660
- outputTokens: 50,
1661
- model: "test-model",
1662
- providerDurationMs: 100,
1663
- });
1664
-
1665
- // Call onCheckpoint — this should trigger the mid-loop budget check
1666
- // which sees 170_000 > 161_500 and returns "yield"
1667
- if (options?.onCheckpoint) {
1668
- const decision = await options.onCheckpoint({
1669
- turnIndex: 0,
1670
- toolCount: 1,
1671
- hasToolUse: true,
1672
- history: withProgress,
1673
- });
1674
- if (decision !== "continue") {
1675
- // Agent loop stops when checkpoint yields
1676
- return withProgress;
1677
- }
1678
- }
1679
-
1680
- return withProgress;
1681
- }
1161
+ // A tool round trips the mid-loop budget gate (170k > 161_500); the
1162
+ // gate compacts in place (productive) and the loop continues, so the
1163
+ // post-compaction provider call completes the turn with plain text.
1164
+ const { provider, calls } = createMockProvider([
1165
+ toolUseResponse("tu-1", "bash", { command: "ls" }),
1166
+ textResponse("done after compaction"),
1167
+ ]);
1682
1168
 
1683
- // Second call (after compaction): complete successfully
1684
- onEvent({
1685
- type: "message_complete",
1686
- message: {
1687
- role: "assistant",
1688
- content: [{ type: "text", text: "done after compaction" }],
1689
- },
1690
- });
1691
- onEvent({
1692
- type: "usage",
1693
- inputTokens: 50,
1694
- outputTokens: 25,
1695
- model: "test-model",
1696
- providerDurationMs: 100,
1697
- });
1698
- return [
1699
- ...messages,
1169
+ const ctx = makeCtx({
1170
+ loopProvider: provider,
1171
+ loopTools: [
1700
1172
  {
1701
- role: "assistant" as const,
1702
- content: [
1703
- { type: "text", text: "done after compaction" },
1704
- ] as ContentBlock[],
1173
+ name: "bash",
1174
+ description: "Run a shell command",
1175
+ input_schema: {
1176
+ type: "object",
1177
+ properties: { command: { type: "string" } },
1178
+ },
1705
1179
  },
1706
- ];
1707
- };
1708
-
1709
- const ctx = makeCtx({
1710
- agentLoopRun,
1180
+ ],
1181
+ toolExecutor: async () => ({
1182
+ content: "file1.ts\nfile2.ts",
1183
+ isError: false,
1184
+ }),
1711
1185
  contextWindowManager: {
1712
1186
  shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
1713
1187
  maybeCompact: async () => {
@@ -1741,8 +1215,9 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
1741
1215
  // The mid-loop budget check should have triggered compaction
1742
1216
  expect(compactionCalled).toBe(true);
1743
1217
 
1744
- // Agent loop should have been called twice: once before yield, once after compaction
1745
- expect(agentLoopCallCount).toBe(2);
1218
+ // Provider called twice: the tool turn that tripped the gate, then the
1219
+ // post-compaction turn that completed the run.
1220
+ expect(calls.length).toBe(2);
1746
1221
 
1747
1222
  // No conversation_error should be emitted
1748
1223
  const conversationError = events.find(
@@ -1783,104 +1258,36 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
1783
1258
  return 175_000;
1784
1259
  };
1785
1260
 
1786
- let agentLoopCallCount = 0;
1787
1261
  let contextTooLargeEmitted = false;
1788
1262
 
1789
- const agentLoopRun: AgentLoopRun = async (messages, onEvent, options) => {
1790
- // Prime the assistant row anchor — production code emits this from
1791
- // `AgentLoop.run` just before `provider.sendMessage`.
1792
- await onEvent({ type: "llm_call_started" });
1793
- agentLoopCallCount++;
1794
-
1795
- if (agentLoopCallCount === 1) {
1796
- const currentHistory = [...messages];
1797
-
1798
- // Simulate 5 tool rounds — but the checkpoint should yield at round 3
1799
- for (let i = 0; i < 5; i++) {
1800
- const toolId = `tu-${i}`;
1801
- const assistantMsg: Message = {
1802
- role: "assistant" as const,
1803
- content: [
1804
- { type: "text", text: `Step ${i}` },
1805
- {
1806
- type: "tool_use",
1807
- id: toolId,
1808
- name: "bash",
1809
- input: { command: `cmd-${i}` },
1810
- },
1811
- ] as ContentBlock[],
1812
- };
1813
- const resultMsg: Message = {
1814
- role: "user" as const,
1815
- content: [
1816
- {
1817
- type: "tool_result",
1818
- tool_use_id: toolId,
1819
- content: "x".repeat(10_000),
1820
- is_error: false,
1821
- },
1822
- ] as ContentBlock[],
1823
- };
1824
- currentHistory.push(assistantMsg, resultMsg);
1825
-
1826
- onEvent({
1827
- type: "message_complete",
1828
- message: assistantMsg,
1829
- });
1830
- onEvent({
1831
- type: "usage",
1832
- inputTokens: 50_000 + i * 20_000,
1833
- outputTokens: 50,
1834
- model: "test-model",
1835
- providerDurationMs: 100,
1836
- });
1837
-
1838
- if (options?.onCheckpoint) {
1839
- const decision = await options.onCheckpoint({
1840
- turnIndex: i,
1841
- toolCount: 1,
1842
- hasToolUse: true,
1843
- history: currentHistory,
1844
- });
1845
- if (decision !== "continue") {
1846
- return currentHistory;
1847
- }
1848
- }
1849
- }
1263
+ // Each tool round produces a large result; the estimate grows with each
1264
+ // checkpoint until tool round 3 trips the mid-loop gate (175k > 161_500).
1265
+ // Compaction runs in place (productive) and the loop continues, so the
1266
+ // following plain-text provider call completes the turn. The provider
1267
+ // never rejects with context_too_large.
1268
+ const { provider, calls } = createMockProvider([
1269
+ toolUseResponse("tu-0", "bash", { command: "cmd-0" }),
1270
+ toolUseResponse("tu-1", "bash", { command: "cmd-1" }),
1271
+ toolUseResponse("tu-2", "bash", { command: "cmd-2" }),
1272
+ textResponse("completed after mid-loop compaction"),
1273
+ ]);
1850
1274
 
1851
- return currentHistory;
1852
- }
1853
-
1854
- // Second call (after compaction): complete
1855
- onEvent({
1856
- type: "message_complete",
1857
- message: {
1858
- role: "assistant",
1859
- content: [
1860
- { type: "text", text: "completed after mid-loop compaction" },
1861
- ],
1862
- },
1863
- });
1864
- onEvent({
1865
- type: "usage",
1866
- inputTokens: 60_000,
1867
- outputTokens: 100,
1868
- model: "test-model",
1869
- providerDurationMs: 200,
1870
- });
1871
- return [
1872
- ...messages,
1275
+ const ctx = makeCtx({
1276
+ loopProvider: provider,
1277
+ loopTools: [
1873
1278
  {
1874
- role: "assistant" as const,
1875
- content: [
1876
- { type: "text", text: "completed after mid-loop compaction" },
1877
- ] as ContentBlock[],
1279
+ name: "bash",
1280
+ description: "Run a shell command",
1281
+ input_schema: {
1282
+ type: "object",
1283
+ properties: { command: { type: "string" } },
1284
+ },
1878
1285
  },
1879
- ];
1880
- };
1881
-
1882
- const ctx = makeCtx({
1883
- agentLoopRun,
1286
+ ],
1287
+ toolExecutor: async () => ({
1288
+ content: "x".repeat(10_000),
1289
+ isError: false,
1290
+ }),
1884
1291
  contextWindowManager: {
1885
1292
  shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
1886
1293
  maybeCompact: async () => {
@@ -1927,8 +1334,9 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
1927
1334
  // The provider should NEVER have rejected with context_too_large
1928
1335
  expect(contextTooLargeEmitted).toBe(false);
1929
1336
 
1930
- // Agent loop called twice: once (yielded at tool 3), once after compaction
1931
- expect(agentLoopCallCount).toBe(2);
1337
+ // Provider called four times: three tool rounds (the third trips the
1338
+ // mid-loop gate) plus the post-compaction text turn that completes.
1339
+ expect(calls.length).toBe(4);
1932
1340
 
1933
1341
  // No conversation_error
1934
1342
  const conversationError = events.find(
@@ -1957,82 +1365,7 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
1957
1365
  return 170_000;
1958
1366
  };
1959
1367
 
1960
- let agentLoopCallCount = 0;
1961
- const agentLoopRun: AgentLoopRun = async (messages, onEvent, options) => {
1962
- // Prime the assistant row anchor — production code emits this from
1963
- // `AgentLoop.run` just before `provider.sendMessage`.
1964
- await onEvent({ type: "llm_call_started" });
1965
- agentLoopCallCount++;
1966
-
1967
- // Every call: simulate tool progress then yield at checkpoint
1968
- const withProgress: Message[] = [
1969
- ...messages,
1970
- {
1971
- role: "assistant" as const,
1972
- content: [
1973
- { type: "text", text: `Tool call ${agentLoopCallCount}` },
1974
- {
1975
- type: "tool_use",
1976
- id: `tu-${agentLoopCallCount}`,
1977
- name: "bash",
1978
- input: { command: "ls" },
1979
- },
1980
- ] as ContentBlock[],
1981
- },
1982
- {
1983
- role: "user" as const,
1984
- content: [
1985
- {
1986
- type: "tool_result",
1987
- tool_use_id: `tu-${agentLoopCallCount}`,
1988
- content: "output",
1989
- is_error: false,
1990
- },
1991
- ] as ContentBlock[],
1992
- },
1993
- ];
1994
-
1995
- onEvent({
1996
- type: "message_complete",
1997
- message: {
1998
- role: "assistant",
1999
- content: [
2000
- { type: "text", text: `Tool call ${agentLoopCallCount}` },
2001
- {
2002
- type: "tool_use",
2003
- id: `tu-${agentLoopCallCount}`,
2004
- name: "bash",
2005
- input: { command: "ls" },
2006
- },
2007
- ],
2008
- },
2009
- });
2010
- onEvent({
2011
- type: "usage",
2012
- inputTokens: 100,
2013
- outputTokens: 50,
2014
- model: "test-model",
2015
- providerDurationMs: 100,
2016
- });
2017
-
2018
- // Always yield at checkpoint — simulates compaction not helping
2019
- if (options?.onCheckpoint) {
2020
- const decision = await options.onCheckpoint({
2021
- turnIndex: 0,
2022
- toolCount: 1,
2023
- hasToolUse: true,
2024
- history: withProgress,
2025
- });
2026
- if (decision !== "continue") {
2027
- return withProgress;
2028
- }
2029
- }
2030
-
2031
- return withProgress;
2032
- };
2033
-
2034
- let compactionCallCount = 0;
2035
- // Convergence reducer: reduce tokens enough to succeed
1368
+ // The convergence reducer reduces tokens enough for the rerun to recover.
2036
1369
  let convergenceReducerCalled = false;
2037
1370
  mockReducerStepFn = (msgs: Message[]) => {
2038
1371
  convergenceReducerCalled = true;
@@ -2048,8 +1381,30 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
2048
1381
  };
2049
1382
  };
2050
1383
 
1384
+ // Every provider call returns a tool_use, so each loop run does a tool
1385
+ // turn that trips the mid-loop budget gate. On the initial run the gate
1386
+ // calls compaction (which surfaces `exhausted: true`); the convergence
1387
+ // rerun runs without a compaction hook and yields "budget" directly.
1388
+ // With the reducer exhausted, the convergence loop terminates with the
1389
+ // turn still over budget and the orchestrator stamps `context_too_large`.
1390
+ const { provider, calls } = createMockProvider([
1391
+ toolUseResponse("tu-1", "bash", { command: "ls" }),
1392
+ ]);
1393
+
1394
+ let compactionCallCount = 0;
2051
1395
  const ctx = makeCtx({
2052
- agentLoopRun,
1396
+ loopProvider: provider,
1397
+ loopTools: [
1398
+ {
1399
+ name: "bash",
1400
+ description: "Run a shell command",
1401
+ input_schema: {
1402
+ type: "object",
1403
+ properties: { command: { type: "string" } },
1404
+ },
1405
+ },
1406
+ ],
1407
+ toolExecutor: async () => ({ content: "output", isError: false }),
2053
1408
  contextWindowManager: {
2054
1409
  shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
2055
1410
  maybeCompact: async () => {
@@ -2057,9 +1412,9 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
2057
1412
  // Compaction's internal retry budget is exhausted — the
2058
1413
  // compactor itself ran maxAttempts passes and still couldn't
2059
1414
  // drop below the auto-threshold. `maybeCompact` surfaces this
2060
- // via `exhausted: true` so the orchestrator escalates
2061
- // straight to the convergence loop instead of looping on a
2062
- // stuck compactor.
1415
+ // via `exhausted: true` so the loop yields "budget" and the
1416
+ // orchestrator escalates straight to the convergence loop
1417
+ // instead of looping on a stuck compactor.
2063
1418
  return {
2064
1419
  compacted: true,
2065
1420
  exhausted: true,
@@ -2094,10 +1449,10 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
2094
1449
  // `ContextWindowManager.maybeCompact`.
2095
1450
  expect(compactionCallCount).toBe(2);
2096
1451
 
2097
- // Agent loop: 1 initial + 1 convergence re-run = 2 calls. No
2098
- // mid-loop re-entries because the orchestrator broke out on
2099
- // `exhausted` before re-invoking the agent loop.
2100
- expect(agentLoopCallCount).toBe(2);
1452
+ // Provider calls: 1 initial tool turn (yields budget) + 1 convergence
1453
+ // rerun that recovers. No mid-loop re-entries because the orchestrator
1454
+ // broke out on `exhausted` before re-invoking the loop.
1455
+ expect(calls.length).toBe(2);
2101
1456
 
2102
1457
  // After the compactor exhausted itself, the convergence loop
2103
1458
  // should have been triggered (contextTooLargeDetected set to true)
@@ -2132,83 +1487,32 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
2132
1487
  return 170_000;
2133
1488
  };
2134
1489
 
2135
- // A single tool round reaches one checkpoint; the in-loop budget
2136
- // gate trips there and compaction runs in place. The loop continues
2137
- // the run itself rather than handing control back, so the
2138
- // orchestrator invokes `run()` exactly once.
2139
- let agentLoopCallCount = 0;
2140
- const agentLoopRun: AgentLoopRun = async (messages, onEvent, options) => {
2141
- await onEvent({ type: "llm_call_started" });
2142
- agentLoopCallCount++;
2143
-
2144
- const withProgress: Message[] = [
2145
- ...messages,
2146
- {
2147
- role: "assistant" as const,
2148
- content: [
2149
- { type: "text", text: `Tool call ${agentLoopCallCount}` },
2150
- {
2151
- type: "tool_use",
2152
- id: `tu-${agentLoopCallCount}`,
2153
- name: "bash",
2154
- input: { command: "ls" },
2155
- },
2156
- ] as ContentBlock[],
2157
- },
2158
- {
2159
- role: "user" as const,
2160
- content: [
2161
- {
2162
- type: "tool_result",
2163
- tool_use_id: `tu-${agentLoopCallCount}`,
2164
- content: "output",
2165
- is_error: false,
2166
- },
2167
- ] as ContentBlock[],
2168
- },
2169
- ];
2170
-
2171
- onEvent({
2172
- type: "message_complete",
2173
- message: {
2174
- role: "assistant",
2175
- content: [
2176
- { type: "text", text: `Tool call ${agentLoopCallCount}` },
2177
- {
2178
- type: "tool_use",
2179
- id: `tu-${agentLoopCallCount}`,
2180
- name: "bash",
2181
- input: { command: "ls" },
2182
- },
2183
- ],
2184
- },
2185
- });
2186
- onEvent({
2187
- type: "usage",
2188
- inputTokens: 100,
2189
- outputTokens: 50,
2190
- model: "test-model",
2191
- providerDurationMs: 100,
2192
- });
2193
-
2194
- if (options?.onCheckpoint) {
2195
- await options.onCheckpoint({
2196
- turnIndex: 0,
2197
- toolCount: 1,
2198
- hasToolUse: true,
2199
- history: withProgress,
2200
- });
2201
- }
2202
-
2203
- return withProgress;
2204
- };
1490
+ // A single tool round reaches one checkpoint; the in-loop budget gate
1491
+ // trips there and compaction runs in place. The loop continues the run
1492
+ // itself — the following provider call returns plain text and the turn
1493
+ // completes — so the orchestrator never re-enters the convergence loop.
1494
+ const { provider, calls } = createMockProvider([
1495
+ toolUseResponse("tu-1", "bash", { command: "ls" }),
1496
+ textResponse("final answer"),
1497
+ ]);
2205
1498
 
2206
1499
  // Compaction reports `estimatedInputTokens` well below the 161_500
2207
1500
  // threshold — the "compaction is productive" signal (no `exhausted`
2208
1501
  // flag) that lets the loop continue in place.
2209
1502
  let compactionCallCount = 0;
2210
1503
  const ctx = makeCtx({
2211
- agentLoopRun,
1504
+ loopProvider: provider,
1505
+ loopTools: [
1506
+ {
1507
+ name: "bash",
1508
+ description: "Run a shell command",
1509
+ input_schema: {
1510
+ type: "object",
1511
+ properties: { command: { type: "string" } },
1512
+ },
1513
+ },
1514
+ ],
1515
+ toolExecutor: async () => ({ content: "output", isError: false }),
2212
1516
  contextWindowManager: {
2213
1517
  shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
2214
1518
  maybeCompact: async () => {
@@ -2239,18 +1543,20 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
2239
1543
 
2240
1544
  await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
2241
1545
 
2242
- // 1 initial auto-compact + 1 productive mid-loop compaction. The
2243
- // loop continues in place after compacting, so the orchestrator
2244
- // never re-enters `run()` — it is invoked exactly once.
1546
+ // 1 initial auto-compact + 1 productive mid-loop compaction.
2245
1547
  expect(compactionCallCount).toBe(2);
2246
- expect(agentLoopCallCount).toBe(1);
1548
+ // The loop continued in place after compacting: a tool turn followed by
1549
+ // the post-compaction text turn, both within a single run.
1550
+ expect(calls.length).toBe(2);
2247
1551
 
2248
1552
  // No escalation to the convergence loop because the mid-loop
2249
- // `maybeCompact` returned productive (no `exhausted` flag).
1553
+ // `maybeCompact` returned productive (no `exhausted` flag), and the turn
1554
+ // completed normally.
2250
1555
  expect(setAgentLoopExitReasonOnLatestLogMock).not.toHaveBeenCalledWith(
2251
1556
  "test-conv",
2252
1557
  "context_too_large",
2253
1558
  );
1559
+ expect(events.find((e) => e.type === "conversation_error")).toBeUndefined();
2254
1560
  });
2255
1561
 
2256
1562
  // ── Test 9 ────────────────────────────────────────────────────────
@@ -2272,78 +1578,13 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
2272
1578
  return 170_000;
2273
1579
  };
2274
1580
 
2275
- let agentLoopCallCount = 0;
2276
- const agentLoopRun: AgentLoopRun = async (messages, onEvent, options) => {
2277
- // Prime the assistant row anchor — production code emits this from
2278
- // `AgentLoop.run` just before `provider.sendMessage`.
2279
- await onEvent({ type: "llm_call_started" });
2280
- agentLoopCallCount++;
2281
-
2282
- const withProgress: Message[] = [
2283
- ...messages,
2284
- {
2285
- role: "assistant" as const,
2286
- content: [
2287
- { type: "text", text: `Tool call ${agentLoopCallCount}` },
2288
- {
2289
- type: "tool_use",
2290
- id: `tu-${agentLoopCallCount}`,
2291
- name: "bash",
2292
- input: { command: "ls" },
2293
- },
2294
- ] as ContentBlock[],
2295
- },
2296
- {
2297
- role: "user" as const,
2298
- content: [
2299
- {
2300
- type: "tool_result",
2301
- tool_use_id: `tu-${agentLoopCallCount}`,
2302
- content: "output",
2303
- is_error: false,
2304
- },
2305
- ] as ContentBlock[],
2306
- },
2307
- ];
2308
-
2309
- onEvent({
2310
- type: "message_complete",
2311
- message: {
2312
- role: "assistant",
2313
- content: [
2314
- { type: "text", text: `Tool call ${agentLoopCallCount}` },
2315
- {
2316
- type: "tool_use",
2317
- id: `tu-${agentLoopCallCount}`,
2318
- name: "bash",
2319
- input: { command: "ls" },
2320
- },
2321
- ],
2322
- },
2323
- });
2324
- onEvent({
2325
- type: "usage",
2326
- inputTokens: 100,
2327
- outputTokens: 50,
2328
- model: "test-model",
2329
- providerDurationMs: 100,
2330
- });
2331
-
2332
- // Always yield at checkpoint — simulates reduction not helping enough
2333
- if (options?.onCheckpoint) {
2334
- const decision = await options.onCheckpoint({
2335
- turnIndex: 0,
2336
- toolCount: 1,
2337
- hasToolUse: true,
2338
- history: withProgress,
2339
- });
2340
- if (decision !== "continue") {
2341
- return withProgress;
2342
- }
2343
- }
2344
-
2345
- return withProgress;
2346
- };
1581
+ // Every provider call returns a tool_use, so each loop run does a tool
1582
+ // turn that trips the mid-loop budget gate and yields "budget". The
1583
+ // initial run's gate calls compaction (exhausted); the convergence
1584
+ // reruns run without a compaction hook and yield directly.
1585
+ const { provider, calls } = createMockProvider([
1586
+ toolUseResponse("tu-1", "bash", { command: "ls" }),
1587
+ ]);
2347
1588
 
2348
1589
  // Convergence reducer: first call returns non-exhausted, second returns exhausted
2349
1590
  let reducerCallCount = 0;
@@ -2375,7 +1616,18 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
2375
1616
  };
2376
1617
 
2377
1618
  const ctx = makeCtx({
2378
- agentLoopRun,
1619
+ loopProvider: provider,
1620
+ loopTools: [
1621
+ {
1622
+ name: "bash",
1623
+ description: "Run a shell command",
1624
+ input_schema: {
1625
+ type: "object",
1626
+ properties: { command: { type: "string" } },
1627
+ },
1628
+ },
1629
+ ],
1630
+ toolExecutor: async () => ({ content: "output", isError: false }),
2379
1631
  contextWindowManager: {
2380
1632
  shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
2381
1633
  // Under the new architecture (Compaction Re-homing Arc, Bullet 1)
@@ -2413,10 +1665,11 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
2413
1665
  // once more after yieldedForBudget triggered re-entry
2414
1666
  expect(reducerCallCount).toBe(2);
2415
1667
 
2416
- // Agent loop: 1 initial + 2 convergence re-runs = 3 calls. The mid-loop
2417
- // no longer drives daemon-level retries — the manager owns its retry
2418
- // budget and signals exhaustion via the `exhausted` flag.
2419
- expect(agentLoopCallCount).toBe(3);
1668
+ // Provider calls: 1 initial run + 2 convergence reruns = 3 calls, each a
1669
+ // tool turn that yields "budget". The mid-loop no longer drives
1670
+ // daemon-level retries — the manager owns its retry budget and signals
1671
+ // exhaustion via the `exhausted` flag.
1672
+ expect(calls.length).toBe(3);
2420
1673
  expect(setAgentLoopExitReasonOnLatestLogMock).toHaveBeenCalledWith(
2421
1674
  "test-conv",
2422
1675
  "context_too_large",
@@ -2516,35 +1769,10 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
2516
1769
  };
2517
1770
  };
2518
1771
 
2519
- const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
2520
- // Prime the assistant row anchor — production code emits this from
2521
- // `AgentLoop.run` just before `provider.sendMessage`.
2522
- await onEvent({ type: "llm_call_started" });
2523
- onEvent({
2524
- type: "message_complete",
2525
- message: {
2526
- role: "assistant",
2527
- content: [{ type: "text", text: "done" }],
2528
- },
2529
- });
2530
- onEvent({
2531
- type: "usage",
2532
- inputTokens: 170_000,
2533
- outputTokens: 200,
2534
- model: "test-model",
2535
- providerDurationMs: 500,
2536
- });
2537
- return [
2538
- ...messages,
2539
- {
2540
- role: "assistant" as const,
2541
- content: [{ type: "text", text: "done" }] as ContentBlock[],
2542
- },
2543
- ];
2544
- };
2545
-
1772
+ // The preflight overflow reducer runs in the orchestrator before the loop,
1773
+ // so a single successful provider turn is enough to drive the path.
2546
1774
  const ctx = makeCtx({
2547
- agentLoopRun,
1775
+ providerResponses: [textResponse("done")],
2548
1776
  contextWindowManager: {
2549
1777
  shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
2550
1778
  maybeCompact: async () => ({ compacted: false }),
@@ -2615,78 +1843,12 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
2615
1843
  // emergency compaction + final agentLoop.run path executes.
2616
1844
  mockOverflowAction = "auto_compress_latest_turn";
2617
1845
 
2618
- let agentLoopCallCount = 0;
2619
- const agentLoopRun: AgentLoopRun = async (messages, onEvent, options) => {
2620
- // Prime the assistant row anchor — production code emits this from
2621
- // `AgentLoop.run` just before `provider.sendMessage`.
2622
- await onEvent({ type: "llm_call_started" });
2623
- agentLoopCallCount++;
2624
-
2625
- const withProgress: Message[] = [
2626
- ...messages,
2627
- {
2628
- role: "assistant" as const,
2629
- content: [
2630
- { type: "text", text: `tool call ${agentLoopCallCount}` },
2631
- {
2632
- type: "tool_use",
2633
- id: `tu-${agentLoopCallCount}`,
2634
- name: "bash",
2635
- input: { command: "ls" },
2636
- },
2637
- ] as ContentBlock[],
2638
- },
2639
- {
2640
- role: "user" as const,
2641
- content: [
2642
- {
2643
- type: "tool_result",
2644
- tool_use_id: `tu-${agentLoopCallCount}`,
2645
- content: "output",
2646
- is_error: false,
2647
- },
2648
- ] as ContentBlock[],
2649
- },
2650
- ];
2651
-
2652
- onEvent({
2653
- type: "message_complete",
2654
- message: {
2655
- role: "assistant",
2656
- content: [
2657
- { type: "text", text: `tool call ${agentLoopCallCount}` },
2658
- {
2659
- type: "tool_use",
2660
- id: `tu-${agentLoopCallCount}`,
2661
- name: "bash",
2662
- input: { command: "ls" },
2663
- },
2664
- ],
2665
- },
2666
- });
2667
- onEvent({
2668
- type: "usage",
2669
- inputTokens: 100,
2670
- outputTokens: 50,
2671
- model: "test-model",
2672
- providerDurationMs: 100,
2673
- });
2674
-
2675
- // Every checkpoint yields — including the final auto_compress rerun.
2676
- if (options?.onCheckpoint) {
2677
- const decision = await options.onCheckpoint({
2678
- turnIndex: 0,
2679
- toolCount: 1,
2680
- hasToolUse: true,
2681
- history: withProgress,
2682
- });
2683
- if (decision !== "continue") {
2684
- return withProgress;
2685
- }
2686
- }
2687
-
2688
- return withProgress;
2689
- };
1846
+ // Every provider call returns a tool_use, so each loop run does a tool
1847
+ // turn that trips the mid-loop budget gate and yields "budget" —
1848
+ // including the final auto_compress rerun.
1849
+ const { provider } = createMockProvider([
1850
+ toolUseResponse("tu-1", "bash", { command: "ls" }),
1851
+ ]);
2690
1852
 
2691
1853
  // `maybeCompact` is invoked through three distinct call sites:
2692
1854
  // 1. Start-of-turn compaction (no `force` option) — return a no-op
@@ -2702,7 +1864,18 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
2702
1864
  // as BUDGET_YIELD_UNRECOVERED.
2703
1865
  let forcedMaybeCompactCallCount = 0;
2704
1866
  const ctx = makeCtx({
2705
- agentLoopRun,
1867
+ loopProvider: provider,
1868
+ loopTools: [
1869
+ {
1870
+ name: "bash",
1871
+ description: "Run a shell command",
1872
+ input_schema: {
1873
+ type: "object",
1874
+ properties: { command: { type: "string" } },
1875
+ },
1876
+ },
1877
+ ],
1878
+ toolExecutor: async () => ({ content: "output", isError: false }),
2706
1879
  contextWindowManager: {
2707
1880
  shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
2708
1881
  maybeCompact: async (