@vellumai/assistant 0.8.7 → 0.8.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (387)
  1. package/Dockerfile +20 -4
  2. package/docker-entrypoint.sh +4 -2
  3. package/docker-init-apt-root.sh +3 -1
  4. package/docker-kata-apt-env.sh +3 -1
  5. package/docker-kata-runtime-family.sh +12 -0
  6. package/docs/architecture/memory.md +1 -1
  7. package/docs/plugins.md +75 -79
  8. package/examples/plugins/echo/README.md +6 -12
  9. package/examples/plugins/echo/register.ts +0 -41
  10. package/node_modules/@vellumai/skill-host-contracts/src/server-message.ts +3 -3
  11. package/openapi.yaml +3381 -348
  12. package/package.json +1 -1
  13. package/scripts/generate-openapi.ts +68 -41
  14. package/src/__tests__/agent-loop-exit-reason.test.ts +34 -39
  15. package/src/__tests__/agent-loop-provider-error-recording.test.ts +1 -1
  16. package/src/__tests__/agent-loop.test.ts +37 -87
  17. package/src/__tests__/agent-wake-disk-pressure-callsite.test.ts +2 -0
  18. package/src/__tests__/annotate-activity-metadata.test.ts +262 -0
  19. package/src/__tests__/annotate-risk-options.test.ts +2 -3
  20. package/src/__tests__/anthropic-provider.test.ts +95 -2
  21. package/src/__tests__/assistant-event-hub.test.ts +25 -0
  22. package/src/__tests__/assistant-events-sse-shed.test.ts +8 -0
  23. package/src/__tests__/{conversation-stream-state.test.ts → assistant-stream-state.test.ts} +252 -91
  24. package/src/__tests__/auth-fallback-events-store.test.ts +116 -0
  25. package/src/__tests__/background-workers-disk-pressure.test.ts +6 -0
  26. package/src/__tests__/btw-routes.test.ts +62 -3
  27. package/src/__tests__/build-persisted-content.test.ts +184 -0
  28. package/src/__tests__/catalog-files.test.ts +1 -1
  29. package/src/__tests__/clawhub-files.test.ts +1 -1
  30. package/src/__tests__/compaction-pipeline.test.ts +1 -1
  31. package/src/__tests__/compaction.benchmark.test.ts +0 -30
  32. package/src/__tests__/config-watcher.test.ts +1 -1
  33. package/src/__tests__/conversation-abort-tool-results.test.ts +57 -19
  34. package/src/__tests__/conversation-agent-loop-disk-pressure.test.ts +6 -2
  35. package/src/__tests__/conversation-agent-loop-inference-profile.test.ts +10 -4
  36. package/src/__tests__/conversation-agent-loop-overflow.test.ts +313 -1136
  37. package/src/__tests__/conversation-agent-loop.test.ts +596 -1616
  38. package/src/__tests__/conversation-analysis-routes.test.ts +6 -0
  39. package/src/__tests__/conversation-history-web-search.test.ts +11 -1
  40. package/src/__tests__/conversation-pairing.test.ts +4 -31
  41. package/src/__tests__/conversation-process-app-control-preactivation.test.ts +6 -0
  42. package/src/__tests__/conversation-provider-retry-repair.test.ts +26 -5
  43. package/src/__tests__/conversation-queue.test.ts +2 -0
  44. package/src/__tests__/conversation-routes-disk-view.test.ts +3 -0
  45. package/src/__tests__/conversation-routes-slash-commands.test.ts +6 -5
  46. package/src/__tests__/conversation-runtime-assembly.test.ts +170 -229
  47. package/src/__tests__/conversation-runtime-workspace.test.ts +3 -24
  48. package/src/__tests__/conversation-slash-commands.test.ts +8 -42
  49. package/src/__tests__/conversation-slash-queue.test.ts +6 -1
  50. package/src/__tests__/conversation-surfaces-action-delivery.test.ts +84 -0
  51. package/src/__tests__/conversation-sync-tags.test.ts +27 -15
  52. package/src/__tests__/conversation-title-service.test.ts +135 -2
  53. package/src/__tests__/conversation-workspace-injection.test.ts +6 -1
  54. package/src/__tests__/cross-provider-web-search.test.ts +214 -1
  55. package/src/__tests__/db-schedule-syntax-migration.test.ts +5 -0
  56. package/src/__tests__/dm-persistence.test.ts +5 -1
  57. package/src/__tests__/empty-response-hook.test.ts +304 -0
  58. package/src/__tests__/feature-flag-test-helpers.ts +2 -2
  59. package/src/__tests__/gemini-image-service.test.ts +13 -0
  60. package/src/__tests__/helpers/mock-provider.ts +110 -0
  61. package/src/__tests__/helpers/native-web-search-harness.ts +129 -0
  62. package/src/__tests__/history-repair-hook.test.ts +1 -0
  63. package/src/__tests__/identity-intro-cache.test.ts +12 -100
  64. package/src/__tests__/identity-routes.test.ts +248 -7
  65. package/src/__tests__/inbound-slack-persistence.test.ts +5 -1
  66. package/src/__tests__/injector-background-turn.test.ts +2 -8
  67. package/src/__tests__/injector-chain.test.ts +106 -270
  68. package/src/__tests__/injector-disk-pressure.test.ts +3 -12
  69. package/src/__tests__/injector-document-comments.test.ts +2 -2
  70. package/src/__tests__/injector-pkb-v2-silenced.test.ts +30 -22
  71. package/src/__tests__/injector-v3-suppression.test.ts +31 -37
  72. package/src/__tests__/internal-telemetry-routes.test.ts +109 -0
  73. package/src/__tests__/list-messages-page-latest.test.ts +60 -0
  74. package/src/__tests__/list-messages-tool-merge.test.ts +20 -0
  75. package/src/__tests__/llm-usage-store.test.ts +223 -1
  76. package/src/__tests__/memory-retrieval-hook.test.ts +297 -0
  77. package/src/__tests__/memory-v2-static-injector.test.ts +103 -35
  78. package/src/__tests__/native-web-search.test.ts +191 -0
  79. package/src/__tests__/onboarding-template-contract.test.ts +2 -0
  80. package/src/__tests__/openai-image-service.test.ts +17 -0
  81. package/src/__tests__/openai-provider.test.ts +31 -1
  82. package/src/__tests__/persist-unsendable-image.test.ts +215 -0
  83. package/src/__tests__/persistence-secret-redaction.test.ts +1 -0
  84. package/src/__tests__/pipeline-runner.test.ts +29 -39
  85. package/src/__tests__/pkb-autoinject.test.ts +2 -5
  86. package/src/__tests__/plugin-bootstrap.test.ts +13 -28
  87. package/src/__tests__/plugin-registry.test.ts +0 -27
  88. package/src/__tests__/plugin-types.test.ts +2 -125
  89. package/src/__tests__/process-message-display-content.test.ts +6 -2
  90. package/src/__tests__/regenerate-fire-and-forget-trace.test.ts +5 -1
  91. package/src/__tests__/resolve-trust-class.test.ts +4 -4
  92. package/src/__tests__/runtime-events-sse-reconnect.test.ts +60 -23
  93. package/src/__tests__/schedule-routes.test.ts +603 -2
  94. package/src/__tests__/schedule-store.test.ts +41 -0
  95. package/src/__tests__/schedule-tools.test.ts +35 -0
  96. package/src/__tests__/server-history-render.test.ts +314 -1
  97. package/src/__tests__/skillssh-files.test.ts +1 -1
  98. package/src/__tests__/system-prompt.test.ts +20 -0
  99. package/src/__tests__/task-scheduler.test.ts +162 -1
  100. package/src/__tests__/terminal-tools.test.ts +6 -1
  101. package/src/__tests__/title-generate-hook.test.ts +319 -0
  102. package/src/__tests__/tool-error-hook.test.ts +278 -0
  103. package/src/__tests__/tool-preview-lifecycle.test.ts +468 -5
  104. package/src/__tests__/tool-result-metadata-plumbing.test.ts +1 -0
  105. package/src/__tests__/tool-result-truncate-hook.test.ts +127 -0
  106. package/src/__tests__/tool-result-truncation.test.ts +0 -2
  107. package/src/__tests__/ui-choice-copy-surfaces.test.ts +254 -0
  108. package/src/__tests__/ui-work-result-surface.test.ts +159 -0
  109. package/src/__tests__/usage-routes.test.ts +285 -1
  110. package/src/__tests__/user-plugin-loader.test.ts +2 -2
  111. package/src/__tests__/voice-session-bridge.test.ts +6 -3
  112. package/src/__tests__/web-search-backend-failure.test.ts +166 -0
  113. package/src/agent/loop.ts +346 -442
  114. package/src/api/events/assistant-thinking-delta.ts +33 -0
  115. package/src/api/events/tool-output-chunk.ts +45 -0
  116. package/src/api/events/tool-use-preview-start.ts +32 -0
  117. package/src/api/events/trace-event.ts +69 -0
  118. package/src/api/index.ts +48 -13
  119. package/src/api/responses/conversation-message.ts +368 -0
  120. package/src/avatar/__tests__/avatar-store.test.ts +34 -29
  121. package/src/cli/commands/__tests__/notifications.test.ts +58 -14
  122. package/src/cli/commands/notifications.ts +112 -60
  123. package/src/config/assistant-feature-flags.ts +22 -11
  124. package/src/config/bundled-skills/app-builder/SKILL.md +3 -20
  125. package/src/config/bundled-skills/app-builder/references/examples/README.md +17 -0
  126. package/src/config/bundled-skills/app-builder/references/examples/expense-tracker.md +515 -0
  127. package/src/config/bundled-skills/app-builder/references/examples/focus-timer.md +342 -0
  128. package/src/config/bundled-skills/app-builder/references/examples/habit-tracker.md +490 -0
  129. package/src/config/bundled-skills/document-editor/SKILL.md +1 -1
  130. package/src/config/bundled-skills/messaging/SKILL.md +0 -7
  131. package/src/config/feature-flag-cache.ts +3 -3
  132. package/src/config/feature-flag-registry.json +35 -3
  133. package/src/config/schemas/__tests__/memory-v2.test.ts +1 -0
  134. package/src/config/schemas/__tests__/memory-v3.test.ts +25 -0
  135. package/src/config/schemas/llm.ts +1 -0
  136. package/src/config/schemas/memory-v2.ts +8 -0
  137. package/src/config/schemas/memory-v3.ts +8 -0
  138. package/src/config/schemas/platform.ts +8 -0
  139. package/src/config/seed-inference-profiles.ts +2 -2
  140. package/src/config/skills.ts +13 -0
  141. package/src/context/compactor.ts +1 -1
  142. package/src/context/strip-injections.ts +122 -0
  143. package/src/context/token-estimator.ts +23 -0
  144. package/src/context/tool-result-truncation.ts +0 -23
  145. package/src/context/window-manager.ts +3 -6
  146. package/src/credential-execution/executable-discovery.ts +16 -0
  147. package/src/daemon/__tests__/conversation-lifecycle-auto-analyze.test.ts +6 -0
  148. package/src/daemon/__tests__/inference-profile-notification.test.ts +153 -0
  149. package/src/daemon/__tests__/native-web-search-metadata.test.ts +10 -8
  150. package/src/daemon/assistant-attachments.ts +1 -1
  151. package/src/daemon/config-watcher.ts +2 -2
  152. package/src/daemon/context-overflow-reducer.ts +0 -1
  153. package/src/daemon/conversation-agent-loop-handlers.ts +605 -153
  154. package/src/daemon/conversation-agent-loop.ts +281 -760
  155. package/src/daemon/conversation-history.ts +5 -4
  156. package/src/daemon/conversation-lifecycle.ts +3 -4
  157. package/src/daemon/conversation-messaging.ts +7 -6
  158. package/src/daemon/conversation-process.ts +11 -16
  159. package/src/daemon/conversation-runtime-assembly.ts +130 -347
  160. package/src/daemon/conversation-slash.ts +6 -25
  161. package/src/daemon/conversation-surfaces.ts +222 -4
  162. package/src/daemon/conversation-tool-setup.ts +2 -29
  163. package/src/daemon/conversation.ts +32 -14
  164. package/src/daemon/external-plugins-bootstrap.ts +9 -10
  165. package/src/daemon/handlers/config-a2a.ts +51 -36
  166. package/src/daemon/handlers/config-slack-channel.ts +20 -14
  167. package/src/daemon/handlers/config-telegram.ts +16 -2
  168. package/src/daemon/handlers/shared.ts +156 -84
  169. package/src/daemon/handlers/skills.ts +39 -10
  170. package/src/daemon/lifecycle.ts +4 -0
  171. package/src/daemon/message-types/apps.ts +1 -29
  172. package/src/daemon/message-types/messages.ts +9 -57
  173. package/src/daemon/message-types/skills.ts +2 -0
  174. package/src/daemon/message-types/surfaces.ts +136 -3
  175. package/src/daemon/now-scratchpad.ts +21 -0
  176. package/src/daemon/orphan-reaper.test.ts +210 -0
  177. package/src/daemon/orphan-reaper.ts +240 -0
  178. package/src/daemon/persist-unsendable-image.ts +117 -0
  179. package/src/daemon/process-message.ts +1 -3
  180. package/src/daemon/trace-emitter.ts +6 -4
  181. package/src/daemon/trust-context.ts +19 -0
  182. package/src/daemon/wake-target-adapter.ts +3 -1
  183. package/src/home/home-greeting-cache.ts +24 -1
  184. package/src/ipc/gateway-client.test.ts +2 -2
  185. package/src/ipc/gateway-client.ts +3 -3
  186. package/src/media/gemini-image-service.ts +15 -0
  187. package/src/media/openai-image-service.ts +14 -0
  188. package/src/media/types.ts +34 -0
  189. package/src/memory/__tests__/jobs-worker-v2-schedule.test.ts +56 -0
  190. package/src/memory/auth-fallback-events-store.ts +94 -0
  191. package/src/memory/conversation-title-service.ts +65 -41
  192. package/src/memory/db-init.ts +4 -0
  193. package/src/memory/graph/__tests__/conversation-graph-memory-registry.test.ts +119 -0
  194. package/src/memory/graph/conversation-graph-memory.ts +65 -0
  195. package/src/memory/jobs-store.ts +33 -0
  196. package/src/memory/jobs-worker.ts +31 -4
  197. package/src/memory/llm-usage-store.ts +224 -50
  198. package/src/memory/migrations/222-strip-placeholder-sentinels-from-messages.ts +6 -5
  199. package/src/memory/migrations/270-schedule-source-conversation.ts +13 -0
  200. package/src/memory/migrations/271-create-auth-fallback-events.ts +21 -0
  201. package/src/memory/migrations/index.ts +2 -0
  202. package/src/memory/pkb/autoinject.ts +61 -0
  203. package/src/memory/pkb/context.ts +50 -0
  204. package/src/memory/pkb/types.ts +14 -0
  205. package/src/memory/schedule-attribution-sql.ts +104 -0
  206. package/src/memory/schema/infrastructure.ts +16 -0
  207. package/src/memory/usage-grouped-buckets.ts +6 -1
  208. package/src/memory/v2/__tests__/consolidation-job.test.ts +1 -1
  209. package/src/memory/v2/consolidation-job.ts +1 -1
  210. package/src/memory/v3/__tests__/health.test.ts +16 -0
  211. package/src/memory/v3/__tests__/orchestrate.test.ts +45 -9
  212. package/src/memory/v3/__tests__/provider-blocks.test.ts +13 -0
  213. package/src/memory/v3/__tests__/router.test.ts +101 -29
  214. package/src/memory/v3/__tests__/selector.test.ts +93 -27
  215. package/src/memory/v3/__tests__/shadow-plugin.test.ts +23 -5
  216. package/src/memory/v3/health.ts +0 -0
  217. package/src/memory/v3/llm-retry.ts +32 -0
  218. package/src/memory/v3/orchestrate.ts +26 -14
  219. package/src/memory/v3/provider-blocks.ts +15 -5
  220. package/src/memory/v3/router.ts +48 -42
  221. package/src/memory/v3/selector.ts +57 -42
  222. package/src/memory/v3/shadow-plugin.ts +47 -15
  223. package/src/memory/v3/types.ts +8 -0
  224. package/src/notifications/conversation-pairing.ts +8 -15
  225. package/src/notifications/decision-engine.ts +6 -3
  226. package/src/notifications/home-feed-side-effect.ts +12 -1
  227. package/src/permissions/prompter.ts +4 -0
  228. package/src/plugin-api/constants.ts +4 -0
  229. package/src/plugin-api/index.ts +8 -1
  230. package/src/plugin-api/types.ts +151 -1
  231. package/src/plugins/defaults/empty-response/hooks/stop.ts +126 -0
  232. package/src/plugins/defaults/empty-response/register.ts +8 -13
  233. package/src/plugins/defaults/index.ts +1 -15
  234. package/src/plugins/defaults/injectors/register.ts +243 -74
  235. package/src/plugins/defaults/memory-retrieval/hooks/post-compact.ts +91 -0
  236. package/src/plugins/defaults/memory-retrieval/hooks/user-prompt-submit-temp.ts +216 -0
  237. package/src/plugins/defaults/memory-retrieval/injector-chain.ts +35 -0
  238. package/src/plugins/defaults/title-generate/hooks/stop.ts +75 -0
  239. package/src/plugins/defaults/title-generate/hooks/user-prompt-submit.ts +35 -0
  240. package/src/plugins/defaults/title-generate/package.json +1 -1
  241. package/src/plugins/defaults/title-generate/register.ts +18 -18
  242. package/src/plugins/defaults/tool-error/hooks/post-tool-use.ts +118 -0
  243. package/src/plugins/defaults/tool-error/package.json +1 -1
  244. package/src/plugins/defaults/tool-error/register.ts +9 -21
  245. package/src/plugins/defaults/tool-result-truncate/hooks/post-tool-use.ts +32 -0
  246. package/src/plugins/defaults/tool-result-truncate/register.ts +10 -21
  247. package/src/plugins/defaults/tool-result-truncate/terminal.ts +37 -18
  248. package/src/plugins/pipeline.ts +6 -18
  249. package/src/plugins/registry.ts +8 -25
  250. package/src/plugins/types.ts +43 -474
  251. package/src/proactive-artifact/aux-message-injector.ts +3 -3
  252. package/src/proactive-artifact/job.test.ts +7 -12
  253. package/src/prompts/__tests__/system-prompt.test.ts +36 -0
  254. package/src/prompts/templates/BOOTSTRAP-ACTIVATION-RAIL.md +62 -0
  255. package/src/prompts/templates/BOOTSTRAP.md +2 -2
  256. package/src/prompts/templates/system-sections.ts +15 -0
  257. package/src/providers/anthropic/client.ts +37 -29
  258. package/src/providers/openai/__tests__/chat-completions-provider-reasoning.test.ts +112 -0
  259. package/src/providers/openai/chat-completions-provider.ts +44 -0
  260. package/src/providers/openrouter/client.ts +1 -0
  261. package/src/providers/placeholder-sentinels.ts +35 -0
  262. package/src/runtime/__tests__/agent-wake.test.ts +5 -1
  263. package/src/runtime/agent-wake.ts +2 -2
  264. package/src/runtime/assistant-event-hub.ts +36 -6
  265. package/src/runtime/{conversation-stream-state.ts → assistant-stream-state.ts} +132 -58
  266. package/src/runtime/http-router.ts +16 -21
  267. package/src/runtime/http-types.ts +16 -70
  268. package/src/runtime/pending-interactions.ts +1 -0
  269. package/src/runtime/routes/__tests__/consolidation-routes.test.ts +265 -2
  270. package/src/runtime/routes/__tests__/conversation-query-routes.test.ts +31 -1
  271. package/src/runtime/routes/__tests__/memory-v2-routes.test.ts +6 -2
  272. package/src/runtime/routes/__tests__/tts-routes.test.ts +6 -2
  273. package/src/runtime/routes/app-management-routes.ts +6 -117
  274. package/src/runtime/routes/app-routes.ts +13 -15
  275. package/src/runtime/routes/attachment-routes.ts +26 -15
  276. package/src/runtime/routes/avatar-routes.ts +26 -0
  277. package/src/runtime/routes/btw-routes.ts +29 -23
  278. package/src/runtime/routes/consolidation-routes.ts +120 -20
  279. package/src/runtime/routes/conversation-query-routes.ts +2 -0
  280. package/src/runtime/routes/conversation-routes.ts +358 -184
  281. package/src/runtime/routes/documents-routes.ts +4 -0
  282. package/src/runtime/routes/domain-routes.ts +51 -37
  283. package/src/runtime/routes/epoch-millis-range.ts +34 -0
  284. package/src/runtime/routes/events-routes.ts +28 -34
  285. package/src/runtime/routes/gateway-log-routes.ts +26 -4
  286. package/src/runtime/routes/heartbeat-routes.ts +32 -12
  287. package/src/runtime/routes/identity-intro-cache.ts +11 -34
  288. package/src/runtime/routes/identity-routes.ts +208 -17
  289. package/src/runtime/routes/image-generation-routes.ts +40 -2
  290. package/src/runtime/routes/index.ts +2 -0
  291. package/src/runtime/routes/integrations/a2a.ts +12 -10
  292. package/src/runtime/routes/integrations/slack/__tests__/channel.test.ts +16 -0
  293. package/src/runtime/routes/integrations/slack/channel.ts +4 -0
  294. package/src/runtime/routes/integrations/slack/share.ts +27 -6
  295. package/src/runtime/routes/integrations/telegram.ts +6 -0
  296. package/src/runtime/routes/integrations/twilio.ts +42 -0
  297. package/src/runtime/routes/internal-telemetry-routes.ts +88 -0
  298. package/src/runtime/routes/log-export-routes.ts +8 -0
  299. package/src/runtime/routes/memory-v2-routes.ts +15 -8
  300. package/src/runtime/routes/memory-v3-routes.ts +50 -28
  301. package/src/runtime/routes/oauth-apps.ts +66 -12
  302. package/src/runtime/routes/oauth-providers.ts +44 -5
  303. package/src/runtime/routes/platform-routes.ts +81 -5
  304. package/src/runtime/routes/playground/__tests__/force-compact.test.ts +6 -4
  305. package/src/runtime/routes/playground/force-compact.ts +1 -1
  306. package/src/runtime/routes/rename-conversation-routes.ts +5 -0
  307. package/src/runtime/routes/schedule-routes.ts +152 -42
  308. package/src/runtime/routes/secret-routes.ts +14 -2
  309. package/src/runtime/routes/skills-routes.ts +43 -14
  310. package/src/runtime/routes/tool-call-confirmation-enrichment.test.ts +161 -0
  311. package/src/runtime/routes/tool-call-confirmation-enrichment.ts +107 -0
  312. package/src/runtime/routes/trust-rules-routes.ts +26 -2
  313. package/src/runtime/routes/tts-routes.ts +35 -0
  314. package/src/runtime/routes/types.ts +66 -8
  315. package/src/runtime/routes/usage-routes.ts +47 -39
  316. package/src/runtime/routes/webhook-routes.ts +41 -2
  317. package/src/runtime/routes/workspace-routes.ts +4 -0
  318. package/src/runtime/services/__tests__/analyze-conversation.test.ts +6 -0
  319. package/src/runtime/services/analyze-conversation.ts +2 -2
  320. package/src/schedule/schedule-store.ts +20 -1
  321. package/src/schedule/schedule-usage-store.ts +83 -0
  322. package/src/schedule/scheduler.ts +12 -5
  323. package/src/skills/catalog-files.ts +2 -2
  324. package/src/skills/catalog-install.ts +3 -0
  325. package/src/skills/categories-cache.ts +118 -0
  326. package/src/skills/clawhub-files.ts +1 -2
  327. package/src/skills/skillssh-files.ts +1 -2
  328. package/src/telemetry/types.ts +29 -1
  329. package/src/telemetry/usage-telemetry-reporter.test.ts +112 -3
  330. package/src/telemetry/usage-telemetry-reporter.ts +57 -2
  331. package/src/tools/executor.ts +1 -53
  332. package/src/tools/network/__tests__/web-search-metadata.test.ts +7 -1
  333. package/src/tools/network/__tests__/web-search.test.ts +11 -3
  334. package/src/tools/network/web-search-error.test.ts +248 -0
  335. package/src/tools/network/web-search-error.ts +267 -0
  336. package/src/tools/network/web-search.ts +207 -48
  337. package/src/tools/schedule/create.ts +2 -0
  338. package/src/tools/terminal/safe-env.ts +10 -1
  339. package/src/tools/ui-surface/definitions.ts +9 -1
  340. package/src/tts/__tests__/provider-catalog-consistency.test.ts +85 -1
  341. package/src/tts/provider-catalog.ts +76 -1
  342. package/src/util/mutex.ts +47 -0
  343. package/src/workspace/git-service.ts +1 -42
  344. package/src/workspace/migrations/095-bump-heartbeat-interval-30m-to-60m.ts +51 -0
  345. package/src/workspace/migrations/096-reduce-quality-profile-effort.ts +72 -0
  346. package/src/workspace/migrations/097-enable-adaptive-thinking-managed-profiles.ts +93 -0
  347. package/src/workspace/migrations/registry.ts +6 -0
  348. package/src/__tests__/bootstrap-turn-cleanup.test.ts +0 -44
  349. package/src/__tests__/empty-response-pipeline.test.ts +0 -423
  350. package/src/__tests__/llm-call-pipeline.test.ts +0 -287
  351. package/src/__tests__/memory-retrieval-pipeline.test.ts +0 -418
  352. package/src/__tests__/persistence-pipeline.test.ts +0 -503
  353. package/src/__tests__/title-generate-pipeline.test.ts +0 -211
  354. package/src/__tests__/token-estimate-pipeline.test.ts +0 -479
  355. package/src/__tests__/tool-error-pipeline.test.ts +0 -241
  356. package/src/__tests__/tool-execute-pipeline.test.ts +0 -417
  357. package/src/__tests__/tool-result-truncate-pipeline.test.ts +0 -341
  358. package/src/daemon/bootstrap-turn-cleanup.ts +0 -45
  359. package/src/gallery/default-gallery.ts +0 -1359
  360. package/src/gallery/gallery-manifest.ts +0 -28
  361. package/src/home/feature-gate.ts +0 -22
  362. package/src/plugins/defaults/empty-response/middlewares/emptyResponse.ts +0 -22
  363. package/src/plugins/defaults/empty-response/terminal.ts +0 -106
  364. package/src/plugins/defaults/injectors/package.json +0 -15
  365. package/src/plugins/defaults/llm-call/middlewares/llmCall.ts +0 -17
  366. package/src/plugins/defaults/llm-call/package.json +0 -15
  367. package/src/plugins/defaults/llm-call/register.ts +0 -45
  368. package/src/plugins/defaults/memory-retrieval/middlewares/memoryRetrieval.ts +0 -17
  369. package/src/plugins/defaults/memory-retrieval/package.json +0 -15
  370. package/src/plugins/defaults/memory-retrieval/register.ts +0 -181
  371. package/src/plugins/defaults/persistence/middlewares/persistence.ts +0 -19
  372. package/src/plugins/defaults/persistence/package.json +0 -15
  373. package/src/plugins/defaults/persistence/register.ts +0 -38
  374. package/src/plugins/defaults/persistence/terminal.ts +0 -83
  375. package/src/plugins/defaults/title-generate/terminal.ts +0 -31
  376. package/src/plugins/defaults/token-estimate/middlewares/tokenEstimate.ts +0 -23
  377. package/src/plugins/defaults/token-estimate/package.json +0 -15
  378. package/src/plugins/defaults/token-estimate/register.ts +0 -34
  379. package/src/plugins/defaults/token-estimate/terminal.ts +0 -40
  380. package/src/plugins/defaults/tool-error/middlewares/toolError.ts +0 -21
  381. package/src/plugins/defaults/tool-error/terminal.ts +0 -47
  382. package/src/plugins/defaults/tool-execute/middlewares/toolExecute.ts +0 -23
  383. package/src/plugins/defaults/tool-execute/package.json +0 -15
  384. package/src/plugins/defaults/tool-execute/register.ts +0 -49
  385. package/src/plugins/defaults/tool-result-truncate/middlewares/toolResultTruncate.ts +0 -23
  386. package/src/plugins/defaults/tool-result-truncate/types.ts +0 -22
  387. package/src/skills/category-inference.ts +0 -111
@@ -40,12 +40,35 @@ mock.module("../config/loader.js", () => ({
40
40
  }));
41
41
 
42
42
  // ── Mock conversation-crud (used by handleToolResult/handleMessageComplete) ──
43
+ // Reserve returns a role-distinct id so tests can tell the grouped tool-result
44
+ // `user` row apart from the assistant row, and assert it is reserved exactly
45
+ // once per batch. `updateMessageContent` is a spy so tests can inspect the
46
+ // content written into the row on each arrival.
47
+ // Widen the reservation window so concurrent tool-result handlers provably
48
+ // overlap before the first `reserveMessage` resolves; defaults to no delay.
49
+ let reserveMessageDelayMs = 0;
50
+ const reserveMessageMock = mock(
51
+ async (_conversationId: string, role: string) => {
52
+ if (reserveMessageDelayMs > 0) {
53
+ await new Promise((resolve) =>
54
+ setTimeout(resolve, reserveMessageDelayMs),
55
+ );
56
+ }
57
+ return { id: role === "user" ? "tool-result-row" : "assistant-row" };
58
+ },
59
+ );
60
+ const updateMessageContentMock = mock((_id: string, _content: string) => {});
61
+
43
62
  mock.module("../memory/conversation-crud.js", () => ({
44
- addMessage: () => ({ id: "mock-msg-id" }),
63
+ getConversation: () => null,
45
64
  getMessageById: () => null,
46
- updateMessageContent: () => {},
65
+ updateMessageContent: updateMessageContentMock,
47
66
  provenanceFromTrustContext: () => ({}),
48
- reserveMessage: mock(async () => ({ id: "msg-reserve" })),
67
+ reserveMessage: reserveMessageMock,
68
+ }));
69
+
70
+ mock.module("../memory/conversation-disk-view.js", () => ({
71
+ syncMessageToDisk: () => {},
49
72
  }));
50
73
 
51
74
  mock.module("../memory/llm-request-log-store.js", () => ({
@@ -53,19 +76,37 @@ mock.module("../memory/llm-request-log-store.js", () => ({
53
76
  backfillMessageIdOnLogs: () => {},
54
77
  }));
55
78
 
79
+ mock.module("../memory/memory-recall-log-store.js", () => ({
80
+ backfillMemoryRecallLogMessageId: () => {},
81
+ }));
82
+
83
+ mock.module("../memory/memory-v2-activation-log-store.js", () => ({
84
+ backfillMemoryV2ActivationMessageId: () => {},
85
+ }));
86
+
56
87
  // ── Imports (after mocks) ─────────────────────────────────────────────────────
88
+ import type { AgentEvent } from "../agent/loop.js";
57
89
  import type {
58
90
  EventHandlerDeps,
59
91
  EventHandlerState,
60
92
  } from "../daemon/conversation-agent-loop-handlers.js";
61
93
  import {
62
94
  createEventHandlerState,
95
+ dispatchAgentEvent,
63
96
  handleInputJsonDelta,
97
+ handleMessageComplete,
64
98
  handleToolResult,
65
99
  handleToolUse,
66
100
  handleToolUsePreviewStart,
67
101
  } from "../daemon/conversation-agent-loop-handlers.js";
68
102
  import type { ServerMessage } from "../daemon/message-protocol.js";
103
+ import type { AssistantEvent } from "../runtime/assistant-event.js";
104
+ import {
105
+ _resetStreamStateForTesting,
106
+ getCurrentSeq,
107
+ getPersistedSeq,
108
+ stampAndBuffer,
109
+ } from "../runtime/assistant-stream-state.js";
69
110
 
70
111
  // ── Helpers ───────────────────────────────────────────────────────────────────
71
112
 
@@ -243,7 +284,7 @@ describe("tool preview lifecycle", () => {
243
284
  expect((emitted as any).conversationId).toBe("test-session-id");
244
285
  });
245
286
 
246
- test("handleToolResult includes toolUseId", () => {
287
+ test("handleToolResult includes toolUseId", async () => {
247
288
  const collector = createEventCollector();
248
289
  const deps = createMockDeps({
249
290
  onEvent: collector.onEvent,
@@ -260,7 +301,7 @@ describe("tool preview lifecycle", () => {
260
301
  });
261
302
  state.currentTurnToolUseIds.push("toolu_result789");
262
303
 
263
- handleToolResult(state, deps, {
304
+ await handleToolResult(state, deps, {
264
305
  type: "tool_result",
265
306
  toolUseId: "toolu_result789",
266
307
  content: "file1.txt\nfile2.txt",
@@ -278,6 +319,428 @@ describe("tool preview lifecycle", () => {
278
319
 
279
320
  // ── Event ordering ────────────────────────────────────────────────────────
280
321
 
322
+ describe("persisted seq advances on tool_use_start", () => {
323
+ beforeEach(() => {
324
+ _resetStreamStateForTesting();
325
+ });
326
+
327
+ test("advances the conversation's persisted seq to the tool_use_start seq", () => {
328
+ /**
329
+ * The assistant row (including tool_use blocks) is persisted at
330
+ * message_complete, which precedes tool events. handleToolUse emits a
331
+ * seq-stamped tool_use_start afterward, so the persisted seq must catch
332
+ * up to that event -- otherwise /messages would advertise a seq below an
333
+ * event it already reflects.
334
+ */
335
+ // GIVEN an onEvent that stamps conversation-scoped events like the hub
336
+ const collector = createEventCollector();
337
+ const conversationId = "test-session-id";
338
+ const deps = createMockDeps({
339
+ onEvent: (msg: ServerMessage) => {
340
+ collector.events.push(msg);
341
+ stampAndBuffer(msg as unknown as AssistantEvent);
342
+ },
343
+ ctx: {
344
+ ...createMockDeps().ctx,
345
+ conversationId,
346
+ emitActivityState: collector.emitActivityState,
347
+ } as unknown as EventHandlerDeps["ctx"],
348
+ });
349
+
350
+ // AND prior streamed text deltas have already advanced the global seq
351
+ stampAndBuffer({
352
+ type: "assistant_text_delta",
353
+ text: "hello",
354
+ conversationId,
355
+ } as unknown as AssistantEvent);
356
+ stampAndBuffer({
357
+ type: "assistant_text_delta",
358
+ text: " world",
359
+ conversationId,
360
+ } as unknown as AssistantEvent);
361
+
362
+ // WHEN a tool_use is handled (its block is already durable)
363
+ handleToolUse(state, deps, {
364
+ type: "tool_use",
365
+ id: "toolu_abc123",
366
+ name: "bash",
367
+ input: { command: "ls" },
368
+ });
369
+
370
+ // THEN the persisted seq equals the just-stamped tool_use_start seq
371
+ const toolUseStart = collector.events.find(
372
+ (e) => e.type === "tool_use_start",
373
+ );
374
+ expect(toolUseStart).toBeDefined();
375
+ expect(getPersistedSeq(conversationId)).toBe(getCurrentSeq());
376
+ expect(getPersistedSeq(conversationId)).toBe(
377
+ (toolUseStart as unknown as AssistantEvent).seq ?? null,
378
+ );
379
+ });
380
+ });
381
+
382
+ describe("persisted seq advances at the turn boundary for all turn types", () => {
383
+ const conversationId = "test-session-id";
384
+
385
+ beforeEach(() => {
386
+ _resetStreamStateForTesting();
387
+ });
388
+
389
+ /** onEvent that stamps conversation-scoped events like the runtime hub. */
390
+ function makeStampingDeps(
391
+ overrides: Partial<EventHandlerDeps["ctx"]> = {},
392
+ ): { deps: EventHandlerDeps; events: ServerMessage[] } {
393
+ const events: ServerMessage[] = [];
394
+ const deps = createMockDeps({
395
+ onEvent: (msg: ServerMessage) => {
396
+ events.push(msg);
397
+ stampAndBuffer(msg as unknown as AssistantEvent);
398
+ },
399
+ ctx: {
400
+ ...createMockDeps().ctx,
401
+ conversationId,
402
+ ...overrides,
403
+ } as unknown as EventHandlerDeps["ctx"],
404
+ });
405
+ return { deps, events };
406
+ }
407
+
408
+ test("a streamed thinking delta is mirrored for incremental persistence", async () => {
409
+ /**
410
+ * Thinking rides the same mirror-and-flush path as text, so a thinking
411
+ * delta is appended to the running view and bumps the single persisted
412
+ * seq field -- the debounced partial flush then writes it to the row,
413
+ * letting long reasoning streams survive a refresh just like long
414
+ * answers do.
415
+ */
416
+ // GIVEN a turn that streams thinking
417
+ const { deps, events } = makeStampingDeps({ streamThinking: true });
418
+ state.lastAssistantMessageId = "assistant-msg-1";
419
+
420
+ // WHEN a thinking_delta is dispatched
421
+ await dispatchAgentEvent(state, deps, {
422
+ type: "thinking_delta",
423
+ thinking: "Let me reason about this.",
424
+ } as Extract<AgentEvent, { type: "thinking_delta" }>);
425
+
426
+ // THEN it is mirrored into the running view and the persisted seq field
427
+ // tracks the emitted delta
428
+ const thinkingDelta = events.find(
429
+ (e) => e.type === "assistant_thinking_delta",
430
+ );
431
+ expect(thinkingDelta).toBeDefined();
432
+ expect(state.currentMessageContent).toEqual([
433
+ {
434
+ type: "thinking",
435
+ thinking: "Let me reason about this.",
436
+ signature: "",
437
+ },
438
+ ]);
439
+ expect(state.lastPersistedContentSeq).toBe(
440
+ (thinkingDelta as unknown as AssistantEvent).seq ?? undefined,
441
+ );
442
+ });
443
+
444
+ test("a thinking-only turn advances the persisted seq to the thinking delta", async () => {
445
+ /**
446
+ * Reasoning-model turns can emit thinking with no text delta. Because
447
+ * thinking is now mirrored and flushed like text, the persisted seq
448
+ * advances to the streamed thinking_delta -- otherwise /messages would
449
+ * advertise a seq behind content the snapshot already reflects.
450
+ */
451
+ // GIVEN a turn that streams thinking (no text delta)
452
+ const { deps, events } = makeStampingDeps({ streamThinking: true });
453
+ state.lastAssistantMessageId = "assistant-msg-1";
454
+
455
+ // WHEN a thinking_delta is dispatched, then the turn completes
456
+ await dispatchAgentEvent(state, deps, {
457
+ type: "thinking_delta",
458
+ thinking: "Let me reason about this.",
459
+ } as Extract<AgentEvent, { type: "thinking_delta" }>);
460
+ await handleMessageComplete(state, deps, {
461
+ type: "message_complete",
462
+ message: {
463
+ role: "assistant",
464
+ content: [
465
+ { type: "thinking", thinking: "Let me reason about this." },
466
+ ],
467
+ },
468
+ } as Extract<AgentEvent, { type: "message_complete" }>);
469
+
470
+ // THEN the persisted seq equals the streamed thinking delta's seq
471
+ const thinkingDelta = events.find(
472
+ (e) => e.type === "assistant_thinking_delta",
473
+ );
474
+ expect(thinkingDelta).toBeDefined();
475
+ expect(getPersistedSeq(conversationId)).toBe(getCurrentSeq());
476
+ expect(getPersistedSeq(conversationId)).toBe(
477
+ (thinkingDelta as unknown as AssistantEvent).seq ?? null,
478
+ );
479
+ });
480
+
481
+ test("a tool result advances the persisted seq on arrival", async () => {
482
+ /**
483
+ * Tool results are persisted into their grouped row as they arrive (so a
484
+ * long-running tool's output survives a refresh), advancing the persisted
485
+ * seq to the just-stamped tool_result event rather than deferring to
486
+ * message_complete.
487
+ */
488
+ // GIVEN a tool whose result is about to arrive
489
+ const { deps, events } = makeStampingDeps({ streamThinking: true });
490
+ state.lastAssistantMessageId = "assistant-msg-1";
491
+ state.toolUseIdToName.set("toolu_result", "bash");
492
+ state.toolCallTimestamps.set("toolu_result", { startedAt: Date.now() });
493
+ state.currentTurnToolUseIds.push("toolu_result");
494
+
495
+ // WHEN the tool result is handled
496
+ await handleToolResult(state, deps, {
497
+ type: "tool_result",
498
+ toolUseId: "toolu_result",
499
+ content: "file1.txt\nfile2.txt",
500
+ isError: false,
501
+ });
502
+
503
+ // THEN the persisted seq equals the just-stamped tool_result seq
504
+ const toolResult = events.find((e) => e.type === "tool_result");
505
+ expect(toolResult).toBeDefined();
506
+ expect(getPersistedSeq(conversationId)).toBe(getCurrentSeq());
507
+ expect(getPersistedSeq(conversationId)).toBe(
508
+ (toolResult as unknown as AssistantEvent).seq ?? null,
509
+ );
510
+ });
511
+
512
+ test("thinking that is not streamed leaves the persisted seq unset", async () => {
513
+ /**
514
+ * When streamThinking is off, no thinking_delta SSE event is emitted, so
515
+ * nothing is mirrored and there is no stamped event to anchor a seq to.
516
+ * The turn must not invent a seq from unrelated global stream position.
517
+ */
518
+ // GIVEN a turn that does NOT stream thinking
519
+ const { deps, events } = makeStampingDeps({ streamThinking: false });
520
+ state.lastAssistantMessageId = "assistant-msg-1";
521
+
522
+ // WHEN a thinking_delta is dispatched, then the turn completes
523
+ await dispatchAgentEvent(state, deps, {
524
+ type: "thinking_delta",
525
+ thinking: "Internal reasoning.",
526
+ } as Extract<AgentEvent, { type: "thinking_delta" }>);
527
+ await handleMessageComplete(state, deps, {
528
+ type: "message_complete",
529
+ message: {
530
+ role: "assistant",
531
+ content: [{ type: "thinking", thinking: "Internal reasoning." }],
532
+ },
533
+ } as Extract<AgentEvent, { type: "message_complete" }>);
534
+
535
+ // THEN no thinking_delta was emitted and the persisted seq stays unset
536
+ expect(
537
+ events.find((e) => e.type === "assistant_thinking_delta"),
538
+ ).toBeUndefined();
539
+ expect(state.lastPersistedContentSeq).toBeUndefined();
540
+ expect(getPersistedSeq(conversationId)).toBeNull();
541
+ });
542
+ });
543
+
544
+ describe("tool results are persisted on arrival into a grouped row", () => {
545
+ const conversationId = "test-session-id";
546
+
547
+ beforeEach(() => {
548
+ _resetStreamStateForTesting();
549
+ reserveMessageMock.mockClear();
550
+ updateMessageContentMock.mockClear();
551
+ });
552
+
553
+ /** onEvent that stamps conversation-scoped events like the runtime hub. */
554
+ function makeStampingDeps(): {
555
+ deps: EventHandlerDeps;
556
+ events: ServerMessage[];
557
+ } {
558
+ const events: ServerMessage[] = [];
559
+ const deps = createMockDeps({
560
+ onEvent: (msg: ServerMessage) => {
561
+ events.push(msg);
562
+ stampAndBuffer(msg as unknown as AssistantEvent);
563
+ },
564
+ ctx: {
565
+ ...createMockDeps().ctx,
566
+ conversationId,
567
+ } as unknown as EventHandlerDeps["ctx"],
568
+ });
569
+ return { deps, events };
570
+ }
571
+
572
+ /** Register a tool as started so its result can be handled. */
573
+ function registerTool(toolUseId: string): void {
574
+ state.toolUseIdToName.set(toolUseId, "bash");
575
+ state.toolCallTimestamps.set(toolUseId, { startedAt: Date.now() });
576
+ state.currentTurnToolUseIds.push(toolUseId);
577
+ }
578
+
579
+ /** Parse the content of the most recent updateMessageContent call. */
580
+ function latestWrittenBlocks(): Array<Record<string, unknown>> {
581
+ const calls = updateMessageContentMock.mock.calls;
582
+ const last = calls[calls.length - 1];
583
+ return JSON.parse(last[1] as string);
584
+ }
585
+
586
+ test("the first result reserves one user row and writes its block", async () => {
587
+ /**
588
+ * The grouped tool-result row is a `user` message reserved when the first
589
+ * result of a batch arrives, then written via updateContent so the result
590
+ * is durable immediately.
591
+ */
592
+ // GIVEN a started tool
593
+ const { deps } = makeStampingDeps();
594
+ state.lastAssistantMessageId = "assistant-msg-1";
595
+ registerTool("toolu_a");
596
+
597
+ // WHEN its result arrives
598
+ await handleToolResult(state, deps, {
599
+ type: "tool_result",
600
+ toolUseId: "toolu_a",
601
+ content: "result-a",
602
+ isError: false,
603
+ });
604
+
605
+ // THEN a single user row was reserved and tracked, and its block written
606
+ const userReserves = reserveMessageMock.mock.calls.filter(
607
+ (call) => call[1] === "user",
608
+ );
609
+ expect(userReserves).toHaveLength(1);
610
+ expect(await state.pendingToolResultRowReservation).toBe(
611
+ "tool-result-row",
612
+ );
613
+ const blocks = latestWrittenBlocks();
614
+ expect(blocks).toHaveLength(1);
615
+ expect(blocks[0]).toMatchObject({
616
+ type: "tool_result",
617
+ tool_use_id: "toolu_a",
618
+ content: "result-a",
619
+ is_error: false,
620
+ });
621
+ });
622
+
623
+ test("parallel results share one row, grouped as sibling blocks", async () => {
624
+ /**
625
+ * Results from parallel tool calls in the same turn must land in a single
626
+ * `user` row (the tool_result-in-user-turn shape providers expect), so the
627
+ * row is reserved once and rewritten in place as each result arrives.
628
+ */
629
+ // GIVEN two started tools
630
+ const { deps } = makeStampingDeps();
631
+ state.lastAssistantMessageId = "assistant-msg-1";
632
+ registerTool("toolu_a");
633
+ registerTool("toolu_b");
634
+
635
+ // WHEN both results arrive
636
+ await handleToolResult(state, deps, {
637
+ type: "tool_result",
638
+ toolUseId: "toolu_a",
639
+ content: "result-a",
640
+ isError: false,
641
+ });
642
+ await handleToolResult(state, deps, {
643
+ type: "tool_result",
644
+ toolUseId: "toolu_b",
645
+ content: "result-b",
646
+ isError: false,
647
+ });
648
+
649
+ // THEN the row was reserved exactly once and now holds both blocks
650
+ const userReserves = reserveMessageMock.mock.calls.filter(
651
+ (call) => call[1] === "user",
652
+ );
653
+ expect(userReserves).toHaveLength(1);
654
+ const blocks = latestWrittenBlocks();
655
+ expect(blocks.map((b) => b.tool_use_id)).toEqual(["toolu_a", "toolu_b"]);
656
+ });
657
+
658
+ test("concurrent results race but reserve exactly one row", async () => {
659
+ /**
660
+ * `agent/loop.ts` dispatches each `tool_result` without awaiting, so two
661
+ * handlers for one parallel batch can enter reservation before the first
662
+ * `reserveMessage` resolves. A shared in-flight reservation promise must
663
+ * collapse them onto a single row rather than reserving one per result.
664
+ */
665
+ // GIVEN two started tools AND a reservation slow enough to overlap them
666
+ const { deps } = makeStampingDeps();
667
+ state.lastAssistantMessageId = "assistant-msg-1";
668
+ registerTool("toolu_a");
669
+ registerTool("toolu_b");
670
+ reserveMessageDelayMs = 10;
671
+
672
+ // WHEN both results are handled concurrently (neither awaited first)
673
+ try {
674
+ await Promise.all([
675
+ handleToolResult(state, deps, {
676
+ type: "tool_result",
677
+ toolUseId: "toolu_a",
678
+ content: "result-a",
679
+ isError: false,
680
+ }),
681
+ handleToolResult(state, deps, {
682
+ type: "tool_result",
683
+ toolUseId: "toolu_b",
684
+ content: "result-b",
685
+ isError: false,
686
+ }),
687
+ ]);
688
+ } finally {
689
+ reserveMessageDelayMs = 0;
690
+ }
691
+
692
+ // THEN exactly one user row was reserved and it holds both sibling blocks
693
+ const userReserves = reserveMessageMock.mock.calls.filter(
694
+ (call) => call[1] === "user",
695
+ );
696
+ expect(userReserves).toHaveLength(1);
697
+ expect(await state.pendingToolResultRowReservation).toBe(
698
+ "tool-result-row",
699
+ );
700
+ const blocks = latestWrittenBlocks();
701
+ expect(blocks.map((b) => b.tool_use_id).sort()).toEqual([
702
+ "toolu_a",
703
+ "toolu_b",
704
+ ]);
705
+ });
706
+
707
+ test("message_complete finalizes the on-arrival row without a second reserve", async () => {
708
+ /**
709
+ * Because the row already exists from the on-arrival write, the
710
+ * message_complete drain finalizes it (rewrite + bookkeeping) instead of
711
+ * inserting a second row, then clears the batch state.
712
+ */
713
+ // GIVEN a result already persisted on arrival
714
+ const { deps } = makeStampingDeps();
715
+ state.lastAssistantMessageId = "assistant-msg-1";
716
+ registerTool("toolu_a");
717
+ await handleToolResult(state, deps, {
718
+ type: "tool_result",
719
+ toolUseId: "toolu_a",
720
+ content: "result-a",
721
+ isError: false,
722
+ });
723
+ const reservesAfterArrival = reserveMessageMock.mock.calls.filter(
724
+ (call) => call[1] === "user",
725
+ ).length;
726
+
727
+ // WHEN the next call completes, draining the buffered result
728
+ await handleMessageComplete(state, deps, {
729
+ type: "message_complete",
730
+ message: { role: "assistant", content: [{ type: "text", text: "ok" }] },
731
+ } as Extract<AgentEvent, { type: "message_complete" }>);
732
+
733
+ // THEN no additional user row was reserved and the batch state is cleared
734
+ const reservesAfterDrain = reserveMessageMock.mock.calls.filter(
735
+ (call) => call[1] === "user",
736
+ ).length;
737
+ expect(reservesAfterDrain).toBe(reservesAfterArrival);
738
+ expect(state.pendingToolResults.size).toBe(0);
739
+ expect(state.pendingToolResultRowReservation).toBeUndefined();
740
+ expect(state.persistedToolUseIds.has("toolu_a")).toBe(true);
741
+ });
742
+ });
743
+
281
744
  describe("event ordering", () => {
282
745
  test("events are emitted in correct order: tool_use_preview_start → tool_input_delta → tool_use", () => {
283
746
  const collector = createEventCollector();
@@ -98,6 +98,7 @@ function createCollectorDeps(): {
98
98
  userMessageInterface: "macos",
99
99
  assistantMessageInterface: "macos",
100
100
  } as EventHandlerDeps["turnInterfaceContext"],
101
+ applyCompaction: async () => {},
101
102
  } as EventHandlerDeps;
102
103
  return { deps, events };
103
104
  }
@@ -0,0 +1,127 @@
1
+ /**
2
+ * Tests for the default `tool-result-truncate` plugin's `post-tool-use` hook.
3
+ *
4
+ * Covers:
5
+ * - The hook tail-drops an oversized `toolResponse.content` to the budget
6
+ * derived from `maxInputTokens`, matching `truncateToolResult`, and is a
7
+ * no-op for content that already fits.
8
+ * - End-to-end through `runHook` + the registry: registering the default
9
+ * plugin makes the hook fire and truncate the tool response.
10
+ * - Chain ordering: because defaults register first, the default hook runs
11
+ * ahead of a later-registered user hook, which therefore observes an
12
+ * already-truncated response.
13
+ */
14
+
15
+ import { beforeEach, describe, expect, test } from "bun:test";
16
+
17
+ import { HOOKS } from "../plugin-api/constants.js";
18
+ import type { PluginLogger, PostToolUseContext } from "../plugin-api/types.js";
19
+ import postToolUse from "../plugins/defaults/tool-result-truncate/hooks/post-tool-use.js";
20
+ import { defaultToolResultTruncatePlugin } from "../plugins/defaults/tool-result-truncate/register.js";
21
+ import {
22
+ truncateToolResult,
23
+ TRUNCATION_SUFFIX,
24
+ } from "../plugins/defaults/tool-result-truncate/terminal.js";
25
+ import { runHook } from "../plugins/pipeline.js";
26
+ import {
27
+ registerPlugin,
28
+ resetPluginRegistryForTests,
29
+ } from "../plugins/registry.js";
30
+ import type { ToolResultContent } from "../providers/types.js";
31
+
32
+ const noopLogger: PluginLogger = {
33
+ info: () => {},
34
+ warn: () => {},
35
+ error: () => {},
36
+ debug: () => {},
37
+ };
38
+
39
+ const MAX_INPUT_TOKENS = 10_000;
40
+
41
+ function makeToolResponse(content: string): ToolResultContent {
42
+ return { type: "tool_result", tool_use_id: "tu_1", content };
43
+ }
44
+
45
+ function makeCtx(content: string): PostToolUseContext {
46
+ return {
47
+ conversationId: "conv-test",
48
+ toolResponse: makeToolResponse(content),
49
+ messages: [],
50
+ maxInputTokens: MAX_INPUT_TOKENS,
51
+ logger: noopLogger,
52
+ };
53
+ }
54
+
55
+ describe("tool-result-truncate post-tool-use hook — direct", () => {
56
+ test("truncates oversized content identically to truncateToolResult", async () => {
57
+ // GIVEN a tool response whose content far exceeds the derived budget.
58
+ const content = "a".repeat(1_000_000);
59
+ const expected = truncateToolResult(content, MAX_INPUT_TOKENS);
60
+ const ctx = makeCtx(content);
61
+
62
+ // WHEN the hook runs over the context.
63
+ await postToolUse(ctx);
64
+
65
+ // THEN the response content matches the canonical truncation output.
66
+ expect(expected.truncated).toBe(true);
67
+ expect(ctx.toolResponse.content).toBe(expected.content);
68
+ expect(ctx.toolResponse.content).toContain(TRUNCATION_SUFFIX);
69
+ });
70
+
71
+ test("is a no-op for content that already fits the budget", async () => {
72
+ // GIVEN a tool response well within the derived budget.
73
+ const content = "small result";
74
+ const ctx = makeCtx(content);
75
+
76
+ // WHEN the hook runs.
77
+ await postToolUse(ctx);
78
+
79
+ // THEN the content is unchanged.
80
+ expect(ctx.toolResponse.content).toBe(content);
81
+ });
82
+ });
83
+
84
+ describe("tool-result-truncate post-tool-use hook — via runHook", () => {
85
+ beforeEach(() => {
86
+ resetPluginRegistryForTests();
87
+ });
88
+
89
+ test("registering the default plugin truncates an oversized response", async () => {
90
+ // GIVEN the default tool-result-truncate plugin is registered.
91
+ registerPlugin(defaultToolResultTruncatePlugin);
92
+ const content = "a".repeat(1_000_000);
93
+ const expected = truncateToolResult(content, MAX_INPUT_TOKENS);
94
+
95
+ // WHEN the post-tool-use chain runs.
96
+ const result = await runHook<PostToolUseContext>(
97
+ HOOKS.POST_TOOL_USE,
98
+ makeCtx(content),
99
+ );
100
+
101
+ // THEN the tool response is truncated.
102
+ expect(result.toolResponse.content).toBe(expected.content);
103
+ });
104
+
105
+ test("default hook runs before a later-registered user hook", async () => {
106
+ // GIVEN the default plugin is registered first, then a user plugin whose
107
+ // hook records the response content it observes.
108
+ let observed: string | null = null;
109
+ registerPlugin(defaultToolResultTruncatePlugin);
110
+ registerPlugin({
111
+ manifest: { name: "observer-plugin", version: "0.0.1" },
112
+ hooks: {
113
+ "post-tool-use": async (ctx: PostToolUseContext) => {
114
+ observed = ctx.toolResponse.content;
115
+ },
116
+ },
117
+ });
118
+ const content = "a".repeat(1_000_000);
119
+ const expected = truncateToolResult(content, MAX_INPUT_TOKENS);
120
+
121
+ // WHEN the chain runs.
122
+ await runHook<PostToolUseContext>(HOOKS.POST_TOOL_USE, makeCtx(content));
123
+
124
+ // THEN the user hook saw the already-truncated content.
125
+ expect(observed as string | null).toBe(expected.content);
126
+ });
127
+ });
@@ -3,8 +3,6 @@ import { describe, expect, test } from "bun:test";
3
3
  import {
4
4
  calculateMaxToolResultChars,
5
5
  HARD_MAX_TOOL_RESULT_CHARS,
6
- } from "../context/tool-result-truncation.js";
7
- import {
8
6
  MIN_KEEP_CHARS,
9
7
  truncateToolResultText,
10
8
  TRUNCATION_SUFFIX,