@vellumai/assistant 0.8.7 → 0.8.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (387)
  1. package/Dockerfile +20 -4
  2. package/docker-entrypoint.sh +4 -2
  3. package/docker-init-apt-root.sh +3 -1
  4. package/docker-kata-apt-env.sh +3 -1
  5. package/docker-kata-runtime-family.sh +12 -0
  6. package/docs/architecture/memory.md +1 -1
  7. package/docs/plugins.md +75 -79
  8. package/examples/plugins/echo/README.md +6 -12
  9. package/examples/plugins/echo/register.ts +0 -41
  10. package/node_modules/@vellumai/skill-host-contracts/src/server-message.ts +3 -3
  11. package/openapi.yaml +3381 -348
  12. package/package.json +1 -1
  13. package/scripts/generate-openapi.ts +68 -41
  14. package/src/__tests__/agent-loop-exit-reason.test.ts +34 -39
  15. package/src/__tests__/agent-loop-provider-error-recording.test.ts +1 -1
  16. package/src/__tests__/agent-loop.test.ts +37 -87
  17. package/src/__tests__/agent-wake-disk-pressure-callsite.test.ts +2 -0
  18. package/src/__tests__/annotate-activity-metadata.test.ts +262 -0
  19. package/src/__tests__/annotate-risk-options.test.ts +2 -3
  20. package/src/__tests__/anthropic-provider.test.ts +95 -2
  21. package/src/__tests__/assistant-event-hub.test.ts +25 -0
  22. package/src/__tests__/assistant-events-sse-shed.test.ts +8 -0
  23. package/src/__tests__/{conversation-stream-state.test.ts → assistant-stream-state.test.ts} +252 -91
  24. package/src/__tests__/auth-fallback-events-store.test.ts +116 -0
  25. package/src/__tests__/background-workers-disk-pressure.test.ts +6 -0
  26. package/src/__tests__/btw-routes.test.ts +62 -3
  27. package/src/__tests__/build-persisted-content.test.ts +184 -0
  28. package/src/__tests__/catalog-files.test.ts +1 -1
  29. package/src/__tests__/clawhub-files.test.ts +1 -1
  30. package/src/__tests__/compaction-pipeline.test.ts +1 -1
  31. package/src/__tests__/compaction.benchmark.test.ts +0 -30
  32. package/src/__tests__/config-watcher.test.ts +1 -1
  33. package/src/__tests__/conversation-abort-tool-results.test.ts +57 -19
  34. package/src/__tests__/conversation-agent-loop-disk-pressure.test.ts +6 -2
  35. package/src/__tests__/conversation-agent-loop-inference-profile.test.ts +10 -4
  36. package/src/__tests__/conversation-agent-loop-overflow.test.ts +313 -1136
  37. package/src/__tests__/conversation-agent-loop.test.ts +596 -1616
  38. package/src/__tests__/conversation-analysis-routes.test.ts +6 -0
  39. package/src/__tests__/conversation-history-web-search.test.ts +11 -1
  40. package/src/__tests__/conversation-pairing.test.ts +4 -31
  41. package/src/__tests__/conversation-process-app-control-preactivation.test.ts +6 -0
  42. package/src/__tests__/conversation-provider-retry-repair.test.ts +26 -5
  43. package/src/__tests__/conversation-queue.test.ts +2 -0
  44. package/src/__tests__/conversation-routes-disk-view.test.ts +3 -0
  45. package/src/__tests__/conversation-routes-slash-commands.test.ts +6 -5
  46. package/src/__tests__/conversation-runtime-assembly.test.ts +170 -229
  47. package/src/__tests__/conversation-runtime-workspace.test.ts +3 -24
  48. package/src/__tests__/conversation-slash-commands.test.ts +8 -42
  49. package/src/__tests__/conversation-slash-queue.test.ts +6 -1
  50. package/src/__tests__/conversation-surfaces-action-delivery.test.ts +84 -0
  51. package/src/__tests__/conversation-sync-tags.test.ts +27 -15
  52. package/src/__tests__/conversation-title-service.test.ts +135 -2
  53. package/src/__tests__/conversation-workspace-injection.test.ts +6 -1
  54. package/src/__tests__/cross-provider-web-search.test.ts +214 -1
  55. package/src/__tests__/db-schedule-syntax-migration.test.ts +5 -0
  56. package/src/__tests__/dm-persistence.test.ts +5 -1
  57. package/src/__tests__/empty-response-hook.test.ts +304 -0
  58. package/src/__tests__/feature-flag-test-helpers.ts +2 -2
  59. package/src/__tests__/gemini-image-service.test.ts +13 -0
  60. package/src/__tests__/helpers/mock-provider.ts +110 -0
  61. package/src/__tests__/helpers/native-web-search-harness.ts +129 -0
  62. package/src/__tests__/history-repair-hook.test.ts +1 -0
  63. package/src/__tests__/identity-intro-cache.test.ts +12 -100
  64. package/src/__tests__/identity-routes.test.ts +248 -7
  65. package/src/__tests__/inbound-slack-persistence.test.ts +5 -1
  66. package/src/__tests__/injector-background-turn.test.ts +2 -8
  67. package/src/__tests__/injector-chain.test.ts +106 -270
  68. package/src/__tests__/injector-disk-pressure.test.ts +3 -12
  69. package/src/__tests__/injector-document-comments.test.ts +2 -2
  70. package/src/__tests__/injector-pkb-v2-silenced.test.ts +30 -22
  71. package/src/__tests__/injector-v3-suppression.test.ts +31 -37
  72. package/src/__tests__/internal-telemetry-routes.test.ts +109 -0
  73. package/src/__tests__/list-messages-page-latest.test.ts +60 -0
  74. package/src/__tests__/list-messages-tool-merge.test.ts +20 -0
  75. package/src/__tests__/llm-usage-store.test.ts +223 -1
  76. package/src/__tests__/memory-retrieval-hook.test.ts +297 -0
  77. package/src/__tests__/memory-v2-static-injector.test.ts +103 -35
  78. package/src/__tests__/native-web-search.test.ts +191 -0
  79. package/src/__tests__/onboarding-template-contract.test.ts +2 -0
  80. package/src/__tests__/openai-image-service.test.ts +17 -0
  81. package/src/__tests__/openai-provider.test.ts +31 -1
  82. package/src/__tests__/persist-unsendable-image.test.ts +215 -0
  83. package/src/__tests__/persistence-secret-redaction.test.ts +1 -0
  84. package/src/__tests__/pipeline-runner.test.ts +29 -39
  85. package/src/__tests__/pkb-autoinject.test.ts +2 -5
  86. package/src/__tests__/plugin-bootstrap.test.ts +13 -28
  87. package/src/__tests__/plugin-registry.test.ts +0 -27
  88. package/src/__tests__/plugin-types.test.ts +2 -125
  89. package/src/__tests__/process-message-display-content.test.ts +6 -2
  90. package/src/__tests__/regenerate-fire-and-forget-trace.test.ts +5 -1
  91. package/src/__tests__/resolve-trust-class.test.ts +4 -4
  92. package/src/__tests__/runtime-events-sse-reconnect.test.ts +60 -23
  93. package/src/__tests__/schedule-routes.test.ts +603 -2
  94. package/src/__tests__/schedule-store.test.ts +41 -0
  95. package/src/__tests__/schedule-tools.test.ts +35 -0
  96. package/src/__tests__/server-history-render.test.ts +314 -1
  97. package/src/__tests__/skillssh-files.test.ts +1 -1
  98. package/src/__tests__/system-prompt.test.ts +20 -0
  99. package/src/__tests__/task-scheduler.test.ts +162 -1
  100. package/src/__tests__/terminal-tools.test.ts +6 -1
  101. package/src/__tests__/title-generate-hook.test.ts +319 -0
  102. package/src/__tests__/tool-error-hook.test.ts +278 -0
  103. package/src/__tests__/tool-preview-lifecycle.test.ts +468 -5
  104. package/src/__tests__/tool-result-metadata-plumbing.test.ts +1 -0
  105. package/src/__tests__/tool-result-truncate-hook.test.ts +127 -0
  106. package/src/__tests__/tool-result-truncation.test.ts +0 -2
  107. package/src/__tests__/ui-choice-copy-surfaces.test.ts +254 -0
  108. package/src/__tests__/ui-work-result-surface.test.ts +159 -0
  109. package/src/__tests__/usage-routes.test.ts +285 -1
  110. package/src/__tests__/user-plugin-loader.test.ts +2 -2
  111. package/src/__tests__/voice-session-bridge.test.ts +6 -3
  112. package/src/__tests__/web-search-backend-failure.test.ts +166 -0
  113. package/src/agent/loop.ts +346 -442
  114. package/src/api/events/assistant-thinking-delta.ts +33 -0
  115. package/src/api/events/tool-output-chunk.ts +45 -0
  116. package/src/api/events/tool-use-preview-start.ts +32 -0
  117. package/src/api/events/trace-event.ts +69 -0
  118. package/src/api/index.ts +48 -13
  119. package/src/api/responses/conversation-message.ts +368 -0
  120. package/src/avatar/__tests__/avatar-store.test.ts +34 -29
  121. package/src/cli/commands/__tests__/notifications.test.ts +58 -14
  122. package/src/cli/commands/notifications.ts +112 -60
  123. package/src/config/assistant-feature-flags.ts +22 -11
  124. package/src/config/bundled-skills/app-builder/SKILL.md +3 -20
  125. package/src/config/bundled-skills/app-builder/references/examples/README.md +17 -0
  126. package/src/config/bundled-skills/app-builder/references/examples/expense-tracker.md +515 -0
  127. package/src/config/bundled-skills/app-builder/references/examples/focus-timer.md +342 -0
  128. package/src/config/bundled-skills/app-builder/references/examples/habit-tracker.md +490 -0
  129. package/src/config/bundled-skills/document-editor/SKILL.md +1 -1
  130. package/src/config/bundled-skills/messaging/SKILL.md +0 -7
  131. package/src/config/feature-flag-cache.ts +3 -3
  132. package/src/config/feature-flag-registry.json +35 -3
  133. package/src/config/schemas/__tests__/memory-v2.test.ts +1 -0
  134. package/src/config/schemas/__tests__/memory-v3.test.ts +25 -0
  135. package/src/config/schemas/llm.ts +1 -0
  136. package/src/config/schemas/memory-v2.ts +8 -0
  137. package/src/config/schemas/memory-v3.ts +8 -0
  138. package/src/config/schemas/platform.ts +8 -0
  139. package/src/config/seed-inference-profiles.ts +2 -2
  140. package/src/config/skills.ts +13 -0
  141. package/src/context/compactor.ts +1 -1
  142. package/src/context/strip-injections.ts +122 -0
  143. package/src/context/token-estimator.ts +23 -0
  144. package/src/context/tool-result-truncation.ts +0 -23
  145. package/src/context/window-manager.ts +3 -6
  146. package/src/credential-execution/executable-discovery.ts +16 -0
  147. package/src/daemon/__tests__/conversation-lifecycle-auto-analyze.test.ts +6 -0
  148. package/src/daemon/__tests__/inference-profile-notification.test.ts +153 -0
  149. package/src/daemon/__tests__/native-web-search-metadata.test.ts +10 -8
  150. package/src/daemon/assistant-attachments.ts +1 -1
  151. package/src/daemon/config-watcher.ts +2 -2
  152. package/src/daemon/context-overflow-reducer.ts +0 -1
  153. package/src/daemon/conversation-agent-loop-handlers.ts +605 -153
  154. package/src/daemon/conversation-agent-loop.ts +281 -760
  155. package/src/daemon/conversation-history.ts +5 -4
  156. package/src/daemon/conversation-lifecycle.ts +3 -4
  157. package/src/daemon/conversation-messaging.ts +7 -6
  158. package/src/daemon/conversation-process.ts +11 -16
  159. package/src/daemon/conversation-runtime-assembly.ts +130 -347
  160. package/src/daemon/conversation-slash.ts +6 -25
  161. package/src/daemon/conversation-surfaces.ts +222 -4
  162. package/src/daemon/conversation-tool-setup.ts +2 -29
  163. package/src/daemon/conversation.ts +32 -14
  164. package/src/daemon/external-plugins-bootstrap.ts +9 -10
  165. package/src/daemon/handlers/config-a2a.ts +51 -36
  166. package/src/daemon/handlers/config-slack-channel.ts +20 -14
  167. package/src/daemon/handlers/config-telegram.ts +16 -2
  168. package/src/daemon/handlers/shared.ts +156 -84
  169. package/src/daemon/handlers/skills.ts +39 -10
  170. package/src/daemon/lifecycle.ts +4 -0
  171. package/src/daemon/message-types/apps.ts +1 -29
  172. package/src/daemon/message-types/messages.ts +9 -57
  173. package/src/daemon/message-types/skills.ts +2 -0
  174. package/src/daemon/message-types/surfaces.ts +136 -3
  175. package/src/daemon/now-scratchpad.ts +21 -0
  176. package/src/daemon/orphan-reaper.test.ts +210 -0
  177. package/src/daemon/orphan-reaper.ts +240 -0
  178. package/src/daemon/persist-unsendable-image.ts +117 -0
  179. package/src/daemon/process-message.ts +1 -3
  180. package/src/daemon/trace-emitter.ts +6 -4
  181. package/src/daemon/trust-context.ts +19 -0
  182. package/src/daemon/wake-target-adapter.ts +3 -1
  183. package/src/home/home-greeting-cache.ts +24 -1
  184. package/src/ipc/gateway-client.test.ts +2 -2
  185. package/src/ipc/gateway-client.ts +3 -3
  186. package/src/media/gemini-image-service.ts +15 -0
  187. package/src/media/openai-image-service.ts +14 -0
  188. package/src/media/types.ts +34 -0
  189. package/src/memory/__tests__/jobs-worker-v2-schedule.test.ts +56 -0
  190. package/src/memory/auth-fallback-events-store.ts +94 -0
  191. package/src/memory/conversation-title-service.ts +65 -41
  192. package/src/memory/db-init.ts +4 -0
  193. package/src/memory/graph/__tests__/conversation-graph-memory-registry.test.ts +119 -0
  194. package/src/memory/graph/conversation-graph-memory.ts +65 -0
  195. package/src/memory/jobs-store.ts +33 -0
  196. package/src/memory/jobs-worker.ts +31 -4
  197. package/src/memory/llm-usage-store.ts +224 -50
  198. package/src/memory/migrations/222-strip-placeholder-sentinels-from-messages.ts +6 -5
  199. package/src/memory/migrations/270-schedule-source-conversation.ts +13 -0
  200. package/src/memory/migrations/271-create-auth-fallback-events.ts +21 -0
  201. package/src/memory/migrations/index.ts +2 -0
  202. package/src/memory/pkb/autoinject.ts +61 -0
  203. package/src/memory/pkb/context.ts +50 -0
  204. package/src/memory/pkb/types.ts +14 -0
  205. package/src/memory/schedule-attribution-sql.ts +104 -0
  206. package/src/memory/schema/infrastructure.ts +16 -0
  207. package/src/memory/usage-grouped-buckets.ts +6 -1
  208. package/src/memory/v2/__tests__/consolidation-job.test.ts +1 -1
  209. package/src/memory/v2/consolidation-job.ts +1 -1
  210. package/src/memory/v3/__tests__/health.test.ts +16 -0
  211. package/src/memory/v3/__tests__/orchestrate.test.ts +45 -9
  212. package/src/memory/v3/__tests__/provider-blocks.test.ts +13 -0
  213. package/src/memory/v3/__tests__/router.test.ts +101 -29
  214. package/src/memory/v3/__tests__/selector.test.ts +93 -27
  215. package/src/memory/v3/__tests__/shadow-plugin.test.ts +23 -5
  216. package/src/memory/v3/health.ts +0 -0
  217. package/src/memory/v3/llm-retry.ts +32 -0
  218. package/src/memory/v3/orchestrate.ts +26 -14
  219. package/src/memory/v3/provider-blocks.ts +15 -5
  220. package/src/memory/v3/router.ts +48 -42
  221. package/src/memory/v3/selector.ts +57 -42
  222. package/src/memory/v3/shadow-plugin.ts +47 -15
  223. package/src/memory/v3/types.ts +8 -0
  224. package/src/notifications/conversation-pairing.ts +8 -15
  225. package/src/notifications/decision-engine.ts +6 -3
  226. package/src/notifications/home-feed-side-effect.ts +12 -1
  227. package/src/permissions/prompter.ts +4 -0
  228. package/src/plugin-api/constants.ts +4 -0
  229. package/src/plugin-api/index.ts +8 -1
  230. package/src/plugin-api/types.ts +151 -1
  231. package/src/plugins/defaults/empty-response/hooks/stop.ts +126 -0
  232. package/src/plugins/defaults/empty-response/register.ts +8 -13
  233. package/src/plugins/defaults/index.ts +1 -15
  234. package/src/plugins/defaults/injectors/register.ts +243 -74
  235. package/src/plugins/defaults/memory-retrieval/hooks/post-compact.ts +91 -0
  236. package/src/plugins/defaults/memory-retrieval/hooks/user-prompt-submit-temp.ts +216 -0
  237. package/src/plugins/defaults/memory-retrieval/injector-chain.ts +35 -0
  238. package/src/plugins/defaults/title-generate/hooks/stop.ts +75 -0
  239. package/src/plugins/defaults/title-generate/hooks/user-prompt-submit.ts +35 -0
  240. package/src/plugins/defaults/title-generate/package.json +1 -1
  241. package/src/plugins/defaults/title-generate/register.ts +18 -18
  242. package/src/plugins/defaults/tool-error/hooks/post-tool-use.ts +118 -0
  243. package/src/plugins/defaults/tool-error/package.json +1 -1
  244. package/src/plugins/defaults/tool-error/register.ts +9 -21
  245. package/src/plugins/defaults/tool-result-truncate/hooks/post-tool-use.ts +32 -0
  246. package/src/plugins/defaults/tool-result-truncate/register.ts +10 -21
  247. package/src/plugins/defaults/tool-result-truncate/terminal.ts +37 -18
  248. package/src/plugins/pipeline.ts +6 -18
  249. package/src/plugins/registry.ts +8 -25
  250. package/src/plugins/types.ts +43 -474
  251. package/src/proactive-artifact/aux-message-injector.ts +3 -3
  252. package/src/proactive-artifact/job.test.ts +7 -12
  253. package/src/prompts/__tests__/system-prompt.test.ts +36 -0
  254. package/src/prompts/templates/BOOTSTRAP-ACTIVATION-RAIL.md +62 -0
  255. package/src/prompts/templates/BOOTSTRAP.md +2 -2
  256. package/src/prompts/templates/system-sections.ts +15 -0
  257. package/src/providers/anthropic/client.ts +37 -29
  258. package/src/providers/openai/__tests__/chat-completions-provider-reasoning.test.ts +112 -0
  259. package/src/providers/openai/chat-completions-provider.ts +44 -0
  260. package/src/providers/openrouter/client.ts +1 -0
  261. package/src/providers/placeholder-sentinels.ts +35 -0
  262. package/src/runtime/__tests__/agent-wake.test.ts +5 -1
  263. package/src/runtime/agent-wake.ts +2 -2
  264. package/src/runtime/assistant-event-hub.ts +36 -6
  265. package/src/runtime/{conversation-stream-state.ts → assistant-stream-state.ts} +132 -58
  266. package/src/runtime/http-router.ts +16 -21
  267. package/src/runtime/http-types.ts +16 -70
  268. package/src/runtime/pending-interactions.ts +1 -0
  269. package/src/runtime/routes/__tests__/consolidation-routes.test.ts +265 -2
  270. package/src/runtime/routes/__tests__/conversation-query-routes.test.ts +31 -1
  271. package/src/runtime/routes/__tests__/memory-v2-routes.test.ts +6 -2
  272. package/src/runtime/routes/__tests__/tts-routes.test.ts +6 -2
  273. package/src/runtime/routes/app-management-routes.ts +6 -117
  274. package/src/runtime/routes/app-routes.ts +13 -15
  275. package/src/runtime/routes/attachment-routes.ts +26 -15
  276. package/src/runtime/routes/avatar-routes.ts +26 -0
  277. package/src/runtime/routes/btw-routes.ts +29 -23
  278. package/src/runtime/routes/consolidation-routes.ts +120 -20
  279. package/src/runtime/routes/conversation-query-routes.ts +2 -0
  280. package/src/runtime/routes/conversation-routes.ts +358 -184
  281. package/src/runtime/routes/documents-routes.ts +4 -0
  282. package/src/runtime/routes/domain-routes.ts +51 -37
  283. package/src/runtime/routes/epoch-millis-range.ts +34 -0
  284. package/src/runtime/routes/events-routes.ts +28 -34
  285. package/src/runtime/routes/gateway-log-routes.ts +26 -4
  286. package/src/runtime/routes/heartbeat-routes.ts +32 -12
  287. package/src/runtime/routes/identity-intro-cache.ts +11 -34
  288. package/src/runtime/routes/identity-routes.ts +208 -17
  289. package/src/runtime/routes/image-generation-routes.ts +40 -2
  290. package/src/runtime/routes/index.ts +2 -0
  291. package/src/runtime/routes/integrations/a2a.ts +12 -10
  292. package/src/runtime/routes/integrations/slack/__tests__/channel.test.ts +16 -0
  293. package/src/runtime/routes/integrations/slack/channel.ts +4 -0
  294. package/src/runtime/routes/integrations/slack/share.ts +27 -6
  295. package/src/runtime/routes/integrations/telegram.ts +6 -0
  296. package/src/runtime/routes/integrations/twilio.ts +42 -0
  297. package/src/runtime/routes/internal-telemetry-routes.ts +88 -0
  298. package/src/runtime/routes/log-export-routes.ts +8 -0
  299. package/src/runtime/routes/memory-v2-routes.ts +15 -8
  300. package/src/runtime/routes/memory-v3-routes.ts +50 -28
  301. package/src/runtime/routes/oauth-apps.ts +66 -12
  302. package/src/runtime/routes/oauth-providers.ts +44 -5
  303. package/src/runtime/routes/platform-routes.ts +81 -5
  304. package/src/runtime/routes/playground/__tests__/force-compact.test.ts +6 -4
  305. package/src/runtime/routes/playground/force-compact.ts +1 -1
  306. package/src/runtime/routes/rename-conversation-routes.ts +5 -0
  307. package/src/runtime/routes/schedule-routes.ts +152 -42
  308. package/src/runtime/routes/secret-routes.ts +14 -2
  309. package/src/runtime/routes/skills-routes.ts +43 -14
  310. package/src/runtime/routes/tool-call-confirmation-enrichment.test.ts +161 -0
  311. package/src/runtime/routes/tool-call-confirmation-enrichment.ts +107 -0
  312. package/src/runtime/routes/trust-rules-routes.ts +26 -2
  313. package/src/runtime/routes/tts-routes.ts +35 -0
  314. package/src/runtime/routes/types.ts +66 -8
  315. package/src/runtime/routes/usage-routes.ts +47 -39
  316. package/src/runtime/routes/webhook-routes.ts +41 -2
  317. package/src/runtime/routes/workspace-routes.ts +4 -0
  318. package/src/runtime/services/__tests__/analyze-conversation.test.ts +6 -0
  319. package/src/runtime/services/analyze-conversation.ts +2 -2
  320. package/src/schedule/schedule-store.ts +20 -1
  321. package/src/schedule/schedule-usage-store.ts +83 -0
  322. package/src/schedule/scheduler.ts +12 -5
  323. package/src/skills/catalog-files.ts +2 -2
  324. package/src/skills/catalog-install.ts +3 -0
  325. package/src/skills/categories-cache.ts +118 -0
  326. package/src/skills/clawhub-files.ts +1 -2
  327. package/src/skills/skillssh-files.ts +1 -2
  328. package/src/telemetry/types.ts +29 -1
  329. package/src/telemetry/usage-telemetry-reporter.test.ts +112 -3
  330. package/src/telemetry/usage-telemetry-reporter.ts +57 -2
  331. package/src/tools/executor.ts +1 -53
  332. package/src/tools/network/__tests__/web-search-metadata.test.ts +7 -1
  333. package/src/tools/network/__tests__/web-search.test.ts +11 -3
  334. package/src/tools/network/web-search-error.test.ts +248 -0
  335. package/src/tools/network/web-search-error.ts +267 -0
  336. package/src/tools/network/web-search.ts +207 -48
  337. package/src/tools/schedule/create.ts +2 -0
  338. package/src/tools/terminal/safe-env.ts +10 -1
  339. package/src/tools/ui-surface/definitions.ts +9 -1
  340. package/src/tts/__tests__/provider-catalog-consistency.test.ts +85 -1
  341. package/src/tts/provider-catalog.ts +76 -1
  342. package/src/util/mutex.ts +47 -0
  343. package/src/workspace/git-service.ts +1 -42
  344. package/src/workspace/migrations/095-bump-heartbeat-interval-30m-to-60m.ts +51 -0
  345. package/src/workspace/migrations/096-reduce-quality-profile-effort.ts +72 -0
  346. package/src/workspace/migrations/097-enable-adaptive-thinking-managed-profiles.ts +93 -0
  347. package/src/workspace/migrations/registry.ts +6 -0
  348. package/src/__tests__/bootstrap-turn-cleanup.test.ts +0 -44
  349. package/src/__tests__/empty-response-pipeline.test.ts +0 -423
  350. package/src/__tests__/llm-call-pipeline.test.ts +0 -287
  351. package/src/__tests__/memory-retrieval-pipeline.test.ts +0 -418
  352. package/src/__tests__/persistence-pipeline.test.ts +0 -503
  353. package/src/__tests__/title-generate-pipeline.test.ts +0 -211
  354. package/src/__tests__/token-estimate-pipeline.test.ts +0 -479
  355. package/src/__tests__/tool-error-pipeline.test.ts +0 -241
  356. package/src/__tests__/tool-execute-pipeline.test.ts +0 -417
  357. package/src/__tests__/tool-result-truncate-pipeline.test.ts +0 -341
  358. package/src/daemon/bootstrap-turn-cleanup.ts +0 -45
  359. package/src/gallery/default-gallery.ts +0 -1359
  360. package/src/gallery/gallery-manifest.ts +0 -28
  361. package/src/home/feature-gate.ts +0 -22
  362. package/src/plugins/defaults/empty-response/middlewares/emptyResponse.ts +0 -22
  363. package/src/plugins/defaults/empty-response/terminal.ts +0 -106
  364. package/src/plugins/defaults/injectors/package.json +0 -15
  365. package/src/plugins/defaults/llm-call/middlewares/llmCall.ts +0 -17
  366. package/src/plugins/defaults/llm-call/package.json +0 -15
  367. package/src/plugins/defaults/llm-call/register.ts +0 -45
  368. package/src/plugins/defaults/memory-retrieval/middlewares/memoryRetrieval.ts +0 -17
  369. package/src/plugins/defaults/memory-retrieval/package.json +0 -15
  370. package/src/plugins/defaults/memory-retrieval/register.ts +0 -181
  371. package/src/plugins/defaults/persistence/middlewares/persistence.ts +0 -19
  372. package/src/plugins/defaults/persistence/package.json +0 -15
  373. package/src/plugins/defaults/persistence/register.ts +0 -38
  374. package/src/plugins/defaults/persistence/terminal.ts +0 -83
  375. package/src/plugins/defaults/title-generate/terminal.ts +0 -31
  376. package/src/plugins/defaults/token-estimate/middlewares/tokenEstimate.ts +0 -23
  377. package/src/plugins/defaults/token-estimate/package.json +0 -15
  378. package/src/plugins/defaults/token-estimate/register.ts +0 -34
  379. package/src/plugins/defaults/token-estimate/terminal.ts +0 -40
  380. package/src/plugins/defaults/tool-error/middlewares/toolError.ts +0 -21
  381. package/src/plugins/defaults/tool-error/terminal.ts +0 -47
  382. package/src/plugins/defaults/tool-execute/middlewares/toolExecute.ts +0 -23
  383. package/src/plugins/defaults/tool-execute/package.json +0 -15
  384. package/src/plugins/defaults/tool-execute/register.ts +0 -49
  385. package/src/plugins/defaults/tool-result-truncate/middlewares/toolResultTruncate.ts +0 -23
  386. package/src/plugins/defaults/tool-result-truncate/types.ts +0 -22
  387. package/src/skills/category-inference.ts +0 -111
@@ -0,0 +1,297 @@
1
+ /**
2
+ * Tests for the default `user-prompt-submit-temp` hook (memory retrieval).
3
+ *
4
+ * Covers the retrieval behavior, the side effects the hook owns (injected-block
5
+ * metadata, recall log, `memory_recalled` event), trust gating, error
6
+ * propagation, and abort-signal forwarding. Uses `mock.module` to stub the
7
+ * persistence helpers so the test doesn't touch the developer's real
8
+ * `~/.vellum` or database. The memory graph handle is a hand-rolled fake
9
+ * passed on the hook context — the hook only needs `prepareMemory`.
10
+ */
11
+
12
+ import { beforeEach, describe, expect, mock, test } from "bun:test";
13
+
14
// The persistence helpers are replaced with mocks BEFORE the module under test
// is imported, so that its bindings resolve through these stand-ins.
const updateMessageMetadataMock = mock(
  (_messageId: string, _metadataUpdates: unknown) => {},
);
mock.module("../memory/conversation-crud.js", () => {
  return { updateMessageMetadata: updateMessageMetadataMock };
});

const recordMemoryRecallLogMock = mock((_logEntry: unknown) => {});
mock.module("../memory/memory-recall-log-store.js", () => {
  return { recordMemoryRecallLog: recordMemoryRecallLogMock };
});
25
+
26
+ import type { AssistantConfig } from "../config/schema.js";
27
+ import type { ServerMessage } from "../daemon/message-protocol.js";
28
+ import type { ConversationGraphMemory } from "../memory/graph/conversation-graph-memory.js";
29
+ import type { QdrantSparseVector } from "../memory/qdrant-client.js";
30
+ import userPromptSubmitMemoryRetrieval, {
31
+ type MemoryRetrievalHookContext,
32
+ } from "../plugins/defaults/memory-retrieval/hooks/user-prompt-submit-temp.js";
33
+ import type { Message } from "../providers/types.js";
34
+
35
/**
 * Builds the canonical metrics payload the graph retriever attaches to a real
 * hit. Every call returns a brand-new object (including a new
 * `topCandidates` array), so callers never share state through it.
 */
function makeMetrics() {
  // The single candidate reported in `topCandidates`.
  const soleCandidate = {
    nodeId: "node-1",
    type: "fact",
    score: 0.9,
    semanticSimilarity: 0.8,
    recencyBoost: 0.1,
  };

  const metrics = {
    embeddingProvider: "openai",
    embeddingModel: "text-embedding-3-small",
    semanticHits: 2,
    mergedCount: 3,
    selectedCount: 1,
    tier1Count: 1,
    tier2Count: 0,
    hybridSearchLatencyMs: 5,
    sparseVectorUsed: true,
    topCandidates: [soleCandidate],
    queryContext: "query-context",
  };
  return metrics;
}
59
+
60
/**
 * Builds a stand-in graph-memory handle that exposes only `prepareMemory` and
 * `recordPkbQueryVectors`, both backed by mocks.
 *
 * `prepareMemory` resolves to a single canned result assembled from
 * `overrides`; the same result object is handed back on every call, so tests
 * can assert on the hook's outputs by object identity. Fields that are not
 * overridden take a neutral value (empty message list, zero tokens, `null`
 * block text and metrics, undefined vectors).
 *
 * @param overrides Optional values for individual fields of the canned
 *   `prepareMemory` result.
 * @returns The fake handle together with its two mocks, so tests can inspect
 *   how the hook invoked them.
 */
function makeFakeGraphMemory(overrides?: {
  messages?: Message[];
  injectedTokens?: number;
  injectedBlockText?: string | null;
  metrics?: ReturnType<typeof makeMetrics> | null;
  queryVector?: number[];
  sparseVector?: QdrantSparseVector;
  userQueryVector?: number[];
  userQuerySparseVector?: QdrantSparseVector;
}): {
  memory: ConversationGraphMemory;
  prepareMemoryMock: ReturnType<typeof mock>;
  recordPkbQueryVectorsMock: ReturnType<typeof mock>;
} {
  const cannedResult = {
    runMessages: overrides?.messages ?? [],
    injectedTokens: overrides?.injectedTokens ?? 0,
    latencyMs: 0,
    mode: "none" as const,
    // An omitted override becomes null; an explicit string or null passes through.
    injectedBlockText: overrides?.injectedBlockText ?? null,
    metrics: overrides?.metrics ?? null,
    queryVector: overrides?.queryVector,
    sparseVector: overrides?.sparseVector,
    userQueryVector: overrides?.userQueryVector,
    userQuerySparseVector: overrides?.userQuerySparseVector,
  };

  const prepareMemoryMock = mock(() => Promise.resolve(cannedResult));
  const recordPkbQueryVectorsMock = mock(() => {});

  // Only the two members above exist on the fake, hence the double cast.
  const handle = {
    prepareMemory: prepareMemoryMock,
    recordPkbQueryVectors: recordPkbQueryVectorsMock,
  } as unknown as ConversationGraphMemory;

  return { memory: handle, prepareMemoryMock, recordPkbQueryVectorsMock };
}
103
+
104
/**
 * Assembles a hook context for a test.
 *
 * Defaults: a fresh fake graph memory, an empty config, a no-op event sink, a
 * trusted actor, the ids "conv-test" / "msg-test", a logger whose `warn` does
 * nothing, the signal of a new `AbortController`, and an empty
 * `latestMessages` array. Any field present in `overrides` replaces the
 * corresponding default.
 *
 * @param overrides Fields to substitute for the defaults.
 * @returns The merged hook context.
 */
function makeHookCtx(
  overrides: Partial<MemoryRetrievalHookContext> = {},
): MemoryRetrievalHookContext {
  const doNothing = () => {};
  const defaultGraphMemory = makeFakeGraphMemory().memory;

  const defaults: MemoryRetrievalHookContext = {
    graphMemory: defaultGraphMemory,
    config: {} as AssistantConfig,
    onEvent: doNothing,
    isTrustedActor: true,
    conversationId: "conv-test",
    userMessageId: "msg-test",
    logger: {
      warn: doNothing,
    } as unknown as MemoryRetrievalHookContext["logger"],
    signal: new AbortController().signal,
    latestMessages: [],
  };

  // Overrides are spread last so they win over the defaults.
  return { ...defaults, ...overrides };
}
123
+
124
// Reset both persistence stubs before every test so call assertions only see
// the calls made by the test that is currently running.
beforeEach(() => {
  for (const stub of [updateMessageMetadataMock, recordMemoryRecallLogMock]) {
    stub.mockReset();
  }
});
128
+
129
+ describe("user-prompt-submit-temp hook (memory retrieval)", () => {
130
+ test("adopts the injected run messages when the actor is trusted", async () => {
131
+ const injected: Message[] = [
132
+ { role: "user", content: [{ type: "text", text: "injected" }] },
133
+ ];
134
+ const { memory, prepareMemoryMock } = makeFakeGraphMemory({
135
+ messages: injected,
136
+ });
137
+ const ctx = makeHookCtx({ graphMemory: memory, isTrustedActor: true });
138
+
139
+ await userPromptSubmitMemoryRetrieval(ctx);
140
+
141
+ expect(prepareMemoryMock).toHaveBeenCalledTimes(1);
142
+ // The hook adopts the retriever's injected message array verbatim —
143
+ // consumers in the agent loop rely on that identity.
144
+ expect(ctx.latestMessages).toBe(injected);
145
+ });
146
+
147
+ test("selects the user-query dense/sparse pair when present, else the summary pair", async () => {
148
+ const userDense = [1, 1, 1];
149
+ const userSparse: QdrantSparseVector = { indices: [0], values: [1] };
150
+ const summaryDense = [2, 2, 2];
151
+ const summarySparse: QdrantSparseVector = { indices: [1], values: [2] };
152
+
153
+ const withUserQuery = makeFakeGraphMemory({
154
+ queryVector: summaryDense,
155
+ sparseVector: summarySparse,
156
+ userQueryVector: userDense,
157
+ userQuerySparseVector: userSparse,
158
+ });
159
+ const userCtx = makeHookCtx({ graphMemory: withUserQuery.memory });
160
+ await userPromptSubmitMemoryRetrieval(userCtx);
161
+ // User-query pair wins — never crossed with the summary signal — and is
162
+ // recorded back onto the graph handle for the PKB-reminder injector.
163
+ expect(withUserQuery.recordPkbQueryVectorsMock).toHaveBeenCalledWith(
164
+ userDense,
165
+ userSparse,
166
+ );
167
+
168
+ const summaryOnly = makeFakeGraphMemory({
169
+ queryVector: summaryDense,
170
+ sparseVector: summarySparse,
171
+ });
172
+ const summaryCtx = makeHookCtx({ graphMemory: summaryOnly.memory });
173
+ await userPromptSubmitMemoryRetrieval(summaryCtx);
174
+ expect(summaryOnly.recordPkbQueryVectorsMock).toHaveBeenCalledWith(
175
+ summaryDense,
176
+ summarySparse,
177
+ );
178
+ });
179
+
180
+ test("skips graph retrieval and side effects for untrusted actors", async () => {
181
+ const { memory, prepareMemoryMock, recordPkbQueryVectorsMock } =
182
+ makeFakeGraphMemory();
183
+ const seeded: Message[] = [
184
+ { role: "user", content: [{ type: "text", text: "seeded" }] },
185
+ ];
186
+ const ctx = makeHookCtx({
187
+ graphMemory: memory,
188
+ isTrustedActor: false,
189
+ latestMessages: seeded,
190
+ });
191
+
192
+ await userPromptSubmitMemoryRetrieval(ctx);
193
+
194
+ expect(prepareMemoryMock).not.toHaveBeenCalled();
195
+ // No graph retrieval ran: the working array stays the seeded input and no
196
+ // PKB query pair is recorded onto the graph handle.
197
+ expect(ctx.latestMessages).toBe(seeded);
198
+ expect(recordPkbQueryVectorsMock).not.toHaveBeenCalled();
199
+ expect(recordMemoryRecallLogMock).not.toHaveBeenCalled();
200
+ expect(updateMessageMetadataMock).not.toHaveBeenCalled();
201
+ });
202
+
203
+ test("persists injected block, recall log, and emits memory_recalled", async () => {
204
+ const received: ServerMessage[] = [];
205
+ const { memory } = makeFakeGraphMemory({
206
+ injectedBlockText: "injected-block",
207
+ metrics: makeMetrics(),
208
+ });
209
+ const ctx = makeHookCtx({
210
+ graphMemory: memory,
211
+ onEvent: (msg) => received.push(msg),
212
+ userMessageId: "msg-42",
213
+ conversationId: "conv-42",
214
+ });
215
+
216
+ await userPromptSubmitMemoryRetrieval(ctx);
217
+
218
+ expect(updateMessageMetadataMock).toHaveBeenCalledWith("msg-42", {
219
+ memoryInjectedBlock: "injected-block",
220
+ });
221
+ expect(recordMemoryRecallLogMock).toHaveBeenCalledTimes(1);
222
+ const logEntry = recordMemoryRecallLogMock.mock.calls[0]?.[0] as {
223
+ conversationId: string;
224
+ reason: string;
225
+ };
226
+ expect(logEntry.conversationId).toBe("conv-42");
227
+ expect(logEntry.reason).toBe("graph:none");
228
+ expect(received).toHaveLength(1);
229
+ expect(received[0]?.type).toBe("memory_recalled");
230
+ });
231
+
232
+ test("skips metadata persist when no block text is injected", async () => {
233
+ const { memory } = makeFakeGraphMemory({ injectedBlockText: null });
234
+ const ctx = makeHookCtx({ graphMemory: memory });
235
+
236
+ await userPromptSubmitMemoryRetrieval(ctx);
237
+
238
+ expect(updateMessageMetadataMock).not.toHaveBeenCalled();
239
+ // The recall log is still written even without an injected block.
240
+ expect(recordMemoryRecallLogMock).toHaveBeenCalledTimes(1);
241
+ });
242
+
243
+ test("propagates errors from prepareMemory rather than swallowing them", async () => {
244
+ // Memory is critical — failures must surface to the caller (the agent
245
+ // loop) rather than silently degrading to an empty graph result.
246
+ const failingPrepare = mock(
247
+ (
248
+ _msgs: Message[],
249
+ _cfg: AssistantConfig,
250
+ _signal: AbortSignal,
251
+ _onEvent: (msg: ServerMessage) => void,
252
+ ) => Promise.reject(new Error("retrieval failed")),
253
+ );
254
+ const graphMemory = {
255
+ prepareMemory: failingPrepare,
256
+ } as unknown as ConversationGraphMemory;
257
+ const ctx = makeHookCtx({ graphMemory, isTrustedActor: true });
258
+
259
+ await expect(userPromptSubmitMemoryRetrieval(ctx)).rejects.toThrow(
260
+ "retrieval failed",
261
+ );
262
+ });
263
+
264
+ test("forwards the context abort signal into prepareMemory", async () => {
265
+ // The hook hands its `ctx.signal` straight to `prepareMemory` so an
266
+ // external cancel aborts the underlying retrieval.
267
+ let capturedSignal: AbortSignal | undefined;
268
+ const prepareMemoryMock = mock(
269
+ async (
270
+ _msgs: Message[],
271
+ _cfg: AssistantConfig,
272
+ signal: AbortSignal,
273
+ _onEvent: (msg: ServerMessage) => void,
274
+ ) => {
275
+ capturedSignal = signal;
276
+ return {
277
+ runMessages: [],
278
+ injectedTokens: 0,
279
+ latencyMs: 0,
280
+ mode: "none" as const,
281
+ injectedBlockText: null,
282
+ metrics: null,
283
+ };
284
+ },
285
+ );
286
+ const graphMemory = {
287
+ prepareMemory: prepareMemoryMock,
288
+ recordPkbQueryVectors: mock(() => {}),
289
+ } as unknown as ConversationGraphMemory;
290
+ const controller = new AbortController();
291
+ const ctx = makeHookCtx({ graphMemory, signal: controller.signal });
292
+
293
+ await userPromptSubmitMemoryRetrieval(ctx);
294
+
295
+ expect(capturedSignal).toBe(controller.signal);
296
+ });
297
+ });
@@ -2,26 +2,46 @@
2
2
  * Tests for the `memory-v2-static` runtime injector.
3
3
  *
4
4
  * Covers:
5
- * - Returns null when `memoryV2Static` is missing/empty.
5
+ * - Returns null when the v2 static memory files are absent/empty.
6
6
  * - Returns null when `mode === "minimal"`.
7
7
  * - Wraps content in `<info>...</info>` and uses
8
8
  * `after-memory-prefix` placement.
9
9
  * - Escapes any `</info>` substring inside the authored content so the
10
10
  * wrapper cannot be broken out of.
11
+ * - Skips (re)injection when the `<info>` block is already present in the
12
+ * turn's working messages (presence detection — the block persists in
13
+ * history between compactions).
11
14
  *
12
- * Hermetic: drives the injector's `produce()` directly with a synthesized
13
- * `TurnContext` no daemon, no filesystem.
15
+ * The injector sources its content itself via `readMemoryV2StaticContent()`
16
+ * behind the personal-memory trust gate, so each test seeds the workspace
17
+ * memory files rather than passing the content in as an option. Mocks
18
+ * `config/loader` so the v2 gates are on without standing up a full config.
14
19
  */
15
20
 
16
- import { describe, expect, test } from "bun:test";
21
+ import { mkdirSync, rmSync, writeFileSync } from "node:fs";
22
+ import { dirname } from "node:path";
23
+ import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
17
24
 
18
- import { defaultInjectorsPlugin } from "../plugins/defaults/injectors/register.js";
25
+ const realLoader = await import("../config/loader.js");
26
+
27
+ mock.module("../config/loader.js", () => ({
28
+ ...realLoader,
29
+ loadConfig: () => ({
30
+ memory: { enabled: true, v2: { enabled: true } },
31
+ }),
32
+ getConfig: () => ({
33
+ memory: { enabled: true, v2: { enabled: true } },
34
+ }),
35
+ }));
36
+
37
+ const { defaultInjectors } =
38
+ await import("../plugins/defaults/injectors/register.js");
19
39
  import type { Injector, TurnContext } from "../plugins/types.js";
40
+ import type { Message } from "../providers/types.js";
41
+ import { getWorkspacePromptPath } from "../util/platform.js";
20
42
 
21
43
  function findInjector(name: string): Injector {
22
- const injector = defaultInjectorsPlugin.injectors?.find(
23
- (i) => i.name === name,
24
- );
44
+ const injector = defaultInjectors.find((i) => i.name === name);
25
45
  if (!injector) {
26
46
  throw new Error(`injector '${name}' not registered`);
27
47
  }
@@ -34,57 +54,69 @@ function makeContext(overrides: Partial<TurnContext> = {}): TurnContext {
34
54
  conversationId: "conv-test",
35
55
  turnIndex: 0,
36
56
  trust: { sourceChannel: "vellum", trustClass: "guardian" },
57
+ injectionInputs: {},
37
58
  ...overrides,
38
59
  };
39
60
  }
40
61
 
62
+ /** Seed a single v2 static memory section under `## Essentials`. */
63
+ function seedEssentials(body: string): void {
64
+ const path = getWorkspacePromptPath("memory/essentials.md");
65
+ mkdirSync(dirname(path), { recursive: true });
66
+ writeFileSync(path, body, "utf-8");
67
+ }
68
+
69
+ function seedThreads(body: string): void {
70
+ const path = getWorkspacePromptPath("memory/threads.md");
71
+ mkdirSync(dirname(path), { recursive: true });
72
+ writeFileSync(path, body, "utf-8");
73
+ }
74
+
75
+ function clearV2StaticFiles(): void {
76
+ for (const file of [
77
+ "memory/essentials.md",
78
+ "memory/threads.md",
79
+ "memory/recent.md",
80
+ "memory/buffer.md",
81
+ ]) {
82
+ rmSync(getWorkspacePromptPath(file), { force: true });
83
+ }
84
+ }
85
+
41
86
  const memoryV2StaticInjector = findInjector("memory-v2-static");
42
87
 
43
88
  describe("memory-v2-static injector", () => {
44
- test("returns null when memoryV2Static is undefined", async () => {
45
- const ctx = makeContext({ injectionInputs: {} });
46
- expect(await memoryV2StaticInjector.produce(ctx)).toBeNull();
47
- });
89
+ beforeEach(() => clearV2StaticFiles());
90
+ afterEach(() => clearV2StaticFiles());
48
91
 
49
- test("returns null when memoryV2Static is null", async () => {
50
- const ctx = makeContext({ injectionInputs: { memoryV2Static: null } });
51
- expect(await memoryV2StaticInjector.produce(ctx)).toBeNull();
52
- });
53
-
54
- test("returns null when memoryV2Static is an empty string", async () => {
55
- const ctx = makeContext({ injectionInputs: { memoryV2Static: "" } });
92
+ test("returns null when the v2 static memory files are absent", async () => {
93
+ const ctx = makeContext();
56
94
  expect(await memoryV2StaticInjector.produce(ctx)).toBeNull();
57
95
  });
58
96
 
59
97
  test("returns null in minimal mode even with content", async () => {
60
- const ctx = makeContext({
61
- injectionInputs: {
62
- mode: "minimal",
63
- memoryV2Static: "## Essentials\n\nAlice prefers VS Code.",
64
- },
65
- });
98
+ seedEssentials("Alice prefers VS Code.");
99
+ const ctx = makeContext({ injectionInputs: { mode: "minimal" } });
66
100
  expect(await memoryV2StaticInjector.produce(ctx)).toBeNull();
67
101
  });
68
102
 
69
103
  test("wraps content in <info>...</info> with after-memory-prefix placement", async () => {
70
- const content =
71
- "## Essentials\n\nAlice prefers VS Code.\n\n## Threads\n\nOpen: ship PR.";
72
- const ctx = makeContext({
73
- injectionInputs: { memoryV2Static: content },
74
- });
104
+ seedEssentials("Alice prefers VS Code.");
105
+ seedThreads("Open: ship PR.");
106
+ const ctx = makeContext();
75
107
 
76
108
  const block = await memoryV2StaticInjector.produce(ctx);
77
109
  expect(block).not.toBeNull();
78
110
  expect(block!.id).toBe("memory-v2-static");
79
111
  expect(block!.placement).toBe("after-memory-prefix");
80
- expect(block!.text).toBe(`<info>\n${content}\n</info>`);
112
+ expect(block!.text).toBe(
113
+ "<info>\n## Essentials\n\nAlice prefers VS Code.\n\n## Threads\n\nOpen: ship PR.\n</info>",
114
+ );
81
115
  });
82
116
 
83
117
  test("escapes inner </info> closing tags so the wrapper cannot be broken out of", async () => {
84
- const content = "## Essentials\n\nText with </info> embedded.";
85
- const ctx = makeContext({
86
- injectionInputs: { memoryV2Static: content },
87
- });
118
+ seedEssentials("Text with </info> embedded.");
119
+ const ctx = makeContext();
88
120
 
89
121
  const block = await memoryV2StaticInjector.produce(ctx);
90
122
  expect(block).not.toBeNull();
@@ -92,4 +124,40 @@ describe("memory-v2-static injector", () => {
92
124
  "<info>\n## Essentials\n\nText with &lt;/info&gt; embedded.\n</info>",
93
125
  );
94
126
  });
127
+
128
+ test("skips (re)injection when the <info> block is already present", async () => {
129
+ seedEssentials("Alice prefers VS Code.");
130
+ const ctx = makeContext();
131
+ const runMessages: Message[] = [
132
+ {
133
+ role: "user",
134
+ content: [
135
+ { type: "text", text: "<info>\nstale memory\n</info>" },
136
+ { type: "text", text: "What next?" },
137
+ ],
138
+ },
139
+ ];
140
+ expect(await memoryV2StaticInjector.produce(ctx, runMessages)).toBeNull();
141
+ });
142
+
143
+ test("skips (re)injection when a legacy <memory>-wrapped static block is present", async () => {
144
+ // Rows persisted before the `<info>` switch rehydrate the static block as
145
+ // `<memory>…</memory>`. Re-injecting a fresh `<info>` copy alongside it
146
+ // would duplicate the content until the next compaction.
147
+ seedEssentials("Alice prefers VS Code.");
148
+ const ctx = makeContext();
149
+ const runMessages: Message[] = [
150
+ {
151
+ role: "user",
152
+ content: [
153
+ {
154
+ type: "text",
155
+ text: "<memory>\n## Essentials\n\nAlice prefers VS Code.\n</memory>",
156
+ },
157
+ { type: "text", text: "What next?" },
158
+ ],
159
+ },
160
+ ];
161
+ expect(await memoryV2StaticInjector.produce(ctx, runMessages)).toBeNull();
162
+ });
95
163
  });
@@ -71,9 +71,56 @@ mock.module("@anthropic-ai/sdk", () => ({
71
71
  },
72
72
  }));
73
73
 
74
+ // Mock daemon collaborators the handler module imports at load time so the
75
+ // handler-level tests below can drive `server_tool_complete` in isolation.
76
+ mock.module("../config/loader.js", () => ({
77
+ getConfig: () => ({
78
+ skills: {
79
+ entries: {},
80
+ load: { extraDirs: [], watch: false, watchDebounceMs: 0 },
81
+ install: { nodeManager: "npm" },
82
+ allowBundled: null,
83
+ remoteProviders: {
84
+ skillssh: { enabled: true },
85
+ clawhub: { enabled: true },
86
+ },
87
+ remotePolicy: {
88
+ blockSuspicious: true,
89
+ blockMalware: true,
90
+ maxSkillsShRisk: "medium",
91
+ },
92
+ },
93
+ }),
94
+ loadConfig: () => ({}),
95
+ }));
96
+
97
+ mock.module("../memory/conversation-crud.js", () => ({
98
+ addMessage: () => ({ id: "mock-msg-id" }),
99
+ getMessageById: () => null,
100
+ updateMessageContent: () => {},
101
+ provenanceFromTrustContext: () => ({}),
102
+ reserveMessage: mock(async () => ({ id: "msg-reserve" })),
103
+ }));
104
+
105
+ mock.module("../memory/llm-request-log-store.js", () => ({
106
+ recordRequestLog: () => {},
107
+ backfillMessageIdOnLogs: () => {},
108
+ }));
109
+
74
110
  // Import after mocking
111
+ import {
112
+ createEventHandlerState,
113
+ type EventHandlerState,
114
+ } from "../daemon/conversation-agent-loop-handlers.js";
75
115
  import { AnthropicProvider } from "../providers/anthropic/client.js";
76
116
  import { isNativeWebSearchCapableProvider } from "../providers/registry.js";
117
+ import { WEB_SEARCH_BACKEND_FAILURE_MESSAGE } from "../tools/network/web-search-error.js";
118
+ import {
119
+ completeNativeWebSearch,
120
+ createHandlerDeps,
121
+ lastToolResult,
122
+ toolResults,
123
+ } from "./helpers/native-web-search-harness.js";
77
124
 
78
125
  // ---------------------------------------------------------------------------
79
126
  // Helpers
@@ -499,3 +546,147 @@ describe("Native Web Search — Streaming Events", () => {
499
546
  expect(toolUseEvents).toHaveLength(1);
500
547
  });
501
548
  });
549
+
550
+ // ---------------------------------------------------------------------------
551
+ // Tests — Native server_tool_complete backend-failure handling (ATL-727)
552
+ // ---------------------------------------------------------------------------
553
+
554
+ describe("Native Web Search — Backend Failure Handling", () => {
555
+ let state: EventHandlerState;
556
+
557
+ beforeEach(() => {
558
+ state = createEventHandlerState();
559
+ });
560
+
561
+ test("backend failure surfaces friendly copy with isError true and empty results", async () => {
562
+ const { deps, events } = createHandlerDeps();
563
+ await completeNativeWebSearch(state, deps, "tu_backend", {
564
+ isError: true,
565
+ errorCode: "unavailable",
566
+ });
567
+
568
+ const result = lastToolResult(events);
569
+ expect(result?.activityMetadata?.webSearch?.errorMessage).toBe(
570
+ WEB_SEARCH_BACKEND_FAILURE_MESSAGE,
571
+ );
572
+ expect(result?.isError).toBe(true);
573
+ expect(result?.activityMetadata?.webSearch?.resultCount).toBe(0);
574
+ expect(result?.activityMetadata?.webSearch?.results).toEqual([]);
575
+ });
576
+
577
+ test("raw error_code is logged under web_search_backend_failure but absent from user copy", async () => {
578
+ const { deps, events, warnings } = createHandlerDeps();
579
+ await completeNativeWebSearch(state, deps, "tu_log", {
580
+ isError: true,
581
+ errorCode: "unavailable",
582
+ });
583
+
584
+ const failureLog = warnings.find(
585
+ (w) => w.obj.event === "web_search_backend_failure",
586
+ );
587
+ expect(failureLog).toBeDefined();
588
+ expect(failureLog?.obj.provider).toBe("anthropic-native");
589
+ expect(String(failureLog?.obj.rawDetail)).toContain("unavailable");
590
+ expect(failureLog?.obj.fallbackShown).toBe(true);
591
+
592
+ const errorMessage = lastToolResult(events)?.activityMetadata?.webSearch
593
+ ?.errorMessage;
594
+ expect(errorMessage).not.toContain("unavailable");
595
+ });
596
+
597
+ test("dedups repeat backend failures within one turn to a single friendly notice", async () => {
598
+ const { deps, events, warnings } = createHandlerDeps();
599
+
600
+ await completeNativeWebSearch(state, deps, "tu_dup_1", {
601
+ isError: true,
602
+ errorCode: "unavailable",
603
+ });
604
+ await completeNativeWebSearch(state, deps, "tu_dup_2", {
605
+ isError: true,
606
+ errorCode: "overloaded_error",
607
+ });
608
+
609
+ const results = toolResults(events);
610
+ expect(results).toHaveLength(2);
611
+ expect(results[0]?.activityMetadata?.webSearch?.errorMessage).toBe(
612
+ WEB_SEARCH_BACKEND_FAILURE_MESSAGE,
613
+ );
614
+ // The second backend failure in the same turn is terse, not the full notice.
615
+ expect(results[1]?.activityMetadata?.webSearch?.errorMessage).not.toBe(
616
+ WEB_SEARCH_BACKEND_FAILURE_MESSAGE,
617
+ );
618
+
619
+ const failureLogs = warnings.filter(
620
+ (w) => w.obj.event === "web_search_backend_failure",
621
+ );
622
+ // Both failures are logged, but only the first reports fallbackShown.
623
+ expect(failureLogs).toHaveLength(2);
624
+ expect(failureLogs.filter((w) => w.obj.fallbackShown === true)).toHaveLength(
625
+ 1,
626
+ );
627
+ });
628
+
629
+ test("successful search leaves errorMessage undefined and populates results", async () => {
630
+ const { deps, events, warnings } = createHandlerDeps();
631
+ await completeNativeWebSearch(state, deps, "tu_ok", {
632
+ isError: false,
633
+ content: [
634
+ {
635
+ type: "web_search_result",
636
+ title: "Weather",
637
+ url: "https://example.com/weather",
638
+ },
639
+ ],
640
+ });
641
+
642
+ const meta = lastToolResult(events)?.activityMetadata?.webSearch;
643
+ expect(meta?.errorMessage).toBeUndefined();
644
+ expect(meta?.resultCount).toBe(1);
645
+ expect(meta?.results[0]?.title).toBe("Weather");
646
+ expect(lastToolResult(events)?.isError).toBe(false);
647
+ expect(
648
+ warnings.filter((w) => w.obj.event === "web_search_backend_failure"),
649
+ ).toHaveLength(0);
650
+ });
651
+
652
+ test("query_too_long yields a distinct non-backend message", async () => {
653
+ const { deps, events, warnings } = createHandlerDeps();
654
+ await completeNativeWebSearch(state, deps, "tu_long", {
655
+ isError: true,
656
+ errorCode: "query_too_long",
657
+ });
658
+
659
+ const errorMessage = lastToolResult(events)?.activityMetadata?.webSearch
660
+ ?.errorMessage;
661
+ expect(errorMessage).toBeDefined();
662
+ expect(errorMessage).not.toBe(WEB_SEARCH_BACKEND_FAILURE_MESSAGE);
663
+ // Recoverable non-backend errors must NOT emit backend-failure telemetry.
664
+ expect(
665
+ warnings.filter((w) => w.obj.event === "web_search_backend_failure"),
666
+ ).toHaveLength(0);
667
+ });
668
+
669
+ test("message-less native failure (no error_code) surfaces friendly copy, not the terse 'Search failed' placeholder, and emits no backend telemetry", async () => {
670
+ const { deps, events, warnings } = createHandlerDeps("req-unknown");
671
+ // `isError:true` with no error_code/message classifies as `unknown`
672
+ // (isBackendFailure:false, empty userMessage). It must still get the
673
+ // friendly copy rather than the bare "Search failed".
674
+ await completeNativeWebSearch(state, deps, "tu_unknown", {
675
+ isError: true,
676
+ });
677
+
678
+ const result = lastToolResult(events);
679
+ const meta = result?.activityMetadata?.webSearch;
680
+ expect(meta?.errorMessage).toBe(WEB_SEARCH_BACKEND_FAILURE_MESSAGE);
681
+ expect(meta?.errorMessage).not.toBe("Search failed");
682
+ expect(result?.isError).toBe(true);
683
+ expect(meta?.resultCount).toBe(0);
684
+ expect(meta?.results).toEqual([]);
685
+
686
+ // An unclassifiable failure borrows the friendly copy but must NOT be
687
+ // logged as a backend outage.
688
+ expect(
689
+ warnings.filter((w) => w.obj.event === "web_search_backend_failure"),
690
+ ).toHaveLength(0);
691
+ });
692
+ });