@vellumai/assistant 0.4.49 → 0.4.51

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (353)
  1. package/ARCHITECTURE.md +24 -33
  2. package/README.md +3 -3
  3. package/docs/architecture/integrations.md +2 -2
  4. package/docs/architecture/keychain-broker.md +6 -6
  5. package/docs/architecture/memory.md +180 -119
  6. package/knip.json +32 -0
  7. package/package.json +3 -2
  8. package/src/__tests__/agent-loop.test.ts +3 -1
  9. package/src/__tests__/anthropic-provider.test.ts +114 -23
  10. package/src/__tests__/approval-cascade.test.ts +1 -15
  11. package/src/__tests__/approval-routes-http.test.ts +2 -0
  12. package/src/__tests__/assistant-feature-flag-guard.test.ts +0 -23
  13. package/src/__tests__/btw-routes.test.ts +61 -5
  14. package/src/__tests__/canonical-guardian-store.test.ts +95 -0
  15. package/src/__tests__/checker.test.ts +13 -0
  16. package/src/__tests__/config-schema.test.ts +1 -68
  17. package/src/__tests__/config-watcher.test.ts +8 -0
  18. package/src/__tests__/context-memory-e2e.test.ts +11 -100
  19. package/src/__tests__/conversation-routes-guardian-reply.test.ts +8 -0
  20. package/src/__tests__/conversation-routes-slash-commands.test.ts +1 -0
  21. package/src/__tests__/credential-security-e2e.test.ts +1 -0
  22. package/src/__tests__/credential-security-invariants.test.ts +8 -7
  23. package/src/__tests__/credential-vault-unit.test.ts +23 -18
  24. package/src/__tests__/credential-vault.test.ts +30 -18
  25. package/src/__tests__/credentials-cli.test.ts +257 -82
  26. package/src/__tests__/cu-unified-flow.test.ts +532 -0
  27. package/src/__tests__/date-context.test.ts +93 -77
  28. package/src/__tests__/deterministic-verification-control-plane.test.ts +64 -0
  29. package/src/__tests__/guardian-routing-invariants.test.ts +93 -0
  30. package/src/__tests__/history-repair.test.ts +245 -0
  31. package/src/__tests__/host-cu-proxy.test.ts +165 -3
  32. package/src/__tests__/http-user-message-parity.test.ts +1 -0
  33. package/src/__tests__/inbound-invite-redemption.test.ts +36 -7
  34. package/src/__tests__/integration-status.test.ts +31 -30
  35. package/src/__tests__/invite-redemption-service.test.ts +166 -13
  36. package/src/__tests__/invite-routes-http.test.ts +166 -5
  37. package/src/__tests__/keychain-broker-client.test.ts +4 -4
  38. package/src/__tests__/list-messages-attachments.test.ts +193 -0
  39. package/src/__tests__/memory-context-benchmark.benchmark.test.ts +56 -18
  40. package/src/__tests__/memory-lifecycle-e2e.test.ts +244 -387
  41. package/src/__tests__/memory-recall-quality.test.ts +244 -407
  42. package/src/__tests__/memory-regressions.experimental.test.ts +126 -101
  43. package/src/__tests__/memory-regressions.test.ts +477 -2841
  44. package/src/__tests__/memory-retrieval.benchmark.test.ts +33 -150
  45. package/src/__tests__/memory-upsert-concurrency.test.ts +5 -244
  46. package/src/__tests__/mime-builder.test.ts +28 -0
  47. package/src/__tests__/native-web-search.test.ts +1 -0
  48. package/src/__tests__/oauth-cli.test.ts +824 -31
  49. package/src/__tests__/oauth-provider-profiles.test.ts +1 -1
  50. package/src/__tests__/oauth-store.test.ts +363 -17
  51. package/src/__tests__/qdrant-collection-migration.test.ts +53 -8
  52. package/src/__tests__/registry.test.ts +0 -1
  53. package/src/__tests__/relay-server.test.ts +55 -1
  54. package/src/__tests__/schedule-tools.test.ts +32 -0
  55. package/src/__tests__/script-proxy-certs.test.ts +1 -1
  56. package/src/__tests__/secret-onetime-send.test.ts +1 -0
  57. package/src/__tests__/secret-routes-managed-proxy.test.ts +183 -0
  58. package/src/__tests__/secure-keys.test.ts +78 -18
  59. package/src/__tests__/send-endpoint-busy.test.ts +3 -0
  60. package/src/__tests__/server-history-render.test.ts +2 -2
  61. package/src/__tests__/session-abort-tool-results.test.ts +1 -14
  62. package/src/__tests__/session-agent-loop-overflow.test.ts +1583 -0
  63. package/src/__tests__/session-agent-loop.test.ts +19 -15
  64. package/src/__tests__/session-confirmation-signals.test.ts +1 -15
  65. package/src/__tests__/session-error.test.ts +124 -2
  66. package/src/__tests__/session-history-web-search.test.ts +918 -0
  67. package/src/__tests__/session-pre-run-repair.test.ts +1 -14
  68. package/src/__tests__/session-provider-retry-repair.test.ts +25 -28
  69. package/src/__tests__/session-queue.test.ts +37 -27
  70. package/src/__tests__/session-runtime-assembly.test.ts +54 -0
  71. package/src/__tests__/session-slash-known.test.ts +1 -15
  72. package/src/__tests__/session-slash-queue.test.ts +1 -15
  73. package/src/__tests__/session-slash-unknown.test.ts +1 -15
  74. package/src/__tests__/session-workspace-cache-state.test.ts +3 -33
  75. package/src/__tests__/session-workspace-injection.test.ts +3 -37
  76. package/src/__tests__/session-workspace-tool-tracking.test.ts +3 -37
  77. package/src/__tests__/skills-install-extract.test.ts +93 -0
  78. package/src/__tests__/skills.test.ts +2 -2
  79. package/src/__tests__/skillssh-registry.test.ts +451 -0
  80. package/src/__tests__/slack-channel-config.test.ts +10 -8
  81. package/src/__tests__/trust-store.test.ts +15 -0
  82. package/src/__tests__/twilio-config.test.ts +11 -10
  83. package/src/__tests__/twilio-provider.test.ts +9 -4
  84. package/src/__tests__/voice-invite-redemption.test.ts +85 -5
  85. package/src/agent/ax-tree-compaction.test.ts +51 -0
  86. package/src/agent/loop.ts +39 -12
  87. package/src/approvals/AGENTS.md +1 -1
  88. package/src/approvals/guardian-request-resolvers.ts +14 -2
  89. package/src/bundler/compiler-tools.ts +66 -2
  90. package/src/calls/call-domain.ts +134 -3
  91. package/src/calls/call-store.ts +6 -0
  92. package/src/calls/relay-server.ts +44 -6
  93. package/src/calls/relay-setup-router.ts +17 -1
  94. package/src/calls/twilio-config.ts +5 -4
  95. package/src/calls/twilio-provider.ts +14 -9
  96. package/src/calls/twilio-rest.ts +10 -7
  97. package/src/calls/types.ts +3 -1
  98. package/src/cli/commands/config.ts +14 -9
  99. package/src/cli/commands/contacts.ts +3 -0
  100. package/src/cli/commands/credentials.ts +170 -174
  101. package/src/cli/commands/doctor.ts +11 -8
  102. package/src/cli/commands/keys.ts +9 -9
  103. package/src/cli/commands/mcp.ts +46 -59
  104. package/src/cli/commands/memory.ts +16 -165
  105. package/src/cli/commands/oauth/apps.ts +68 -10
  106. package/src/cli/commands/oauth/connections.ts +475 -105
  107. package/src/cli/commands/oauth/index.ts +3 -3
  108. package/src/cli/commands/oauth/providers.ts +18 -4
  109. package/src/cli/commands/sessions.ts +5 -2
  110. package/src/cli/commands/skills.ts +173 -1
  111. package/src/cli/http-client.ts +0 -20
  112. package/src/cli/main-screen.tsx +2 -2
  113. package/src/cli/program.ts +5 -6
  114. package/src/cli.ts +20 -22
  115. package/src/config/__tests__/feature-flag-registry-bundled.test.ts +39 -0
  116. package/src/config/bundled-skills/computer-use/TOOLS.json +1 -1
  117. package/src/config/bundled-skills/computer-use/tools/computer-use-observe.ts +12 -0
  118. package/src/config/bundled-skills/contacts/SKILL.md +35 -11
  119. package/src/config/bundled-skills/contacts/tools/google-contacts.ts +1 -1
  120. package/src/config/bundled-skills/gmail/SKILL.md +1 -1
  121. package/src/config/bundled-skills/gmail/TOOLS.json +52 -0
  122. package/src/config/bundled-skills/gmail/tools/gmail-archive.ts +13 -3
  123. package/src/config/bundled-skills/gmail/tools/gmail-attachments.ts +9 -2
  124. package/src/config/bundled-skills/gmail/tools/gmail-draft.ts +5 -1
  125. package/src/config/bundled-skills/gmail/tools/gmail-filters.ts +5 -1
  126. package/src/config/bundled-skills/gmail/tools/gmail-follow-up.ts +5 -1
  127. package/src/config/bundled-skills/gmail/tools/gmail-forward.ts +5 -1
  128. package/src/config/bundled-skills/gmail/tools/gmail-label.ts +9 -2
  129. package/src/config/bundled-skills/gmail/tools/gmail-outreach-scan.ts +5 -1
  130. package/src/config/bundled-skills/gmail/tools/gmail-send-draft.ts +5 -1
  131. package/src/config/bundled-skills/gmail/tools/gmail-sender-digest.ts +5 -1
  132. package/src/config/bundled-skills/gmail/tools/gmail-trash.ts +5 -1
  133. package/src/config/bundled-skills/gmail/tools/gmail-unsubscribe.ts +5 -1
  134. package/src/config/bundled-skills/gmail/tools/gmail-vacation.ts +5 -1
  135. package/src/config/bundled-skills/google-calendar/TOOLS.json +20 -0
  136. package/src/config/bundled-skills/google-calendar/tools/calendar-check-availability.ts +2 -1
  137. package/src/config/bundled-skills/google-calendar/tools/calendar-create-event.ts +2 -1
  138. package/src/config/bundled-skills/google-calendar/tools/calendar-get-event.ts +2 -1
  139. package/src/config/bundled-skills/google-calendar/tools/calendar-list-events.ts +2 -1
  140. package/src/config/bundled-skills/google-calendar/tools/calendar-rsvp.ts +2 -1
  141. package/src/config/bundled-skills/google-calendar/tools/shared.ts +8 -2
  142. package/src/config/bundled-skills/messaging/SKILL.md +1 -1
  143. package/src/config/bundled-skills/messaging/tools/messaging-analyze-style.ts +2 -2
  144. package/src/config/bundled-skills/messaging/tools/messaging-archive-by-sender.ts +2 -2
  145. package/src/config/bundled-skills/messaging/tools/messaging-auth-test.ts +2 -2
  146. package/src/config/bundled-skills/messaging/tools/messaging-list-conversations.ts +2 -2
  147. package/src/config/bundled-skills/messaging/tools/messaging-mark-read.ts +2 -2
  148. package/src/config/bundled-skills/messaging/tools/messaging-read.ts +2 -2
  149. package/src/config/bundled-skills/messaging/tools/messaging-search.ts +2 -2
  150. package/src/config/bundled-skills/messaging/tools/messaging-send.ts +2 -2
  151. package/src/config/bundled-skills/messaging/tools/messaging-sender-digest.ts +2 -2
  152. package/src/config/bundled-skills/messaging/tools/shared.ts +7 -5
  153. package/src/config/bundled-skills/slack/tools/shared.ts +1 -1
  154. package/src/config/bundled-skills/slack/tools/slack-add-reaction.ts +1 -1
  155. package/src/config/bundled-skills/slack/tools/slack-channel-details.ts +1 -1
  156. package/src/config/bundled-skills/slack/tools/slack-delete-message.ts +1 -1
  157. package/src/config/bundled-skills/slack/tools/slack-edit-message.ts +1 -1
  158. package/src/config/bundled-skills/slack/tools/slack-leave-channel.ts +1 -1
  159. package/src/config/bundled-skills/slack/tools/slack-scan-digest.ts +1 -1
  160. package/src/config/bundled-tool-registry.ts +2 -5
  161. package/src/config/loader.ts +6 -42
  162. package/src/config/schema.ts +1 -12
  163. package/src/config/schemas/memory-lifecycle.ts +0 -9
  164. package/src/config/schemas/memory-processing.ts +0 -180
  165. package/src/config/schemas/memory-retrieval.ts +32 -104
  166. package/src/config/schemas/memory.ts +0 -10
  167. package/src/config/types.ts +0 -4
  168. package/src/contacts/contact-store.ts +39 -2
  169. package/src/contacts/contacts-write.ts +9 -0
  170. package/src/context/window-manager.ts +4 -1
  171. package/src/daemon/config-watcher.ts +55 -2
  172. package/src/daemon/daemon-control.ts +1 -1
  173. package/src/daemon/date-context.ts +114 -31
  174. package/src/daemon/handlers/config-ingress.ts +2 -2
  175. package/src/daemon/handlers/config-slack-channel.ts +59 -39
  176. package/src/daemon/handlers/config-telegram.ts +23 -14
  177. package/src/daemon/handlers/session-history.ts +1 -358
  178. package/src/daemon/handlers/sessions.ts +18 -13
  179. package/src/daemon/handlers/shared.ts +3 -17
  180. package/src/daemon/handlers/skills.ts +20 -1
  181. package/src/daemon/history-repair.ts +72 -8
  182. package/src/daemon/host-cu-proxy.ts +55 -26
  183. package/src/daemon/lifecycle.ts +39 -4
  184. package/src/daemon/mcp-reload-service.ts +2 -2
  185. package/src/daemon/message-types/computer-use.ts +1 -12
  186. package/src/daemon/message-types/memory.ts +4 -16
  187. package/src/daemon/message-types/messages.ts +1 -0
  188. package/src/daemon/message-types/sessions.ts +4 -42
  189. package/src/daemon/server.ts +6 -1
  190. package/src/daemon/session-agent-loop-handlers.ts +38 -0
  191. package/src/daemon/session-agent-loop.ts +334 -48
  192. package/src/daemon/session-error.ts +89 -6
  193. package/src/daemon/session-history.ts +17 -7
  194. package/src/daemon/session-media-retry.ts +6 -2
  195. package/src/daemon/session-memory.ts +69 -149
  196. package/src/daemon/session-process.ts +10 -1
  197. package/src/daemon/session-runtime-assembly.ts +49 -19
  198. package/src/daemon/session-slash.ts +3 -5
  199. package/src/daemon/session-surfaces.ts +4 -1
  200. package/src/daemon/session-tool-setup.ts +7 -1
  201. package/src/daemon/session.ts +12 -2
  202. package/src/email/providers/index.ts +2 -2
  203. package/src/instrument.ts +61 -1
  204. package/src/media/avatar-router.ts +1 -1
  205. package/src/memory/admin.ts +2 -191
  206. package/src/memory/canonical-guardian-store.ts +38 -2
  207. package/src/memory/conversation-crud.ts +0 -33
  208. package/src/memory/conversation-queries.ts +25 -83
  209. package/src/memory/db-init.ts +32 -0
  210. package/src/memory/embedding-backend.ts +84 -8
  211. package/src/memory/embedding-types.ts +9 -1
  212. package/src/memory/indexer.ts +7 -46
  213. package/src/memory/invite-store.ts +19 -0
  214. package/src/memory/items-extractor.ts +274 -76
  215. package/src/memory/job-handlers/backfill.ts +2 -127
  216. package/src/memory/job-handlers/cleanup.ts +2 -16
  217. package/src/memory/job-handlers/extraction.ts +2 -138
  218. package/src/memory/job-handlers/index-maintenance.ts +1 -6
  219. package/src/memory/job-handlers/summarization.ts +3 -148
  220. package/src/memory/job-utils.ts +21 -59
  221. package/src/memory/jobs-store.ts +1 -159
  222. package/src/memory/jobs-worker.ts +9 -52
  223. package/src/memory/migrations/104-core-indexes.ts +3 -3
  224. package/src/memory/migrations/149-oauth-tables.ts +2 -0
  225. package/src/memory/migrations/150-oauth-apps-client-secret-path.ts +98 -0
  226. package/src/memory/migrations/151-oauth-providers-ping-url.ts +11 -0
  227. package/src/memory/migrations/152-memory-item-supersession.ts +44 -0
  228. package/src/memory/migrations/153-drop-entity-tables.ts +15 -0
  229. package/src/memory/migrations/154-drop-fts.ts +20 -0
  230. package/src/memory/migrations/155-drop-conflicts.ts +7 -0
  231. package/src/memory/migrations/156-call-session-invite-metadata.ts +24 -0
  232. package/src/memory/migrations/157-invite-contact-id.ts +104 -0
  233. package/src/memory/migrations/index.ts +8 -0
  234. package/src/memory/migrations/registry.ts +6 -0
  235. package/src/memory/qdrant-client.ts +148 -51
  236. package/src/memory/raw-query.ts +1 -1
  237. package/src/memory/retriever.test.ts +294 -273
  238. package/src/memory/retriever.ts +421 -645
  239. package/src/memory/schema/calls.ts +2 -0
  240. package/src/memory/schema/contacts.ts +1 -0
  241. package/src/memory/schema/memory-core.ts +3 -48
  242. package/src/memory/schema/oauth.ts +2 -0
  243. package/src/memory/search/formatting.ts +263 -176
  244. package/src/memory/search/lexical.ts +1 -254
  245. package/src/memory/search/ranking.ts +0 -455
  246. package/src/memory/search/semantic.ts +100 -14
  247. package/src/memory/search/staleness.ts +47 -0
  248. package/src/memory/search/tier-classifier.ts +21 -0
  249. package/src/memory/search/types.ts +15 -77
  250. package/src/memory/task-memory-cleanup.ts +4 -6
  251. package/src/messaging/provider.ts +1 -1
  252. package/src/messaging/providers/gmail/adapter.ts +1 -1
  253. package/src/messaging/providers/gmail/mime-builder.ts +17 -7
  254. package/src/messaging/providers/telegram-bot/adapter.ts +17 -8
  255. package/src/messaging/providers/whatsapp/adapter.ts +13 -9
  256. package/src/messaging/registry.ts +9 -5
  257. package/src/oauth/byo-connection.test.ts +40 -25
  258. package/src/oauth/connect-orchestrator.ts +4 -10
  259. package/src/oauth/connection-resolver.ts +20 -6
  260. package/src/oauth/manual-token-connection.ts +5 -5
  261. package/src/oauth/oauth-store.ts +183 -31
  262. package/src/oauth/platform-connection.test.ts +1 -1
  263. package/src/oauth/provider-behaviors.ts +503 -4
  264. package/src/oauth/seed-providers.ts +214 -8
  265. package/src/oauth/token-persistence.ts +31 -16
  266. package/src/permissions/defaults.ts +1 -0
  267. package/src/permissions/trust-store.ts +23 -1
  268. package/src/playbooks/playbook-compiler.ts +1 -1
  269. package/src/prompts/system-prompt.ts +18 -2
  270. package/src/providers/anthropic/client.ts +56 -126
  271. package/src/providers/types.ts +7 -1
  272. package/src/runtime/AGENTS.md +9 -0
  273. package/src/runtime/auth/route-policy.ts +6 -3
  274. package/src/runtime/channel-readiness-service.ts +48 -40
  275. package/src/runtime/guardian-reply-router.ts +24 -22
  276. package/src/runtime/http-server.ts +2 -2
  277. package/src/runtime/http-types.ts +2 -0
  278. package/src/runtime/invite-redemption-service.ts +72 -12
  279. package/src/runtime/invite-service.ts +43 -0
  280. package/src/runtime/middleware/twilio-validation.ts +1 -1
  281. package/src/runtime/pending-interactions.ts +2 -2
  282. package/src/runtime/routes/brain-graph-routes.ts +10 -90
  283. package/src/runtime/routes/btw-routes.ts +10 -5
  284. package/src/runtime/routes/conversation-routes.ts +56 -11
  285. package/src/runtime/routes/inbound-stages/acl-enforcement.ts +21 -12
  286. package/src/runtime/routes/integrations/slack/channel.ts +2 -2
  287. package/src/runtime/routes/integrations/telegram.ts +2 -2
  288. package/src/runtime/routes/integrations/twilio.ts +17 -17
  289. package/src/runtime/routes/invite-routes.ts +29 -4
  290. package/src/runtime/routes/memory-item-routes.test.ts +754 -0
  291. package/src/runtime/routes/memory-item-routes.ts +503 -0
  292. package/src/runtime/routes/secret-routes.ts +17 -0
  293. package/src/runtime/routes/session-management-routes.ts +3 -3
  294. package/src/runtime/routes/settings-routes.ts +3 -3
  295. package/src/runtime/routes/trust-rules-routes.ts +14 -0
  296. package/src/runtime/routes/workspace-routes.ts +9 -4
  297. package/src/runtime/routes/workspace-utils.ts +8 -2
  298. package/src/schedule/integration-status.ts +26 -19
  299. package/src/security/keychain-broker-client.ts +17 -4
  300. package/src/security/oauth2.ts +6 -7
  301. package/src/security/secure-keys.ts +44 -19
  302. package/src/security/token-manager.ts +46 -39
  303. package/src/services/vercel-deploy.ts +0 -24
  304. package/src/signals/confirm.ts +78 -0
  305. package/src/signals/mcp-reload.ts +18 -0
  306. package/src/skills/catalog-install.ts +74 -18
  307. package/src/skills/skillssh-registry.ts +503 -0
  308. package/src/tools/assets/search.ts +5 -1
  309. package/src/tools/computer-use/definitions.ts +0 -10
  310. package/src/tools/computer-use/registry.ts +1 -1
  311. package/src/tools/credentials/vault.ts +22 -7
  312. package/src/tools/memory/definitions.ts +4 -13
  313. package/src/tools/memory/handlers.test.ts +83 -103
  314. package/src/tools/memory/handlers.ts +50 -85
  315. package/src/tools/network/script-proxy/session-manager.ts +8 -8
  316. package/src/tools/schedule/create.ts +10 -3
  317. package/src/tools/schedule/update.ts +8 -1
  318. package/src/tools/skills/load.ts +25 -2
  319. package/src/watcher/provider-types.ts +1 -1
  320. package/src/watcher/providers/github.ts +1 -1
  321. package/src/watcher/providers/gmail.ts +3 -3
  322. package/src/watcher/providers/google-calendar.ts +3 -3
  323. package/src/watcher/providers/linear.ts +1 -1
  324. package/src/__tests__/clarification-resolver.test.ts +0 -193
  325. package/src/__tests__/conflict-intent-tokenization.test.ts +0 -160
  326. package/src/__tests__/conflict-policy.test.ts +0 -269
  327. package/src/__tests__/conflict-store.test.ts +0 -372
  328. package/src/__tests__/contradiction-checker.test.ts +0 -361
  329. package/src/__tests__/entity-extractor.test.ts +0 -211
  330. package/src/__tests__/entity-search.test.ts +0 -1117
  331. package/src/__tests__/profile-compiler.test.ts +0 -392
  332. package/src/__tests__/session-conflict-gate.test.ts +0 -1228
  333. package/src/__tests__/session-profile-injection.test.ts +0 -557
  334. package/src/config/bundled-skills/knowledge-graph/SKILL.md +0 -25
  335. package/src/config/bundled-skills/knowledge-graph/TOOLS.json +0 -66
  336. package/src/config/bundled-skills/knowledge-graph/tools/graph-query.ts +0 -211
  337. package/src/daemon/session-conflict-gate.ts +0 -167
  338. package/src/daemon/session-dynamic-profile.ts +0 -77
  339. package/src/memory/clarification-resolver.ts +0 -417
  340. package/src/memory/conflict-intent.ts +0 -205
  341. package/src/memory/conflict-policy.ts +0 -127
  342. package/src/memory/conflict-store.ts +0 -410
  343. package/src/memory/contradiction-checker.ts +0 -508
  344. package/src/memory/entity-extractor.ts +0 -535
  345. package/src/memory/format-recall.ts +0 -47
  346. package/src/memory/fts-reconciler.ts +0 -165
  347. package/src/memory/job-handlers/conflict.ts +0 -200
  348. package/src/memory/profile-compiler.ts +0 -195
  349. package/src/memory/recall-cache.ts +0 -117
  350. package/src/memory/search/entity.ts +0 -535
  351. package/src/memory/search/query-expansion.test.ts +0 -70
  352. package/src/memory/search/query-expansion.ts +0 -118
  353. package/src/runtime/routes/mcp-routes.ts +0 -20
@@ -1,5 +1,4 @@
1
- import { createHash } from "crypto";
2
- import { inArray } from "drizzle-orm";
1
+ import { inArray, sql } from "drizzle-orm";
3
2
 
4
3
  import type { AssistantConfig } from "../config/types.js";
5
4
  import { estimateTextTokens } from "../context/token-estimator.js";
@@ -12,40 +11,34 @@ import {
12
11
  import { getDb } from "./db.js";
13
12
  import {
14
13
  embedWithBackend,
14
+ generateSparseEmbedding,
15
15
  getMemoryBackendStatus,
16
16
  logMemoryEmbeddingWarning,
17
17
  } from "./embedding-backend.js";
18
- import { formatRecallText } from "./format-recall.js";
18
+ import { isQdrantBreakerOpen } from "./qdrant-circuit-breaker.js";
19
19
  import {
20
- isQdrantBreakerOpen,
21
- QdrantCircuitOpenError,
22
- } from "./qdrant-circuit-breaker.js";
20
+ conversations,
21
+ memoryItems,
22
+ memoryItemSources,
23
+ messages,
24
+ } from "./schema.js";
23
25
  import {
24
- getCachedRecall,
25
- getMemoryVersion,
26
- setCachedRecall,
27
- } from "./recall-cache.js";
28
- import { memoryItemSources } from "./schema.js";
29
- import { entitySearch } from "./search/entity.js";
30
- import { MEMORY_CONTEXT_ACK } from "./search/formatting.js";
31
- import {
32
- directItemSearch,
33
- lexicalSearch,
34
- recencySearch,
35
- } from "./search/lexical.js";
36
- import { buildFTSQuery, expandQueryForFTS } from "./search/query-expansion.js";
37
- import {
38
- applySourceCaps,
39
- mergeCandidates,
40
- rerankWithLLM,
41
- } from "./search/ranking.js";
26
+ buildTwoLayerInjection,
27
+ IDENTITY_KINDS,
28
+ MEMORY_CONTEXT_ACK,
29
+ PREFERENCE_KINDS,
30
+ } from "./search/formatting.js";
31
+ import { recencySearch } from "./search/lexical.js";
42
32
  import { isQdrantConnectionError, semanticSearch } from "./search/semantic.js";
33
+ import { applyStaleDemotion, computeStaleness } from "./search/staleness.js";
34
+ import {
35
+ classifyTiers,
36
+ type TieredCandidate,
37
+ } from "./search/tier-classifier.js";
43
38
  import type {
44
39
  Candidate,
45
- CollectedCandidates,
46
40
  DegradationReason,
47
41
  DegradationStatus,
48
- FallbackSource,
49
42
  MemoryRecallCandiateDebug,
50
43
  MemoryRecallOptions,
51
44
  MemoryRecallResult,
@@ -61,7 +54,6 @@ export {
61
54
  export type {
62
55
  DegradationReason,
63
56
  DegradationStatus,
64
- FallbackSource,
65
57
  MemoryRecallCandiateDebug,
66
58
  MemoryRecallResult,
67
59
  ScopePolicyOverride,
@@ -69,22 +61,6 @@ export type {
69
61
 
70
62
  const log = getLogger("memory-retriever");
71
63
 
72
- /** Hash the retrieval-relevant config fields so the recall cache distinguishes different configs. */
73
- function buildConfigFingerprint(config: AssistantConfig): string {
74
- const relevant = {
75
- r: config.memory.retrieval,
76
- e: {
77
- provider: config.memory.embeddings.provider,
78
- required: config.memory.embeddings.required,
79
- },
80
- ent: config.memory.entity.enabled,
81
- };
82
- return createHash("sha256")
83
- .update(JSON.stringify(relevant))
84
- .digest("hex")
85
- .slice(0, 16);
86
- }
87
-
88
64
  const EMBED_MAX_RETRIES = 3;
89
65
  const EMBED_BASE_DELAY_MS = 500;
90
66
 
@@ -151,336 +127,18 @@ function buildScopeFilter(
151
127
  return [scopeId];
152
128
  }
153
129
 
154
- /**
155
- * Shared retrieval pipeline: collect candidates from all available sources
156
- * (lexical, recency, semantic, entity, direct item search) and merge them
157
- * using RRF.
158
- */
159
- export async function collectAndMergeCandidates(
160
- query: string,
161
- config: AssistantConfig,
162
- opts?: {
163
- queryVector?: number[] | null;
164
- provider?: string;
165
- model?: string;
166
- conversationId?: string;
167
- excludeMessageIds?: string[];
168
- scopeId?: string;
169
- scopePolicyOverride?: ScopePolicyOverride;
170
- },
171
- ): Promise<CollectedCandidates> {
172
- const queryVector = opts?.queryVector ?? null;
173
- const excludeMessageIds = opts?.excludeMessageIds ?? [];
174
- const scopeId = opts?.scopeId;
175
- const scopePolicy = config.memory.retrieval.scopePolicy;
176
- // Build the list of scope IDs to include in queries.
177
- // A per-call scopePolicyOverride takes precedence over the global policy.
178
- const scopeIds = buildScopeFilter(
179
- scopeId,
180
- scopePolicy,
181
- opts?.scopePolicyOverride,
182
- );
183
-
184
- let semanticSearchFailed = false;
185
- let semanticSearchError: unknown;
186
-
187
- // Detect when semantic search won't be available so we can compensate
188
- // by boosting lexical/recency/direct item limits.
189
- const semanticUnavailable = !queryVector || isQdrantBreakerOpen();
190
- if (semanticUnavailable) {
191
- log.debug("Semantic search unavailable — boosting lexical limits");
192
- }
193
-
194
- // -- Phase 1: cheap local searches (always run) --
195
- const lexicalTopK = semanticUnavailable
196
- ? config.memory.retrieval.lexicalTopK * 2
197
- : config.memory.retrieval.lexicalTopK;
198
-
199
- // When semantic search is unavailable, expand the conversational query
200
- // into meaningful keywords for better FTS recall. This compensates for
201
- // the lack of vector-based semantic matching.
202
- const expandedFtsQuery = semanticUnavailable
203
- ? buildFTSQuery(expandQueryForFTS(query))
204
- : undefined;
205
-
206
- const lexical = lexicalSearch(
207
- query,
208
- lexicalTopK,
209
- excludeMessageIds,
210
- scopeIds,
211
- expandedFtsQuery,
212
- );
213
-
214
- const baseRecencyLimit = Math.max(
215
- 10,
216
- Math.floor(config.memory.retrieval.semanticTopK / 2),
217
- );
218
- const recencyLimit = semanticUnavailable
219
- ? Math.ceil(baseRecencyLimit * 1.5)
220
- : baseRecencyLimit;
221
- const recency = opts?.conversationId
222
- ? recencySearch(
223
- opts.conversationId,
224
- recencyLimit,
225
- excludeMessageIds,
226
- scopeIds,
227
- )
228
- : [];
229
-
230
- // Direct item search supplements FTS with LIKE-based matching.
231
- // When exclusions are present, adaptively increase the fetch size until
232
- // we collect directLimit valid (non-excluded) items or exhaust the DB.
233
- const baseDirectLimit = Math.max(10, config.memory.retrieval.lexicalTopK);
234
- const directLimit = semanticUnavailable
235
- ? baseDirectLimit * 2
236
- : baseDirectLimit;
237
-
238
- // Helper: filter fetched direct items to those with at least one non-excluded source.
239
- const filterDirectItems = (items: Candidate[]): Candidate[] => {
240
- if (items.length === 0) return items;
241
- const db = getDb();
242
- const excludedSet = new Set(excludeMessageIds);
243
- const allSources = db
244
- .select({
245
- memoryItemId: memoryItemSources.memoryItemId,
246
- messageId: memoryItemSources.messageId,
247
- })
248
- .from(memoryItemSources)
249
- .where(
250
- inArray(
251
- memoryItemSources.memoryItemId,
252
- items.map((c) => c.id),
253
- ),
254
- )
255
- .all();
256
- const hasNonExcluded = new Set<string>();
257
- const hasSources = new Set<string>();
258
- for (const s of allSources) {
259
- hasSources.add(s.memoryItemId);
260
- if (!excludedSet.has(s.messageId)) {
261
- hasNonExcluded.add(s.memoryItemId);
262
- }
263
- }
264
- return items.filter(
265
- (c) => !hasSources.has(c.id) || hasNonExcluded.has(c.id),
266
- );
267
- };
268
-
269
- let directItems: Candidate[];
270
- if (excludeMessageIds.length > 0) {
271
- const MAX_FETCH = directLimit * 8;
272
-
273
- // Probe: fetch directLimit items and measure how many survive filtering.
274
- const probe = directItemSearch(query, directLimit, scopeIds);
275
- const probeFiltered = filterDirectItems(probe);
276
- const probeExhausted = probe.length < directLimit;
277
-
278
- if (probeFiltered.length >= directLimit || probeExhausted) {
279
- directItems = probeFiltered.slice(0, directLimit);
280
- } else {
281
- // Compute exclusion ratio from probe and extrapolate the fetch size
282
- // needed to yield directLimit surviving items in a single query.
283
- const exclusionRatio =
284
- probe.length > 0 ? 1 - probeFiltered.length / probe.length : 0;
285
- // Fetch enough to compensate for the observed exclusion rate, with
286
- // a 1.5x safety margin to avoid a second round in most cases.
287
- const estimatedFetch =
288
- exclusionRatio < 1
289
- ? Math.ceil((directLimit / (1 - exclusionRatio)) * 1.5)
290
- : MAX_FETCH;
291
- let fetchSize = Math.min(
292
- Math.max(estimatedFetch, directLimit + 24),
293
- MAX_FETCH,
294
- );
295
-
296
- let fetched = directItemSearch(query, fetchSize, scopeIds);
297
- directItems = filterDirectItems(fetched).slice(0, directLimit);
298
-
299
- // Retry loop: when the estimate under-fetched (uneven exclusion
300
- // distribution), keep increasing fetchSize until quota is met or
301
- // the DB is exhausted.
302
- while (
303
- directItems.length < directLimit &&
304
- fetched.length === fetchSize &&
305
- fetchSize < MAX_FETCH
306
- ) {
307
- fetchSize = Math.min(fetchSize * 2, MAX_FETCH);
308
- fetched = directItemSearch(query, fetchSize, scopeIds);
309
- directItems = filterDirectItems(fetched).slice(0, directLimit);
310
- }
311
- }
312
- } else {
313
- directItems = directItemSearch(query, directLimit, scopeIds);
314
- }
315
-
316
- // -- Early termination check --
317
- // If cheap sources already produced enough high-relevance candidates,
318
- // skip semantic and entity search entirely.
319
- //
320
- // Deduplicate before counting: lexical and recency can return the same
321
- // segment (common when recent messages match the query), so checking raw
322
- // counts would inflate the total and trigger early termination prematurely.
323
- const etConfig = config.memory.retrieval.earlyTermination;
324
- const cheapCandidateMap = new Map<string, Candidate>();
325
- for (const c of [...lexical, ...recency, ...directItems]) {
326
- const existing = cheapCandidateMap.get(c.key);
327
- // Keep the candidate with higher query relevance (lexical score is the
328
- // best proxy we have at this stage; confidence reflects extraction
329
- // certainty, not query-match strength).
330
- if (!existing || c.lexical > existing.lexical) {
331
- cheapCandidateMap.set(c.key, c);
332
- }
333
- }
334
- const cheapCandidates = [...cheapCandidateMap.values()];
335
-
336
- // Gate on relevance instead of confidence: for direct item candidates,
337
- // c.confidence reflects extraction certainty (memory_items.confidence),
338
- // not query-match relevance. Common tokens can produce many high-confidence
339
- // but weakly relevant items that would skip semantic search exactly when
340
- // it's needed most. Instead, check lexical score (query-match relevance).
341
- //
342
- // Disable early termination when semantic search is unavailable: boosted
343
- // limits inflate cheap candidate counts, making this gate trigger more
344
- // easily. Skipping entity retrieval on top of losing semantic search
345
- // would reduce recall quality further.
346
- const canTerminateEarly =
347
- etConfig.enabled &&
348
- !semanticUnavailable &&
349
- cheapCandidates.length >= etConfig.minCandidates &&
350
- cheapCandidates.filter((c) => c.lexical >= etConfig.confidenceThreshold)
351
- .length >= etConfig.minHighConfidence;
352
-
353
- // -- Phase 2: entity search + await semantic (skipped on early termination) --
354
- let semantic: Candidate[] = [];
355
- let entity: Candidate[] = [];
356
- let candidateDepths: Map<string, number> | undefined;
357
- let relationSeedEntityCount = 0;
358
- let relationTraversedEdgeCount = 0;
359
- let relationNeighborEntityCount = 0;
360
- let relationExpandedItemCount = 0;
361
-
362
- if (!canTerminateEarly) {
363
- // Start semantic search now that we know early termination won't apply.
364
- // The network round-trip overlaps with entity search below.
365
- const semanticPromise = queryVector
366
- ? semanticSearch(
367
- queryVector,
368
- opts?.provider ?? "unknown",
369
- opts?.model ?? "unknown",
370
- config.memory.retrieval.semanticTopK,
371
- excludeMessageIds,
372
- scopeIds,
373
- ).catch((err): Candidate[] => {
374
- semanticSearchFailed = true;
375
- semanticSearchError = err;
376
- if (isQdrantConnectionError(err)) {
377
- log.warn(
378
- { err },
379
- "Qdrant is unavailable — semantic search disabled, memory recall will be degraded",
380
- );
381
- } else {
382
- log.warn(
383
- { err },
384
- "Semantic search failed, continuing with other retrieval methods",
385
- );
386
- }
387
- return [];
388
- })
389
- : null;
390
-
391
- // Entity search is synchronous — run it while the semantic promise
392
- // is in flight.
393
- if (config.memory.entity.enabled) {
394
- const entitySearchResult = entitySearch(
395
- query,
396
- config.memory.entity,
397
- scopeIds,
398
- excludeMessageIds,
399
- );
400
- entity = entitySearchResult.candidates;
401
- candidateDepths = entitySearchResult.candidateDepths;
402
- relationSeedEntityCount = entitySearchResult.relationSeedEntityCount;
403
- relationTraversedEdgeCount =
404
- entitySearchResult.relationTraversedEdgeCount;
405
- relationNeighborEntityCount =
406
- entitySearchResult.relationNeighborEntityCount;
407
- relationExpandedItemCount = entitySearchResult.relationExpandedItemCount;
408
- }
409
-
410
- if (semanticPromise) {
411
- semantic = await semanticPromise;
412
- }
413
- }
414
-
415
- if (canTerminateEarly) {
416
- log.debug(
417
- {
418
- cheapCandidateCount: cheapCandidates.length,
419
- highRelevanceCount: cheapCandidates.filter(
420
- (c) => c.lexical >= etConfig.confidenceThreshold,
421
- ).length,
422
- },
423
- "Early termination: skipping semantic and entity search — sufficient high-relevance candidates from cheap sources",
424
- );
425
- }
426
-
427
- const relationScoreMultiplier =
428
- config.memory.entity.enabled &&
429
- config.memory.entity.relationRetrieval.enabled
430
- ? config.memory.entity.relationRetrieval.neighborScoreMultiplier
431
- : undefined;
432
- const depthMap =
433
- config.memory.entity.enabled &&
434
- config.memory.entity.relationRetrieval.depthDecay
435
- ? candidateDepths
436
- : undefined;
437
- const merged = mergeCandidates(
438
- lexical,
439
- semantic,
440
- recency,
441
- [...entity, ...directItems],
442
- config.memory.retrieval.freshness,
443
- relationScoreMultiplier,
444
- depthMap,
445
- );
446
-
447
- return {
448
- lexical,
449
- recency,
450
- semantic,
451
- entity,
452
- relationSeedEntityCount,
453
- relationTraversedEdgeCount,
454
- relationNeighborEntityCount,
455
- relationExpandedItemCount,
456
- earlyTerminated: canTerminateEarly,
457
- semanticSearchFailed,
458
- semanticUnavailable,
459
- semanticSearchError,
460
- merged,
461
- };
462
- }
463
-
464
130
  /**
465
131
  * Build a structured degradation status describing which retrieval
466
132
  * capabilities are unavailable and what fallback sources remain.
467
133
  */
468
134
  function buildDegradationStatus(
469
135
  reason: DegradationReason,
470
- config: AssistantConfig,
136
+ _config: AssistantConfig,
471
137
  ): DegradationStatus {
472
- const fallbackSources: FallbackSource[] = [
473
- "lexical",
474
- "recency",
475
- "direct_item",
476
- ];
477
- if (config.memory.entity.enabled) {
478
- fallbackSources.push("entity");
479
- }
480
138
  return {
481
139
  semanticUnavailable: true,
482
140
  reason,
483
- fallbackSources,
141
+ fallbackSources: ["recency"],
484
142
  };
485
143
  }
486
144
 
@@ -576,86 +234,184 @@ async function generateQueryEmbedding(
576
234
  return { queryVector, provider, model, degraded, degradation, reason };
577
235
  }
578
236
 
579
- /** Result of the re-ranking stage. */
580
- interface RerankResult {
581
- merged: Candidate[];
582
- rerankApplied: boolean;
583
- }
584
-
585
237
  /**
586
- * Apply source caps and optionally LLM re-rank the merged candidates.
587
- * Returns `null` when the caller should return an early-exit `emptyResult`
588
- * (abort during re-ranking).
238
+ * Memory recall pipeline: hybrid search → tier classification →
239
+ * staleness annotation → two-layer XML injection.
240
+ *
241
+ * Pipeline steps:
242
+ * 1. Build query text (caller provides via buildMemoryQuery)
243
+ * 2. Generate dense + sparse embeddings
244
+ * 3. Hybrid search on Qdrant (dense + sparse RRF fusion)
245
+ * 4. Supplement with recency search (conversation-scoped, DB only)
246
+ * 5. Merge + deduplicate results
247
+ * 6. Classify tiers (score > 0.8 → tier 1, > 0.6 → tier 2)
248
+ * 7. Enrich item candidates with metadata for staleness
249
+ * 8. Compute staleness per item
250
+ * 9. Demote very_stale tier 1 → tier 2
251
+ * 10. Build two-layer XML injection with budget allocation
589
252
  */
590
- async function rerankMergedCandidates(
253
+ export async function buildMemoryRecall(
591
254
  query: string,
592
- candidates: Candidate[],
255
+ conversationId: string,
593
256
  config: AssistantConfig,
594
- signal: AbortSignal | undefined,
595
- start: number,
596
- provider: string | undefined,
597
- model: string | undefined,
598
- ): Promise<RerankResult | { earlyExit: MemoryRecallResult }> {
599
- let merged = applySourceCaps(candidates, config);
600
- let rerankApplied = false;
601
-
602
- const rerankingConfig = config.memory.retrieval.reranking;
603
- if (rerankingConfig.enabled && merged.length >= 5) {
604
- const rerankStart = Date.now();
605
- const topCandidates = merged.slice(0, rerankingConfig.topK);
257
+ options?: MemoryRecallOptions,
258
+ ): Promise<MemoryRecallResult> {
259
+ const start = Date.now();
260
+ const excludeMessageIds =
261
+ options?.excludeMessageIds?.filter((id) => id.length > 0) ?? [];
262
+ const signal = options?.signal;
263
+
264
+ if (!config.memory.enabled) {
265
+ return emptyResult({
266
+ enabled: false,
267
+ degraded: false,
268
+ reason: "memory.disabled",
269
+ latencyMs: Date.now() - start,
270
+ });
271
+ }
272
+ if (signal?.aborted) {
273
+ return emptyResult({
274
+ enabled: true,
275
+ degraded: false,
276
+ reason: "memory.aborted",
277
+ latencyMs: Date.now() - start,
278
+ });
279
+ }
280
+
281
+ // ── Step 1+2: Generate dense and sparse embeddings ──────────────
282
+ const embeddingResult = await generateQueryEmbedding(
283
+ query,
284
+ config,
285
+ signal,
286
+ start,
287
+ );
288
+ if ("earlyExit" in embeddingResult) return embeddingResult.earlyExit;
289
+
290
+ const { queryVector, provider, model } = embeddingResult;
291
+
292
+ // Generate sparse embedding for the query text (TF-IDF based)
293
+ const sparseVector = generateSparseEmbedding(query);
294
+ const sparseVectorAvailable = sparseVector.indices.length > 0;
295
+
296
+ // ── Step 3: Hybrid search on Qdrant ─────────────────────────────
297
+ const scopePolicy = config.memory.retrieval.scopePolicy;
298
+ const scopeIds = buildScopeFilter(
299
+ options?.scopeId,
300
+ scopePolicy,
301
+ options?.scopePolicyOverride,
302
+ );
303
+
304
+ const HYBRID_LIMIT = 20;
305
+
306
+ let hybridCandidates: Candidate[] = [];
307
+ let semanticSearchFailed = false;
308
+ let sparseVectorUsed = false;
309
+ const hybridSearchStart = Date.now();
310
+
311
+ const qdrantBreakerOpen = isQdrantBreakerOpen();
312
+ if (queryVector && !qdrantBreakerOpen) {
606
313
  try {
607
- const reranked = await rerankWithLLM(
608
- query,
609
- topCandidates,
610
- rerankingConfig,
611
- );
612
- merged = [...reranked, ...merged.slice(rerankingConfig.topK)];
613
- rerankApplied = true;
614
- log.debug(
615
- {
616
- rerankLatencyMs: Date.now() - rerankStart,
617
- rerankedCount: reranked.length,
618
- },
619
- "LLM re-ranking completed",
314
+ hybridCandidates = await semanticSearch(
315
+ queryVector,
316
+ provider ?? "unknown",
317
+ model ?? "unknown",
318
+ HYBRID_LIMIT,
319
+ excludeMessageIds,
320
+ scopeIds,
321
+ sparseVectorAvailable ? sparseVector : undefined,
620
322
  );
323
+ sparseVectorUsed = sparseVectorAvailable;
621
324
  } catch (err) {
622
- if (signal?.aborted || isAbortError(err)) {
623
- return {
624
- earlyExit: emptyResult({
625
- enabled: true,
626
- degraded: false,
627
- reason: "memory.aborted",
628
- provider,
629
- model,
630
- latencyMs: Date.now() - start,
631
- }),
632
- };
325
+ semanticSearchFailed = true;
326
+ if (isQdrantConnectionError(err)) {
327
+ log.warn({ err }, "Qdrant unavailable — hybrid search disabled");
328
+ } else {
329
+ log.warn({ err }, "Hybrid search failed, continuing with recency only");
633
330
  }
634
- log.warn(
635
- { err, rerankLatencyMs: Date.now() - rerankStart },
636
- "LLM re-ranking failed, using RRF order",
637
- );
638
331
  }
639
332
  }
333
+ const hybridSearchMs = Date.now() - hybridSearchStart;
640
334
 
641
- return { merged, rerankApplied };
642
- }
335
+ // ── Step 4: Recency supplement (DB only, conversation-scoped) ───
336
+ const recencyLimit = 5;
337
+ const recencyCandidates = conversationId
338
+ ? recencySearch(conversationId, recencyLimit, excludeMessageIds, scopeIds)
339
+ : [];
643
340
 
644
- /**
645
- * Trim candidates to the token budget, format for injection, and assemble
646
- * the final `MemoryRecallResult`.
647
- */
648
- function formatRecallResult(
649
- query: string,
650
- collected: CollectedCandidates,
651
- merged: Candidate[],
652
- rerankApplied: boolean,
653
- config: AssistantConfig,
654
- options: MemoryRecallOptions | undefined,
655
- embedding: EmbeddingResult,
656
- start: number,
657
- ): MemoryRecallResult {
658
- const mergedCount = merged.length;
341
+ // ── Step 5: Merge and deduplicate ──────────────────────────────
342
+ const candidateMap = new Map<string, Candidate>();
343
+ for (const c of [...hybridCandidates, ...recencyCandidates]) {
344
+ const existing = candidateMap.get(c.key);
345
+ if (!existing) {
346
+ candidateMap.set(c.key, { ...c });
347
+ continue;
348
+ }
349
+ // Keep highest scores from each source
350
+ existing.semantic = Math.max(existing.semantic, c.semantic);
351
+ existing.recency = Math.max(existing.recency, c.recency);
352
+ existing.confidence = Math.max(existing.confidence, c.confidence);
353
+ existing.importance = Math.max(existing.importance, c.importance);
354
+ if (c.text.length > existing.text.length) {
355
+ existing.text = c.text;
356
+ }
357
+ }
358
+
359
+ // Compute RRF-style final scores for the merged candidates
360
+ const allCandidates = [...candidateMap.values()];
361
+ for (const c of allCandidates) {
362
+ // Simple weighted combination — hybrid search already applies RRF fusion
363
+ // at the Qdrant level; here we combine the fused semantic score with recency.
364
+ c.finalScore = c.semantic * 0.7 + c.recency * 0.2 + c.confidence * 0.1;
365
+ }
366
+ allCandidates.sort((a, b) => b.finalScore - a.finalScore);
367
+
368
+ // ── Step 6: Tier classification ─────────────────────────────────
369
+ // Recency-only candidates (semantic=0) can never reach the tier 2 threshold
370
+ // (>0.6) since their max finalScore is 0.3. Promote them directly to tier 2
371
+ // so recent conversation context is preserved even without semantic signal.
372
+ const recencyOnlyKeys = new Set(
373
+ allCandidates
374
+ .filter((c) => c.semantic === 0 && c.recency > 0)
375
+ .map((c) => c.key),
376
+ );
377
+ const tiered = classifyTiers(allCandidates);
378
+ if (recencyOnlyKeys.size > 0) {
379
+ const alreadyTiered = new Set(tiered.map((c) => c.key));
380
+ for (const c of allCandidates) {
381
+ if (recencyOnlyKeys.has(c.key) && !alreadyTiered.has(c.key)) {
382
+ tiered.push({ ...c, tier: 2 });
383
+ }
384
+ }
385
+ }
386
+
387
+ // ── Step 6b: Enrich candidates with source labels ──────────────
388
+ enrichSourceLabels(tiered);
389
+
390
+ // ── Step 7: Enrich with item metadata for staleness ─────────────
391
+ const itemIds = tiered.filter((c) => c.type === "item").map((c) => c.id);
392
+ const itemMetadataMap = enrichItemMetadata(itemIds);
393
+
394
+ // ── Step 8: Compute staleness per item ──────────────────────────
395
+ const now = Date.now();
396
+ for (const c of tiered) {
397
+ if (c.type !== "item") continue;
398
+ const meta = itemMetadataMap.get(c.id);
399
+ if (!meta) continue;
400
+ const { level } = computeStaleness(
401
+ {
402
+ kind: c.kind,
403
+ firstSeenAt: meta.firstSeenAt,
404
+ sourceConversationCount: meta.sourceConversationCount,
405
+ },
406
+ now,
407
+ );
408
+ c.staleness = level;
409
+ }
410
+
411
+ // ── Step 9: Demote very_stale tier 1 → tier 2 ──────────────────
412
+ const afterDemotion = applyStaleDemotion(tiered);
413
+
414
+ // ── Step 10: Budget allocation and two-layer injection ──────────
659
415
  const maxInjectTokens = Math.max(
660
416
  1,
661
417
  Math.floor(
@@ -664,241 +420,265 @@ function formatRecallResult(
664
420
  ),
665
421
  );
666
422
 
667
- const formatted = formatRecallText(merged, {
668
- format: config.memory.retrieval.injectionFormat,
669
- maxTokens: maxInjectTokens,
423
+ // Split into sections for two-layer injection
424
+ const identityItems = afterDemotion.filter(
425
+ (c) => c.tier === 1 && IDENTITY_KINDS.has(c.kind),
426
+ );
427
+ const preferences = afterDemotion.filter(
428
+ (c) => c.tier === 1 && PREFERENCE_KINDS.has(c.kind),
429
+ );
430
+ const tier1Candidates = afterDemotion.filter(
431
+ (c) =>
432
+ c.tier === 1 &&
433
+ !IDENTITY_KINDS.has(c.kind) &&
434
+ !PREFERENCE_KINDS.has(c.kind),
435
+ );
436
+ const tier2Candidates = afterDemotion.filter((c) => c.tier === 2);
437
+
438
+ const injectedText = buildTwoLayerInjection({
439
+ identityItems,
440
+ tier1Candidates,
441
+ tier2Candidates,
442
+ preferences,
443
+ totalBudgetTokens: maxInjectTokens,
670
444
  });
671
- const { selected } = formatted;
672
- const injectedText = formatted.text;
673
445
 
674
- const topCandidates: MemoryRecallCandiateDebug[] = selected
446
+ // ── Assemble result ─────────────────────────────────────────────
447
+ const selectedCount =
448
+ identityItems.length +
449
+ tier1Candidates.length +
450
+ tier2Candidates.length +
451
+ preferences.length;
452
+
453
+ const tier1Count = afterDemotion.filter((c) => c.tier === 1).length;
454
+ const tier2Count = afterDemotion.filter((c) => c.tier === 2).length;
455
+ const stalenessStats = {
456
+ fresh: afterDemotion.filter((c) => c.staleness === "fresh").length,
457
+ aging: afterDemotion.filter((c) => c.staleness === "aging").length,
458
+ stale: afterDemotion.filter((c) => c.staleness === "stale").length,
459
+ very_stale: afterDemotion.filter((c) => c.staleness === "very_stale")
460
+ .length,
461
+ };
462
+
463
+ const topCandidates: MemoryRecallCandiateDebug[] = afterDemotion
675
464
  .slice(0, 10)
676
465
  .map((c) => ({
677
466
  key: c.key,
678
467
  type: c.type,
679
468
  kind: c.kind,
680
469
  finalScore: c.finalScore,
681
- lexical: c.lexical,
682
470
  semantic: c.semantic,
683
471
  recency: c.recency,
684
472
  }));
685
473
 
686
474
  const latencyMs = Date.now() - start;
475
+
476
+ // Propagate degradation from semantic search failure or breaker-open skip
477
+ if (
478
+ semanticSearchFailed ||
479
+ qdrantBreakerOpen ||
480
+ (!queryVector && config.memory.embeddings.required)
481
+ ) {
482
+ embeddingResult.degraded = true;
483
+ embeddingResult.reason =
484
+ embeddingResult.reason ??
485
+ (qdrantBreakerOpen
486
+ ? "memory.qdrant_breaker_open"
487
+ : "memory.hybrid_search_failure");
488
+ }
489
+
687
490
  log.debug(
688
491
  {
689
492
  query: truncate(query, 120),
690
- lexicalHits: collected.lexical.length,
691
- semanticHits: collected.semantic.length,
692
- recencyHits: collected.recency.length,
693
- entityHits: collected.entity.length,
694
- relationSeedEntityCount: collected.relationSeedEntityCount,
695
- relationTraversedEdgeCount: collected.relationTraversedEdgeCount,
696
- relationNeighborEntityCount: collected.relationNeighborEntityCount,
697
- relationExpandedItemCount: collected.relationExpandedItemCount,
698
- earlyTerminated: collected.earlyTerminated,
699
- mergedCount,
700
- selected: selected.length,
493
+ hybridHits: hybridCandidates.length,
494
+ recencyHits: recencyCandidates.length,
495
+ mergedCount: allCandidates.length,
496
+ tier1Count,
497
+ tier2Count,
498
+ stalenessStats,
499
+ selectedCount,
701
500
  maxInjectTokens,
702
- rerankApplied,
703
501
  injectedTokens: estimateTextTokens(injectedText),
704
502
  latencyMs,
705
503
  },
706
504
  "Memory recall completed",
707
505
  );
708
506
 
709
- return {
507
+ const result: MemoryRecallResult = {
710
508
  enabled: true,
711
- degraded: embedding.degraded,
712
- degradation: embedding.degradation,
713
- reason: embedding.reason,
714
- provider: embedding.provider,
715
- model: embedding.model,
716
- lexicalHits: collected.lexical.length,
717
- semanticHits: collected.semantic.length,
718
- recencyHits: collected.recency.length,
719
- entityHits: collected.entity.length,
720
- relationSeedEntityCount: collected.relationSeedEntityCount,
721
- relationTraversedEdgeCount: collected.relationTraversedEdgeCount,
722
- relationNeighborEntityCount: collected.relationNeighborEntityCount,
723
- relationExpandedItemCount: collected.relationExpandedItemCount,
724
- earlyTerminated: collected.earlyTerminated,
725
- mergedCount,
726
- selectedCount: selected.length,
727
- rerankApplied,
509
+ degraded: embeddingResult.degraded,
510
+ degradation: embeddingResult.degradation,
511
+ reason: embeddingResult.reason,
512
+ provider: embeddingResult.provider,
513
+ model: embeddingResult.model,
514
+ semanticHits: hybridCandidates.length,
515
+ recencyHits: recencyCandidates.length,
516
+ mergedCount: allCandidates.length,
517
+ selectedCount,
728
518
  injectedTokens: estimateTextTokens(injectedText),
729
519
  injectedText,
730
520
  latencyMs,
731
521
  topCandidates,
522
+ tier1Count,
523
+ tier2Count,
524
+ hybridSearchMs,
525
+ sparseVectorUsed,
732
526
  };
527
+
528
+ return result;
733
529
  }
734
530
 
735
- export async function buildMemoryRecall(
736
- query: string,
737
- conversationId: string,
738
- config: AssistantConfig,
739
- options?: MemoryRecallOptions,
740
- ): Promise<MemoryRecallResult> {
741
- const start = Date.now();
742
- const versionSnapshot = getMemoryVersion();
743
- const excludeMessageIds =
744
- options?.excludeMessageIds?.filter((id) => id.length > 0) ?? [];
745
- const signal = options?.signal;
746
- if (!config.memory.enabled) {
747
- return emptyResult({
748
- enabled: false,
749
- degraded: false,
750
- reason: "memory.disabled",
751
- latencyMs: Date.now() - start,
752
- });
753
- }
754
- if (signal?.aborted) {
755
- return emptyResult({
756
- enabled: true,
757
- degraded: false,
758
- reason: "memory.aborted",
759
- latencyMs: Date.now() - start,
760
- });
761
- }
531
+ /**
532
+ * Enrich item candidates with metadata needed for staleness computation:
533
+ * - firstSeenAt: when the item was first extracted
534
+ * - sourceConversationCount: number of distinct conversations that sourced this item
535
+ */
536
+ function enrichItemMetadata(
537
+ itemIds: string[],
538
+ ): Map<
539
+ string,
540
+ { firstSeenAt: number; sourceConversationCount: number; kind: string }
541
+ > {
542
+ const result = new Map<
543
+ string,
544
+ { firstSeenAt: number; sourceConversationCount: number; kind: string }
545
+ >();
546
+ if (itemIds.length === 0) return result;
762
547
 
763
- // Check recall cache
764
- const configFingerprint = buildConfigFingerprint(config);
765
- const cached = getCachedRecall(
766
- query,
767
- conversationId,
768
- options,
769
- configFingerprint,
770
- );
771
- if (cached) {
772
- log.debug(
773
- { query: truncate(query, 120), latencyMs: Date.now() - start },
774
- "Memory recall served from cache",
775
- );
776
- return { ...cached, latencyMs: Date.now() - start };
777
- }
548
+ try {
549
+ const db = getDb();
778
550
 
779
- // Stage 1: Embedding generation
780
- const embeddingResult = await generateQueryEmbedding(
781
- query,
782
- config,
783
- signal,
784
- start,
785
- );
786
- if ("earlyExit" in embeddingResult) return embeddingResult.earlyExit;
551
+ // Fetch firstSeenAt and kind from memory_items
552
+ const items = db
553
+ .select({
554
+ id: memoryItems.id,
555
+ firstSeenAt: memoryItems.firstSeenAt,
556
+ kind: memoryItems.kind,
557
+ })
558
+ .from(memoryItems)
559
+ .where(inArray(memoryItems.id, itemIds))
560
+ .all();
787
561
 
788
- // Stage 2: Candidate collection (lexical, recency, direct, semantic, entity)
789
- let collected: CollectedCandidates;
790
- try {
791
- collected = await collectAndMergeCandidates(query, config, {
792
- queryVector: embeddingResult.queryVector,
793
- provider: embeddingResult.provider,
794
- model: embeddingResult.model,
795
- conversationId,
796
- excludeMessageIds,
797
- scopeId: options?.scopeId,
798
- scopePolicyOverride: options?.scopePolicyOverride,
799
- });
800
- } catch (err) {
801
- if (signal?.aborted || isAbortError(err)) {
802
- return emptyResult({
803
- enabled: true,
804
- degraded: false,
805
- reason: "memory.aborted",
806
- provider: embeddingResult.provider,
807
- model: embeddingResult.model,
808
- latencyMs: Date.now() - start,
562
+ for (const item of items) {
563
+ result.set(item.id, {
564
+ firstSeenAt: item.firstSeenAt,
565
+ kind: item.kind,
566
+ sourceConversationCount: 1, // default, updated below
809
567
  });
810
568
  }
569
+
570
+ // Compute sourceConversationCount: count distinct conversation IDs
571
+ // across the memory_item_sources → messages join.
572
+ const sourceCountRows = db
573
+ .select({
574
+ memoryItemId: memoryItemSources.memoryItemId,
575
+ conversationCount:
576
+ sql<number>`COUNT(DISTINCT ${messages.conversationId})`.as(
577
+ "conversation_count",
578
+ ),
579
+ })
580
+ .from(memoryItemSources)
581
+ .innerJoin(messages, sql`${memoryItemSources.messageId} = ${messages.id}`)
582
+ .where(inArray(memoryItemSources.memoryItemId, itemIds))
583
+ .groupBy(memoryItemSources.memoryItemId)
584
+ .all();
585
+
586
+ for (const row of sourceCountRows) {
587
+ const existing = result.get(row.memoryItemId);
588
+ if (existing) {
589
+ existing.sourceConversationCount = row.conversationCount;
590
+ }
591
+ }
592
+ } catch (err) {
811
593
  log.warn(
812
594
  { err },
813
- "Memory retrieval failed, returning degraded empty recall",
595
+ "Failed to enrich item metadata for staleness computation",
814
596
  );
815
- return emptyResult({
816
- enabled: true,
817
- degraded: true,
818
- reason: `memory.retrieval_failure: ${
819
- err instanceof Error ? err.message : String(err)
820
- }`,
821
- provider: embeddingResult.provider,
822
- model: embeddingResult.model,
823
- latencyMs: Date.now() - start,
824
- });
825
597
  }
826
598
 
827
- // Propagate semantic search failure or breaker-based unavailability into
828
- // degradation state. This ensures results computed with boosted limits
829
- // are marked degraded and excluded from the recall cache — preventing
830
- // stale boosted results from being served after the breaker closes.
831
- //
832
- // Exception: when semanticUnavailable is solely because no embedding
833
- // provider is configured (queryVector == null) and embeddings are not
834
- // required, lexical-only results are the expected steady state — do not
835
- // mark as degraded.
836
- const semanticActuallyFailed =
837
- collected.semanticSearchFailed ||
838
- (collected.semanticUnavailable &&
839
- (embeddingResult.queryVector != null ||
840
- config.memory.embeddings.required));
841
- if (semanticActuallyFailed) {
842
- embeddingResult.degraded = true;
843
- embeddingResult.reason =
844
- embeddingResult.reason ??
845
- (collected.semanticUnavailable
846
- ? embeddingResult.queryVector != null
847
- ? "memory.qdrant_circuit_open"
848
- : "memory.embedding_unavailable"
849
- : "memory.semantic_search_failure");
850
- if (!embeddingResult.degradation) {
851
- const isQdrantIssue =
852
- embeddingResult.queryVector != null ||
853
- isQdrantConnectionError(collected.semanticSearchError) ||
854
- collected.semanticSearchError instanceof QdrantCircuitOpenError;
855
- const reason: DegradationReason = isQdrantIssue
856
- ? "qdrant_unavailable"
857
- : "embedding_generation_failed";
858
- embeddingResult.degradation = buildDegradationStatus(reason, config);
859
- }
860
- }
599
+ return result;
600
+ }
861
601
 
862
- // Stage 3: Source caps + LLM re-ranking
863
- const rerankResult = await rerankMergedCandidates(
864
- query,
865
- collected.merged,
866
- config,
867
- signal,
868
- start,
869
- embeddingResult.provider,
870
- embeddingResult.model,
871
- );
872
- if ("earlyExit" in rerankResult) return rerankResult.earlyExit;
602
+ /**
603
+ * Enrich tiered candidates with source labels (conversation titles).
604
+ *
605
+ * For "item" candidates: joins through memoryItemSources → messages → conversations
606
+ * to find the most recent conversation title associated with the item.
607
+ * For "segment" / "summary" candidates: looks up the conversation title directly
608
+ * via the candidate's key (which contains the conversationId for segments).
609
+ *
610
+ * Mutates the candidates in-place for efficiency.
611
+ */
612
+ function enrichSourceLabels(candidates: TieredCandidate[]): void {
613
+ if (candidates.length === 0) return;
873
614
 
874
- // Stage 4: Token budget trimming and result formatting
875
- const result = formatRecallResult(
876
- query,
877
- collected,
878
- rerankResult.merged,
879
- rerankResult.rerankApplied,
880
- config,
881
- options,
882
- embeddingResult,
883
- start,
884
- );
615
+ try {
616
+ const db = getDb();
885
617
 
886
- // Only cache non-degraded results — degraded results (e.g. lexical-only
887
- // fallback when embeddings fail) would delay quality recovery once the
888
- // embedding backend comes back.
889
- if (!result.degraded) {
890
- setCachedRecall(
891
- query,
892
- conversationId,
893
- options,
894
- result,
895
- versionSnapshot,
896
- configFingerprint,
897
- );
618
+ // Collect item IDs for items that need source label lookup
619
+ const itemCandidates = candidates.filter((c) => c.type === "item");
620
+ const itemIds = itemCandidates.map((c) => c.id);
621
+
622
+ if (itemIds.length > 0) {
623
+ // For items: find conversation titles via memoryItemSources → messages → conversations.
624
+ // Pick the most recent conversation title per item.
625
+ const rows = db
626
+ .select({
627
+ memoryItemId: memoryItemSources.memoryItemId,
628
+ title: conversations.title,
629
+ conversationUpdatedAt: conversations.updatedAt,
630
+ })
631
+ .from(memoryItemSources)
632
+ .innerJoin(
633
+ messages,
634
+ sql`${memoryItemSources.messageId} = ${messages.id}`,
635
+ )
636
+ .innerJoin(
637
+ conversations,
638
+ sql`${messages.conversationId} = ${conversations.id}`,
639
+ )
640
+ .where(inArray(memoryItemSources.memoryItemId, itemIds))
641
+ .all();
642
+
643
+ // Group by item ID and pick the most recently updated conversation title
644
+ const titleMap = new Map<string, string>();
645
+ const updatedAtMap = new Map<string, number>();
646
+ for (const row of rows) {
647
+ if (!row.title) continue;
648
+ const existing = updatedAtMap.get(row.memoryItemId);
649
+ if (existing === undefined || row.conversationUpdatedAt > existing) {
650
+ titleMap.set(row.memoryItemId, row.title);
651
+ updatedAtMap.set(row.memoryItemId, row.conversationUpdatedAt);
652
+ }
653
+ }
654
+
655
+ for (const c of itemCandidates) {
656
+ const title = titleMap.get(c.id);
657
+ if (title) {
658
+ c.sourceLabel = title;
659
+ }
660
+ }
661
+ }
662
+
663
+ // For segment candidates: the key format is "seg:<segmentId>" and the id is the segment's id.
664
+ // We can look up the conversation title via the segment's conversationId in memory_segments.
665
+ // However, segments already reference a conversationId in the schema — but the Candidate type
666
+ // doesn't carry it. For now, skip segment source labels as the join path would require
667
+ // importing memorySegments and an additional query. The primary value is item source labels.
668
+ } catch (err) {
669
+ log.warn({ err }, "Failed to enrich candidates with source labels");
898
670
  }
899
- return result;
900
671
  }
901
672
 
673
+ /**
674
+ * Strip memory recall messages from the conversation history.
675
+ *
676
+ * Handles both exact text matching and `<memory_context>` XML wrapper
677
+ * detection: when the recall text starts with `<memory_context>`, we
678
+ * also match user messages whose sole text block starts with the same
679
+ * tag (covering cases where the exact text differs slightly due to
680
+ * dynamic content).
681
+ */
902
682
  export function stripMemoryRecallMessages<
903
683
  T extends {
904
684
  role: "user" | "assistant";
@@ -918,6 +698,25 @@ export function stripMemoryRecallMessages<
918
698
  msg.content[0].type === "text" &&
919
699
  msg.content[0].text === MEMORY_CONTEXT_ACK;
920
700
 
701
+ // Check if the recall text uses the <memory_context> XML format
702
+ const isMemoryContextFormat = recallText
703
+ .trimStart()
704
+ .startsWith("<memory_context>");
705
+
706
+ // Helper: does a text block match the recall text?
707
+ const textMatches = (text: string | undefined): boolean => {
708
+ if (!text) return false;
709
+ if (text === recallText) return true;
710
+ // For <memory_context> format, match any block that starts with the tag
711
+ if (
712
+ isMemoryContextFormat &&
713
+ text.trimStart().startsWith("<memory_context>")
714
+ ) {
715
+ return true;
716
+ }
717
+ return false;
718
+ };
719
+
921
720
  // Prefer the canonical separate_context_message pair: a user message whose
922
721
  // sole text block is the recall text, followed by an assistant ack. This
923
722
  // must be checked first so that a real user message that happens to contain
@@ -928,7 +727,7 @@ export function stripMemoryRecallMessages<
928
727
  if (msg.role !== "user") continue;
929
728
  if (msg.content.length !== 1) continue;
930
729
  const block = msg.content[0];
931
- if (block.type !== "text" || block.text !== recallText) continue;
730
+ if (block.type !== "text" || !textMatches(block.text)) continue;
932
731
  const next = messages[i + 1];
933
732
  if (next && isAck(next)) {
934
733
  return [...messages.slice(0, i), ...messages.slice(i + 2)];
@@ -937,7 +736,7 @@ export function stripMemoryRecallMessages<
937
736
  }
938
737
 
939
738
  // Fall back to generic text-match removal: find the last user message
940
- // containing the recall text block (prepend_user_block or repair-merged).
739
+ // containing the recall text block.
941
740
  let targetIndex = -1;
942
741
  let blockIndex = -1;
943
742
  for (let i = messages.length - 1; i >= 0; i--) {
@@ -945,7 +744,7 @@ export function stripMemoryRecallMessages<
945
744
  if (msg.role !== "user" || msg.content.length === 0) continue;
946
745
  for (let bi = msg.content.length - 1; bi >= 0; bi--) {
947
746
  const block = msg.content[bi];
948
- if (block.type === "text" && block.text === recallText) {
747
+ if (block.type === "text" && textMatches(block.text)) {
949
748
  targetIndex = i;
950
749
  blockIndex = bi;
951
750
  break;
@@ -983,21 +782,6 @@ export function stripMemoryRecallMessages<
983
782
  return cleaned;
984
783
  }
985
784
 
986
- export function injectMemoryRecallIntoUserMessage<
987
- T extends {
988
- role: "user" | "assistant";
989
- content: Array<{ type: string; text?: string }>;
990
- },
991
- >(message: T, memoryRecallText: string): T {
992
- if (message.role !== "user") return message;
993
- if (memoryRecallText.trim().length === 0) return message;
994
- const memoryBlock = { type: "text", text: memoryRecallText } as const;
995
- return {
996
- ...message,
997
- content: [memoryBlock, ...message.content] as T["content"],
998
- } as T;
999
- }
1000
-
1001
785
  /**
1002
786
  * Inject memory recall as a separate user+assistant message pair before the
1003
787
  * last user message. This separates memory context from the user's actual
@@ -1049,18 +833,10 @@ function emptyResult(
1049
833
  reason: init.reason,
1050
834
  provider: init.provider,
1051
835
  model: init.model,
1052
- lexicalHits: 0,
1053
836
  semanticHits: 0,
1054
837
  recencyHits: 0,
1055
- entityHits: 0,
1056
- relationSeedEntityCount: 0,
1057
- relationTraversedEdgeCount: 0,
1058
- relationNeighborEntityCount: 0,
1059
- relationExpandedItemCount: 0,
1060
- earlyTerminated: false,
1061
838
  mergedCount: 0,
1062
839
  selectedCount: 0,
1063
- rerankApplied: false,
1064
840
  injectedTokens: 0,
1065
841
  injectedText: "",
1066
842
  latencyMs: init.latencyMs,