@vellumai/assistant 0.7.2 → 0.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (347) hide show
  1. package/ARCHITECTURE.md +16 -1
  2. package/docs/architecture/memory.md +5 -2
  3. package/node_modules/@vellumai/gateway-client/src/ipc-client.ts +13 -4
  4. package/node_modules/@vellumai/skill-host-contracts/src/assistant-event.ts +0 -9
  5. package/node_modules/@vellumai/slack-text/src/index.test.ts +18 -35
  6. package/node_modules/@vellumai/slack-text/src/index.ts +2 -48
  7. package/openapi.yaml +449 -22
  8. package/package.json +1 -1
  9. package/src/__tests__/app-control-flow.test.ts +21 -11
  10. package/src/__tests__/assistant-event-hub.test.ts +48 -0
  11. package/src/__tests__/assistant-event.test.ts +0 -10
  12. package/src/__tests__/assistant-events-sse-hardening.test.ts +2 -7
  13. package/src/__tests__/assistant-feature-flags-integration.test.ts +18 -0
  14. package/src/__tests__/auto-analysis-end-to-end.test.ts +62 -1
  15. package/src/__tests__/background-workers-disk-pressure.test.ts +268 -0
  16. package/src/__tests__/call-conversation-messages.test.ts +8 -2
  17. package/src/__tests__/channel-inbound-disk-pressure.test.ts +537 -0
  18. package/src/__tests__/channel-readiness-service.test.ts +4 -2
  19. package/src/__tests__/config-loader-backfill.test.ts +379 -0
  20. package/src/__tests__/config-schema.test.ts +1 -0
  21. package/src/__tests__/config-watcher-cleanup-throttle.test.ts +18 -9
  22. package/src/__tests__/config-watcher.test.ts +140 -69
  23. package/src/__tests__/context-search-agent-runner.test.ts +61 -3
  24. package/src/__tests__/context-search-conversations-source.test.ts +0 -24
  25. package/src/__tests__/context-search-fanout.test.ts +0 -1
  26. package/src/__tests__/context-search-memory-source.test.ts +3 -7
  27. package/src/__tests__/context-search-memory-v2-source.test.ts +0 -2
  28. package/src/__tests__/context-search-pkb-source.test.ts +0 -1
  29. package/src/__tests__/context-search-workspace-source.test.ts +0 -1
  30. package/src/__tests__/conversation-abort-tool-results.test.ts +6 -0
  31. package/src/__tests__/conversation-agent-loop-disk-pressure.test.ts +223 -0
  32. package/src/__tests__/conversation-agent-loop.test.ts +454 -5
  33. package/src/__tests__/conversation-error.test.ts +150 -3
  34. package/src/__tests__/conversation-process-callsite.test.ts +43 -0
  35. package/src/__tests__/conversation-provider-retry-repair.test.ts +6 -0
  36. package/src/__tests__/conversation-runtime-assembly.test.ts +65 -0
  37. package/src/__tests__/conversation-slash-unknown.test.ts +6 -0
  38. package/src/__tests__/conversation-speed-override.test.ts +0 -3
  39. package/src/__tests__/conversation-store.test.ts +0 -18
  40. package/src/__tests__/conversation-surfaces-app-control.test.ts +15 -4
  41. package/src/__tests__/conversation-surfaces-data-persist.test.ts +404 -0
  42. package/src/__tests__/conversation-tool-setup-app-refresh.test.ts +2 -5
  43. package/src/__tests__/conversation-workspace-injection.test.ts +6 -0
  44. package/src/__tests__/conversation-workspace-tool-tracking.test.ts +6 -0
  45. package/src/__tests__/credentials-cli.test.ts +7 -0
  46. package/src/__tests__/cu-unified-flow.test.ts +176 -10
  47. package/src/__tests__/date-context.test.ts +164 -2
  48. package/src/__tests__/disk-pressure-guard.test.ts +262 -0
  49. package/src/__tests__/disk-pressure-lifecycle.test.ts +168 -0
  50. package/src/__tests__/disk-pressure-policy.test.ts +241 -0
  51. package/src/__tests__/disk-pressure-routes.test.ts +379 -0
  52. package/src/__tests__/disk-pressure-tools.test.ts +277 -0
  53. package/src/__tests__/disk-usage.test.ts +150 -0
  54. package/src/__tests__/events-client-registration.test.ts +52 -0
  55. package/src/__tests__/events-dev-bypass-actor.test.ts +162 -0
  56. package/src/__tests__/file-write-tool.test.ts +4 -10
  57. package/src/__tests__/filing-service.test.ts +3 -4
  58. package/src/__tests__/heartbeat-disk-pressure.test.ts +183 -0
  59. package/src/__tests__/heartbeat-service.test.ts +260 -11
  60. package/src/__tests__/host-app-control-proxy.test.ts +195 -25
  61. package/src/__tests__/host-bash-proxy.test.ts +227 -34
  62. package/src/__tests__/host-bash-routes.test.ts +178 -13
  63. package/src/__tests__/host-cu-proxy.test.ts +210 -3
  64. package/src/__tests__/host-cu-routes-targeted.test.ts +141 -12
  65. package/src/__tests__/host-file-proxy-targeted.test.ts +48 -9
  66. package/src/__tests__/host-file-proxy.test.ts +268 -6
  67. package/src/__tests__/host-file-routes-targeted.test.ts +175 -17
  68. package/src/__tests__/host-transfer-proxy-targeted.test.ts +408 -59
  69. package/src/__tests__/host-transfer-routes-targeted.test.ts +232 -17
  70. package/src/__tests__/http-user-message-parity.test.ts +107 -1
  71. package/src/__tests__/injector-chain.test.ts +18 -6
  72. package/src/__tests__/injector-disk-pressure.test.ts +224 -0
  73. package/src/__tests__/managed-profile-guard.test.ts +18 -0
  74. package/src/__tests__/mcp-abort-signal.test.ts +130 -0
  75. package/src/__tests__/memory-admin-recall.test.ts +3 -11
  76. package/src/__tests__/memory-retrieval-pipeline.test.ts +22 -1
  77. package/src/__tests__/normalize-onboarding.test.ts +180 -0
  78. package/src/__tests__/oauth-connect-routes.test.ts +316 -0
  79. package/src/__tests__/oauth-provider-seed-logos.test.ts +24 -2
  80. package/src/__tests__/onboarding-persona-write.test.ts +308 -0
  81. package/src/__tests__/openai-provider.test.ts +45 -8
  82. package/src/__tests__/persist-onboarding-artifacts.test.ts +44 -64
  83. package/src/__tests__/platform-callback-registration.test.ts +21 -4
  84. package/src/__tests__/platform.test.ts +2 -1
  85. package/src/__tests__/playbook-execution.test.ts +0 -43
  86. package/src/__tests__/plugin-tool-contribution.test.ts +47 -0
  87. package/src/__tests__/prechat-onboarding-contract.test.ts +214 -27
  88. package/src/__tests__/provider-tool-name.test.ts +23 -0
  89. package/src/__tests__/relay-server.test.ts +15 -4
  90. package/src/__tests__/runtime-events-sse.test.ts +4 -8
  91. package/src/__tests__/scheduler-disk-pressure.test.ts +148 -0
  92. package/src/__tests__/secret-ingress-http.test.ts +0 -1
  93. package/src/__tests__/suggestion-routes.test.ts +46 -0
  94. package/src/__tests__/twilio-validation.test.ts +2 -2
  95. package/src/__tests__/workspace-migration-065-bump-stale-heartbeat-interval.test.ts +122 -0
  96. package/src/__tests__/workspace-migration-066-seed-heartbeat-callsite-cost-default.test.ts +285 -0
  97. package/src/__tests__/workspace-migration-068-release-notes-local-timezone.test.ts +90 -0
  98. package/src/__tests__/workspace-migration-safe-storage-limits-release.test.ts +90 -0
  99. package/src/approvals/guardian-decision-primitive.ts +13 -0
  100. package/src/approvals/guardian-request-resolvers.ts +16 -17
  101. package/src/backup/snapshot-lock.ts +2 -27
  102. package/src/bundler/compiler-tools.ts +3 -2
  103. package/src/calls/call-conversation-messages.ts +46 -10
  104. package/src/cli/commands/__tests__/webhooks.test.ts +0 -4
  105. package/src/cli/commands/bash.ts +35 -108
  106. package/src/cli/commands/contacts.ts +64 -25
  107. package/src/cli/commands/credentials.ts +56 -0
  108. package/src/cli/commands/memory-v2.ts +7 -6
  109. package/src/cli/commands/oauth/__tests__/connect.test.ts +437 -1
  110. package/src/cli/commands/oauth/connect.ts +127 -1
  111. package/src/cli/commands/platform/__tests__/callback-routes-list.test.ts +0 -3
  112. package/src/cli/commands/platform/__tests__/connect.test.ts +7 -1
  113. package/src/cli/commands/platform/__tests__/disconnect.test.ts +7 -1
  114. package/src/cli/commands/platform/__tests__/status.test.ts +103 -6
  115. package/src/cli/commands/platform/index.ts +16 -7
  116. package/src/cli/commands/status.ts +57 -0
  117. package/src/cli/program.ts +4 -2
  118. package/src/config/assistant-feature-flags.ts +13 -3
  119. package/src/config/bundled-skills/messaging/tools/messaging-analyze-style.ts +4 -3
  120. package/src/config/bundled-skills/phone-calls/references/TROUBLESHOOTING.md +13 -7
  121. package/src/config/bundled-skills/playbooks/tools/playbook-create.ts +2 -2
  122. package/src/config/bundled-skills/playbooks/tools/playbook-delete.ts +2 -2
  123. package/src/config/bundled-skills/playbooks/tools/playbook-list.ts +2 -2
  124. package/src/config/bundled-skills/playbooks/tools/playbook-update.ts +2 -2
  125. package/src/config/env.ts +0 -8
  126. package/src/config/feature-flag-registry.json +27 -3
  127. package/src/config/loader.ts +127 -8
  128. package/src/config/schemas/__tests__/memory-v2.test.ts +10 -5
  129. package/src/config/schemas/call-site-catalog.ts +14 -0
  130. package/src/config/schemas/channels.ts +0 -5
  131. package/src/config/schemas/heartbeat.ts +1 -1
  132. package/src/config/schemas/llm.ts +2 -0
  133. package/src/config/schemas/memory-lifecycle.ts +13 -0
  134. package/src/config/schemas/memory-v2.ts +75 -11
  135. package/src/config/schemas/platform.ts +43 -3
  136. package/src/config/schemas/services.ts +28 -0
  137. package/src/config/seed-inference-profiles.ts +230 -33
  138. package/src/contacts/contact-store.ts +0 -25
  139. package/src/daemon/__tests__/conversation-tool-setup.test.ts +86 -25
  140. package/src/daemon/assistant-attachments.ts +4 -4
  141. package/src/daemon/config-watcher.ts +85 -57
  142. package/src/daemon/conversation-agent-loop-handlers.ts +6 -0
  143. package/src/daemon/conversation-agent-loop.ts +170 -33
  144. package/src/daemon/conversation-error.ts +87 -15
  145. package/src/daemon/conversation-lifecycle.ts +1 -3
  146. package/src/daemon/conversation-process.ts +8 -0
  147. package/src/daemon/conversation-runtime-assembly.ts +26 -0
  148. package/src/daemon/conversation-store.ts +2 -2
  149. package/src/daemon/conversation-surfaces.ts +195 -15
  150. package/src/daemon/conversation-tool-setup.ts +57 -14
  151. package/src/daemon/conversation.ts +17 -22
  152. package/src/daemon/date-context.ts +71 -22
  153. package/src/daemon/disk-pressure-background-gate.ts +73 -0
  154. package/src/daemon/disk-pressure-guard.ts +343 -0
  155. package/src/daemon/disk-pressure-policy.ts +163 -0
  156. package/src/daemon/handlers/shared.ts +0 -1
  157. package/src/daemon/handlers/skills.ts +3 -4
  158. package/src/daemon/host-app-control-proxy.ts +137 -41
  159. package/src/daemon/host-bash-proxy.ts +46 -21
  160. package/src/daemon/host-cu-proxy.ts +49 -3
  161. package/src/daemon/host-file-proxy.ts +43 -7
  162. package/src/daemon/host-transfer-proxy.ts +95 -4
  163. package/src/daemon/lifecycle.ts +79 -28
  164. package/src/daemon/meet-host-supervisor.ts +4 -4
  165. package/src/daemon/meet-manifest-loader.ts +0 -1
  166. package/src/daemon/memory-v2-startup.ts +14 -4
  167. package/src/daemon/message-protocol.ts +3 -0
  168. package/src/daemon/message-types/conversations.ts +4 -0
  169. package/src/daemon/message-types/disk-pressure.ts +9 -0
  170. package/src/daemon/message-types/messages.ts +3 -0
  171. package/src/daemon/profiler-run-store.ts +5 -5
  172. package/src/daemon/tool-setup-types.ts +2 -2
  173. package/src/documents/document-store.ts +85 -0
  174. package/src/filing/filing-service.ts +30 -5
  175. package/src/heartbeat/__tests__/heartbeat-feed-event.test.ts +9 -16
  176. package/src/heartbeat/__tests__/heartbeat-run-store.test.ts +36 -0
  177. package/src/heartbeat/heartbeat-run-store.ts +13 -0
  178. package/src/heartbeat/heartbeat-service.ts +205 -31
  179. package/src/home/feed-scheduler.ts +18 -0
  180. package/src/inbound/platform-callback-registration.ts +8 -15
  181. package/src/ipc/__tests__/clients-list-ipc.test.ts +169 -0
  182. package/src/ipc/assistant-server.ts +56 -2
  183. package/src/ipc/gateway-client.ts +37 -3
  184. package/src/live-voice/live-voice-archive.ts +4 -4
  185. package/src/live-voice/protocol.ts +5 -7
  186. package/src/media/image-service.ts +1 -7
  187. package/src/memory/__tests__/fixtures/memory-v2-activation-fixtures.ts +21 -13
  188. package/src/memory/__tests__/jobs-worker-v2-schedule.test.ts +52 -22
  189. package/src/memory/__tests__/memory-v2-activation-log-store.test.ts +0 -6
  190. package/src/memory/__tests__/memory-v2-concept-frequency.test.ts +272 -0
  191. package/src/memory/admin.ts +5 -9
  192. package/src/memory/context-search/agent-runner.ts +19 -2
  193. package/src/memory/context-search/sources/conversations.ts +2 -11
  194. package/src/memory/context-search/sources/memory-v2.ts +5 -4
  195. package/src/memory/context-search/sources/memory.ts +0 -1
  196. package/src/memory/context-search/types.ts +0 -1
  197. package/src/memory/conversation-crud.ts +4 -12
  198. package/src/memory/db-init.ts +2 -0
  199. package/src/memory/embedding-runtime-manager.ts +119 -5
  200. package/src/memory/graph/__tests__/conversation-graph-memory-v2-routing.test.ts +32 -21
  201. package/src/memory/graph/conversation-graph-memory.ts +42 -54
  202. package/src/memory/graph/extraction.ts +1 -3
  203. package/src/memory/graph/graph-search.test.ts +10 -67
  204. package/src/memory/graph/graph-search.ts +1 -20
  205. package/src/memory/graph/retriever.test.ts +6 -0
  206. package/src/memory/graph/retriever.ts +6 -10
  207. package/src/memory/indexer.ts +54 -45
  208. package/src/memory/job-handlers/backfill.ts +2 -11
  209. package/src/memory/job-handlers/cleanup.ts +43 -0
  210. package/src/memory/job-handlers/embedding.ts +6 -8
  211. package/src/memory/job-handlers/summarization.ts +2 -7
  212. package/src/memory/jobs-store.ts +48 -0
  213. package/src/memory/jobs-worker.ts +81 -43
  214. package/src/memory/memory-v2-activation-log-store.ts +32 -14
  215. package/src/memory/memory-v2-concept-frequency.ts +169 -0
  216. package/src/memory/migrations/239-trace-events-created-at-index.ts +18 -0
  217. package/src/memory/migrations/index.ts +1 -0
  218. package/src/memory/pkb/pkb-search.test.ts +6 -0
  219. package/src/memory/qdrant-client.ts +0 -13
  220. package/src/memory/rerank-local.ts +374 -0
  221. package/src/memory/search/semantic.ts +6 -67
  222. package/src/memory/trace-event-store.ts +1 -17
  223. package/src/memory/v2/__tests__/activation.test.ts +311 -250
  224. package/src/memory/v2/__tests__/consolidation-job.test.ts +40 -8
  225. package/src/memory/v2/__tests__/injection.test.ts +157 -167
  226. package/src/memory/v2/__tests__/prompts-consolidation.test.ts +61 -2
  227. package/src/memory/v2/__tests__/qdrant.test.ts +16 -0
  228. package/src/memory/v2/__tests__/reranker.test.ts +338 -0
  229. package/src/memory/v2/__tests__/sim.test.ts +5 -199
  230. package/src/memory/v2/__tests__/skill-store.test.ts +71 -65
  231. package/src/memory/v2/__tests__/static-context.test.ts +76 -1
  232. package/src/memory/v2/activation.ts +149 -156
  233. package/src/memory/v2/consolidation-job.ts +62 -12
  234. package/src/memory/v2/injection.ts +47 -60
  235. package/src/memory/v2/prompts/consolidation.ts +36 -1
  236. package/src/memory/v2/qdrant.ts +99 -0
  237. package/src/memory/v2/reranker.ts +177 -0
  238. package/src/memory/v2/sim.ts +10 -84
  239. package/src/memory/v2/skill-content.ts +4 -3
  240. package/src/memory/v2/skill-store.ts +82 -59
  241. package/src/memory/v2/static-context.ts +22 -0
  242. package/src/memory/v2/types.ts +10 -10
  243. package/src/notifications/copy-composer.ts +13 -0
  244. package/src/notifications/signal.ts +4 -0
  245. package/src/oauth/AGENTS.md +3 -1
  246. package/src/oauth/__tests__/oauth-connect-state.test.ts +137 -0
  247. package/src/oauth/connect-orchestrator.ts +2 -0
  248. package/src/oauth/connection-resolver.test.ts +66 -1
  249. package/src/oauth/connection-resolver.ts +55 -1
  250. package/src/oauth/oauth-connect-state.ts +77 -0
  251. package/src/oauth/seed-providers.ts +58 -1
  252. package/src/plugins/defaults/injectors.ts +35 -2
  253. package/src/plugins/defaults/memory-retrieval.ts +5 -6
  254. package/src/plugins/types.ts +7 -0
  255. package/src/proactive-artifact/aux-message-injector.ts +74 -0
  256. package/src/proactive-artifact/decision.test.ts +226 -0
  257. package/src/proactive-artifact/decision.ts +165 -0
  258. package/src/proactive-artifact/index.ts +7 -0
  259. package/src/proactive-artifact/job.test.ts +867 -0
  260. package/src/proactive-artifact/job.ts +352 -0
  261. package/src/proactive-artifact/message-copy.ts +41 -0
  262. package/src/proactive-artifact/trigger-state.test.ts +277 -0
  263. package/src/proactive-artifact/trigger-state.ts +119 -0
  264. package/src/prompts/normalize-onboarding.ts +80 -0
  265. package/src/prompts/persona-resolver.ts +101 -9
  266. package/src/prompts/system-prompt.ts +21 -7
  267. package/src/prompts/templates/BOOTSTRAP.md +13 -5
  268. package/src/providers/__tests__/retry-callsite.test.ts +222 -1
  269. package/src/providers/model-intents.ts +7 -0
  270. package/src/providers/openrouter/client.ts +8 -0
  271. package/src/providers/retry.ts +50 -0
  272. package/src/providers/types.ts +1 -0
  273. package/src/runtime/__tests__/agent-wake.test.ts +456 -3
  274. package/src/runtime/agent-wake.ts +238 -100
  275. package/src/runtime/assistant-event-hub.ts +36 -6
  276. package/src/runtime/assistant-event.ts +0 -1
  277. package/src/runtime/auth/__tests__/route-policy.test.ts +64 -0
  278. package/src/runtime/auth/route-policy.ts +14 -1
  279. package/src/runtime/auth/same-actor.ts +216 -0
  280. package/src/runtime/channel-retry-sweep.ts +65 -1
  281. package/src/runtime/guardian-reply-router.ts +10 -0
  282. package/src/runtime/local-actor-identity.ts +52 -11
  283. package/src/runtime/pending-interactions.ts +8 -0
  284. package/src/runtime/routes/__tests__/client-routes.test.ts +155 -0
  285. package/src/runtime/routes/__tests__/conversation-query-routes.test.ts +0 -5
  286. package/src/runtime/routes/__tests__/heartbeat-routes.test.ts +1 -1
  287. package/src/runtime/routes/client-routes.ts +20 -2
  288. package/src/runtime/routes/contact-routes.ts +0 -25
  289. package/src/runtime/routes/conversation-routes.ts +35 -26
  290. package/src/runtime/routes/debug-bash-routes.ts +163 -0
  291. package/src/runtime/routes/disk-pressure-routes.ts +121 -0
  292. package/src/runtime/routes/document-pdf-renderer.ts +6 -2
  293. package/src/runtime/routes/documents-routes.ts +2 -75
  294. package/src/runtime/routes/events-routes.ts +41 -9
  295. package/src/runtime/routes/host-bash-routes.ts +23 -3
  296. package/src/runtime/routes/host-cu-routes.ts +33 -6
  297. package/src/runtime/routes/host-file-routes.ts +32 -6
  298. package/src/runtime/routes/host-transfer-routes.ts +79 -16
  299. package/src/runtime/routes/identity-routes.ts +7 -138
  300. package/src/runtime/routes/inbound-message-handler.ts +77 -12
  301. package/src/runtime/routes/inbound-stages/guardian-reply-intercept.ts +3 -0
  302. package/src/runtime/routes/index.ts +6 -0
  303. package/src/runtime/routes/memory-item-routes.test.ts +41 -15
  304. package/src/runtime/routes/memory-v2-routes.ts +33 -0
  305. package/src/runtime/routes/oauth-connect-routes.ts +153 -0
  306. package/src/runtime/verification-outbound-actions.ts +4 -4
  307. package/src/schedule/run-script.ts +37 -5
  308. package/src/schedule/scheduler.ts +20 -1
  309. package/src/security/encrypted-store.ts +2 -0
  310. package/src/security/secure-keys.ts +55 -0
  311. package/src/skills/remote-skill-policy.ts +4 -10
  312. package/src/subagent/index.ts +1 -7
  313. package/src/subagent/manager.ts +1 -15
  314. package/src/tasks/task-runner.ts +0 -1
  315. package/src/tasks/task-store.ts +0 -3
  316. package/src/tools/background-tool-registry.ts +17 -3
  317. package/src/tools/host-filesystem/edit.test.ts +151 -0
  318. package/src/tools/host-filesystem/edit.ts +43 -1
  319. package/src/tools/host-filesystem/read.test.ts +129 -0
  320. package/src/tools/host-filesystem/read.ts +43 -1
  321. package/src/tools/host-filesystem/transfer.test.ts +127 -2
  322. package/src/tools/host-filesystem/transfer.ts +56 -11
  323. package/src/tools/host-filesystem/write.test.ts +134 -0
  324. package/src/tools/host-filesystem/write.ts +43 -1
  325. package/src/tools/host-terminal/host-shell.ts +13 -6
  326. package/src/tools/mcp/mcp-tool-factory.ts +2 -1
  327. package/src/tools/memory/register.test.ts +12 -9
  328. package/src/tools/memory/register.ts +1 -2
  329. package/src/tools/provider-tool-name.ts +28 -0
  330. package/src/tools/registry.ts +30 -9
  331. package/src/tools/terminal/shell.ts +9 -1
  332. package/src/tools/tool-approval-handler.ts +31 -6
  333. package/src/tools/types.ts +24 -2
  334. package/src/tts/provider-catalog.ts +3 -5
  335. package/src/util/disk-usage.ts +138 -0
  336. package/src/util/platform.ts +21 -11
  337. package/src/util/process-liveness.ts +26 -0
  338. package/src/workspace/heartbeat-service.ts +19 -0
  339. package/src/workspace/migrations/065-bump-stale-heartbeat-interval.ts +60 -0
  340. package/src/workspace/migrations/066-seed-heartbeat-callsite-cost-default.ts +146 -0
  341. package/src/workspace/migrations/067-release-notes-safe-storage-limits.ts +72 -0
  342. package/src/workspace/migrations/068-release-notes-local-timezone.ts +65 -0
  343. package/src/workspace/migrations/registry.ts +8 -0
  344. package/src/__tests__/conversation-tool-setup-memory-scope.test.ts +0 -167
  345. package/src/memory/v2/__tests__/skill-qdrant.test.ts +0 -657
  346. package/src/memory/v2/skill-qdrant.ts +0 -404
  347. package/src/signals/bash.ts +0 -198
@@ -29,21 +29,18 @@ import { getWorkspaceDir } from "../../util/platform.js";
29
29
  import type { DrizzleDb } from "../db-connection.js";
30
30
  import {
31
31
  type MemoryV2ConceptRowRecord,
32
- type MemoryV2SkillRowRecord,
33
32
  recordMemoryV2ActivationLog,
34
33
  } from "../memory-v2-activation-log-store.js";
35
34
  import {
36
35
  computeOwnActivation,
37
- computeSkillActivation,
38
36
  selectCandidates,
39
37
  selectInjections,
40
- selectSkillInjections,
41
38
  spreadActivation,
42
39
  } from "./activation.js";
43
40
  import { hydrate, save } from "./activation-store.js";
44
41
  import { getEdgeIndex } from "./edge-index.js";
45
42
  import { readPage, renderPageContent } from "./page-store.js";
46
- import { getAllSkillIds, getSkillCapability } from "./skill-store.js";
43
+ import { getSkillCapability, isSkillSlug } from "./skill-store.js";
47
44
  import type { ActivationState, EverInjectedEntry } from "./types.js";
48
45
 
49
46
  const log = getLogger("memory-v2-injection");
@@ -84,6 +81,7 @@ export interface InjectMemoryV2BlockParams {
84
81
  */
85
82
  mode?: InjectMemoryV2Mode;
86
83
  config: AssistantConfig;
84
+ signal?: AbortSignal;
87
85
  }
88
86
 
89
87
  export interface InjectMemoryV2BlockResult {
@@ -127,30 +125,36 @@ export async function injectMemoryV2Block(
127
125
  nowText,
128
126
  messageId,
129
127
  config,
128
+ signal,
130
129
  } = params;
131
130
 
132
131
  const workspaceDir = getWorkspaceDir();
133
132
 
134
133
  // (1) Hydrate. Missing rows are normal at conversation start — proceed
135
134
  // with an effective empty prior state so the first turn can still inject.
135
+ throwIfAborted(signal);
136
136
  const priorState = await hydrate(database, conversationId);
137
137
 
138
138
  // (2) Topology. `getEdgeIndex` walks concept-page frontmatter and caches
139
139
  // the result module-locally; an empty workspace yields an empty index.
140
+ throwIfAborted(signal);
140
141
  const edgeIndex = await getEdgeIndex(workspaceDir);
141
142
 
142
143
  // (3) Candidate set: prior-state survivors above epsilon ∪ ANN top-50.
143
144
  // `selectCandidates` also returns `fromPrior` / `fromAnn` provenance sets so
144
145
  // telemetry can attribute each candidate back to its source.
146
+ throwIfAborted(signal);
145
147
  const { candidates, fromPrior, fromAnn } = await selectCandidates({
146
148
  priorState,
147
149
  userText: userMessage,
148
150
  assistantText: assistantMessage,
149
151
  nowText,
150
152
  config,
153
+ signal,
151
154
  });
152
155
 
153
156
  // (4) Own activation: A_o = d·prev + c_user·sim_u + c_a·sim_a + c_now·sim_n.
157
+ throwIfAborted(signal);
154
158
  const { activation: ownActivation, breakdown: ownBreakdown } =
155
159
  await computeOwnActivation({
156
160
  candidates,
@@ -159,9 +163,11 @@ export async function injectMemoryV2Block(
159
163
  assistantText: assistantMessage,
160
164
  nowText,
161
165
  config,
166
+ signal,
162
167
  });
163
168
 
164
169
  // (5) Spreading activation across the edge graph (k, hops from config).
170
+ throwIfAborted(signal);
165
171
  const { k, hops, top_k, epsilon } = config.memory.v2;
166
172
  const { final: finalActivation, contribution: spreadContribution } =
167
173
  spreadActivation(ownActivation, edgeIndex, k, hops);
@@ -182,25 +188,6 @@ export async function injectMemoryV2Block(
182
188
  });
183
189
  const slugsToRender = mode === "context-load" ? topNow : toInject;
184
190
 
185
- // (6b) Skill pipeline — a sibling pipeline to the concept-page one above.
186
- // Skills are stateless (no decay, no spread, no `everInjected` dedup) and
187
- // the catalog is small, so every known skill is scored every turn. The
188
- // top-K injection slate is re-presented every turn so the agent can drop
189
- // and pick skills up freely; the inspector renders the full ranked list.
190
- const skillCandidates = new Set(getAllSkillIds());
191
- const { activation: skillActivation, breakdown: skillBreakdown } =
192
- await computeSkillActivation({
193
- candidates: skillCandidates,
194
- userText: userMessage,
195
- assistantText: assistantMessage,
196
- nowText,
197
- config,
198
- });
199
- const { topNow: topSkillIds } = selectSkillInjections({
200
- A: skillActivation,
201
- topK: config.memory.v2.top_k_skills,
202
- });
203
-
204
191
  // Build the next persisted state regardless of whether we render anything:
205
192
  // even on a "no new injection" turn, prior-state activations decay via the
206
193
  // candidate-set carry-forward and need to be rewritten so `epsilon`-trimmed
@@ -215,8 +202,10 @@ export async function injectMemoryV2Block(
215
202
  // just rendered all of them); on per-turn it's just the newly added slugs.
216
203
  // We append rather than reset so that compaction-driven eviction
217
204
  // (`evictCompactedTurns`) is the only path that can re-enable a previously-
218
- // injected slug. Skills do NOT enter `everInjected` — they are stateless
219
- // and re-presented every turn.
205
+ // injected slug. Skill slugs (`skills/<id>`) participate in this dedup just
206
+ // like concept slugs — once attached on a turn, the cached attachment lives
207
+ // on that user message and the agent keeps seeing it across subsequent turns
208
+ // until compaction evicts the turn.
220
209
  const everInjectedSet = new Set(priorEverInjected.map((entry) => entry.slug));
221
210
  const newlyInjected = slugsToRender.filter(
222
211
  (slug) => !everInjectedSet.has(slug),
@@ -243,7 +232,6 @@ export async function injectMemoryV2Block(
243
232
  const { block, missingSlugs } = await renderInjectionBlock(
244
233
  workspaceDir,
245
234
  slugsToRender,
246
- topSkillIds,
247
235
  );
248
236
  const missingSlugSet = new Set(missingSlugs);
249
237
  if (missingSlugs.length > 0) {
@@ -262,7 +250,6 @@ export async function injectMemoryV2Block(
262
250
  // block memory injection.
263
251
  const toInjectSet = new Set(toInject);
264
252
  const renderedSet = new Set(slugsToRender);
265
- const topSkillIdSet = new Set(topSkillIds);
266
253
  const conceptRows: MemoryV2ConceptRowRecord[] = [...candidates].map(
267
254
  (slug) => {
268
255
  const breakdown = ownBreakdown.get(slug);
@@ -301,6 +288,9 @@ export async function injectMemoryV2Block(
301
288
  simUser: breakdown?.simUser ?? 0,
302
289
  simAssistant: breakdown?.simAssistant ?? 0,
303
290
  simNow: breakdown?.simNow ?? 0,
291
+ simUserRerankBoost: breakdown?.simUserRerankBoost ?? 0,
292
+ simAssistantRerankBoost: breakdown?.simAssistantRerankBoost ?? 0,
293
+ inRerankPool: breakdown?.inRerankPool ?? false,
304
294
  spreadContribution: spreadContribution.get(slug) ?? 0,
305
295
  source:
306
296
  inPrior && inAnn ? "both" : inPrior ? "prior_state" : "ann_top50",
@@ -310,19 +300,6 @@ export async function injectMemoryV2Block(
310
300
  );
311
301
  conceptRows.sort((a, b) => b.finalActivation - a.finalActivation);
312
302
 
313
- const skillRows: MemoryV2SkillRowRecord[] = [...skillCandidates].map((id) => {
314
- const breakdown = skillBreakdown.get(id);
315
- return {
316
- id,
317
- activation: skillActivation.get(id) ?? 0,
318
- simUser: breakdown?.simUser ?? 0,
319
- simAssistant: breakdown?.simAssistant ?? 0,
320
- simNow: breakdown?.simNow ?? 0,
321
- status: topSkillIdSet.has(id) ? "injected" : "not_injected",
322
- };
323
- });
324
- skillRows.sort((a, b) => b.activation - a.activation);
325
-
326
303
  const v2Cfg = config.memory.v2;
327
304
  try {
328
305
  recordMemoryV2ActivationLog({
@@ -330,7 +307,6 @@ export async function injectMemoryV2Block(
330
307
  turn: currentTurn,
331
308
  mode,
332
309
  concepts: conceptRows,
333
- skills: skillRows,
334
310
  config: {
335
311
  d: v2Cfg.d,
336
312
  c_user: v2Cfg.c_user,
@@ -339,7 +315,6 @@ export async function injectMemoryV2Block(
339
315
  k: v2Cfg.k,
340
316
  hops: v2Cfg.hops,
341
317
  top_k: v2Cfg.top_k,
342
- top_k_skills: v2Cfg.top_k_skills,
343
318
  epsilon: v2Cfg.epsilon,
344
319
  },
345
320
  });
@@ -353,6 +328,12 @@ export async function injectMemoryV2Block(
353
328
  return { block, toInject: newlyInjected };
354
329
  }
355
330
 
331
+ function throwIfAborted(signal: AbortSignal | undefined): void {
332
+ if (signal?.aborted) {
333
+ throw new DOMException("Aborted", "AbortError");
334
+ }
335
+ }
336
+
356
337
  // ---------------------------------------------------------------------------
357
338
  // Internal helpers
358
339
  // ---------------------------------------------------------------------------
@@ -380,9 +361,14 @@ interface RenderInjectionBlockResult {
380
361
  }
381
362
 
382
363
  /**
383
- * Render the inner content of the `<memory>` block for a list of slugs and
384
- * a list of ranked skill ids. The caller wraps the result in
385
- * `<memory>...</memory>` exactly once at injection time.
364
+ * Render the inner content of the `<memory>` block for a list of slugs.
365
+ * The caller wraps the result in `<memory>...</memory>` exactly once at
366
+ * injection time.
367
+ *
368
+ * The slug list is partitioned by prefix: slugs starting with `skills/`
369
+ * resolve to a `SkillEntry` via `getSkillCapability` and render under the
370
+ * trailing `### Skills You Can Use` subsection; everything else is read
371
+ * from disk via `readPage` and rendered as a concept-page section.
386
372
  *
387
373
  * Concept pages are read in parallel via `readPage`. Pages whose file has
388
374
  * gone missing between selection and render (e.g. consolidation deleted
@@ -390,17 +376,17 @@ interface RenderInjectionBlockResult {
390
376
  * block but reported back via `missingSlugs` so callers can surface the
391
377
  * divergence.
392
378
  *
393
- * Skill ids are looked up via `getSkillCapability`. Ids that the cache no
394
- * longer knows (e.g. uninstalled mid-run) are silently dropped, mirroring
395
- * the missing-pages behavior.
379
+ * Skill slugs whose entry the cache no longer knows (e.g. uninstalled
380
+ * mid-run) are silently dropped, mirroring the missing-pages behavior but
381
+ * without entering `missingSlugs` — the skill catalog is the source of
382
+ * truth for skill availability, not on-disk concept pages, so a missing
383
+ * skill is an expected catalog-level outcome rather than a stale-index
384
+ * bug.
396
385
  *
397
- * The block shape is the §5 layout from the design doc, with an optional
398
- * trailing skills subsection. Each concept-page section reproduces the page
399
- * as it lives on disk — frontmatter (`edges`, `ref_files`) plus body — so
400
- * the agent sees the page's edges and any referenced media paths alongside
401
- * the prose:
386
+ * The block shape mirrors the §5 layout — concept-page sections first,
387
+ * skills subsection last — preserving the prompt format the agent sees:
402
388
  *
403
- * ### <slug-1>
389
+ * ### <concept-slug-1>
404
390
  * ---
405
391
  * edges:
406
392
  * - <neighbor-slug>
@@ -409,7 +395,7 @@ interface RenderInjectionBlockResult {
409
395
  * ---
410
396
  * <body-1>
411
397
  *
412
- * ### <slug-2>
398
+ * ### <concept-slug-2>
413
399
  * ---
414
400
  * edges: []
415
401
  * ref_files: []
@@ -423,10 +409,12 @@ interface RenderInjectionBlockResult {
423
409
  async function renderInjectionBlock(
424
410
  workspaceDir: string,
425
411
  slugs: string[],
426
- skillIds: string[],
427
412
  ): Promise<RenderInjectionBlockResult> {
413
+ const conceptSlugs = slugs.filter((s) => !isSkillSlug(s));
414
+ const skillSlugs = slugs.filter((s) => isSkillSlug(s));
415
+
428
416
  const pages = await Promise.all(
429
- slugs.map(async (slug) => {
417
+ conceptSlugs.map(async (slug) => {
430
418
  const page = await readPage(workspaceDir, slug);
431
419
  return { slug, page };
432
420
  }),
@@ -444,10 +432,9 @@ async function renderInjectionBlock(
444
432
  sections.push(`### ${slug}\n${content}`);
445
433
  }
446
434
 
447
- // v2's skills collection is skills-only, so the activation suffix always applies.
448
435
  const skillLines: string[] = [];
449
- for (const id of skillIds) {
450
- const entry = getSkillCapability(id);
436
+ for (const slug of skillSlugs) {
437
+ const entry = getSkillCapability(slug);
451
438
  if (!entry) continue;
452
439
  skillLines.push(`- ${entry.content} → use skill_load to activate`);
453
440
  }
@@ -16,7 +16,7 @@
16
16
  * the convention established for the sweep prompt.
17
17
  */
18
18
 
19
- import { readFileSync } from "node:fs";
19
+ import { lstatSync, readFileSync } from "node:fs";
20
20
  import { homedir } from "node:os";
21
21
  import { isAbsolute, join } from "node:path";
22
22
 
@@ -28,6 +28,14 @@ const log = getLogger("memory-v2-consolidate-prompt");
28
28
  /** Sentinel substituted with the cutoff timestamp at runtime. */
29
29
  export const CUTOFF_PLACEHOLDER = "{{CUTOFF}}";
30
30
 
31
/**
 * Largest override file, in bytes, that is accepted as a consolidation prompt.
 *
 * Genuine consolidation prompts are a few kilobytes, so 1 MiB leaves generous
 * headroom. The cap keeps a `settings.write` principal from pointing the
 * override field at a multi-gigabyte file and exfiltrating its contents through
 * the wake hint. A `/dev/zero`-like stream is a separate case: `lstat` cannot
 * size-cap it on its own.
 */
const MAX_PROMPT_BYTES = 1024 * 1024;
38
+
31
39
  /**
32
40
  * Consolidation prompt — live-mode only. The agent runs as itself (full
33
41
  * SOUL.md + IDENTITY.md + persona + memory autoloads) with the standard
@@ -447,6 +455,33 @@ export function resolveConsolidationPrompt(
447
455
  const resolvedPath = resolveOverridePath(overridePath);
448
456
  let contents: string;
449
457
  try {
458
+ const stat = lstatSync(resolvedPath);
459
+ if (!stat.isFile()) {
460
+ log.warn(
461
+ {
462
+ configuredPath: overridePath,
463
+ resolvedPath,
464
+ reason: "not_regular_file",
465
+ fallback: "bundled",
466
+ },
467
+ "consolidation prompt override is not a regular file; using bundled prompt",
468
+ );
469
+ return renderConsolidationPrompt(cutoff);
470
+ }
471
+ if (stat.size > MAX_PROMPT_BYTES) {
472
+ log.warn(
473
+ {
474
+ configuredPath: overridePath,
475
+ resolvedPath,
476
+ size: stat.size,
477
+ limit: MAX_PROMPT_BYTES,
478
+ reason: "oversized_override",
479
+ fallback: "bundled",
480
+ },
481
+ "consolidation prompt override exceeds size limit; using bundled prompt",
482
+ );
483
+ return renderConsolidationPrompt(cutoff);
484
+ }
450
485
  contents = readFileSync(resolvedPath, "utf-8");
451
486
  } catch (err) {
452
487
  const code = (err as NodeJS.ErrnoException).code;
@@ -62,6 +62,7 @@ export interface ConceptPageQueryResult {
62
62
 
63
63
  let _client: QdrantRestClient | null = null;
64
64
  let _collectionReady = false;
65
+ let _collectionReadyPromise: Promise<void> | null = null;
65
66
 
66
67
  /** Lazily create a Qdrant REST client bound to the resolved URL. */
67
68
  function getClient(): QdrantRestClient {
@@ -85,7 +86,15 @@ function getClient(): QdrantRestClient {
85
86
  */
86
87
  export async function ensureConceptPageCollection(): Promise<void> {
87
88
  if (_collectionReady) return;
89
+ if (_collectionReadyPromise) return _collectionReadyPromise;
88
90
 
91
+ _collectionReadyPromise = ensureConceptPageCollectionOnce().finally(() => {
92
+ _collectionReadyPromise = null;
93
+ });
94
+ return _collectionReadyPromise;
95
+ }
96
+
97
+ async function ensureConceptPageCollectionOnce(): Promise<void> {
89
98
  const client = getClient();
90
99
  const config = getConfig();
91
100
  const vectorSize = config.memory.qdrant.vectorSize;
@@ -215,6 +224,95 @@ export async function deleteConceptPageEmbedding(slug: string): Promise<void> {
215
224
  }
216
225
  }
217
226
 
227
/**
 * Delete every point in the concept-page collection whose `slug` payload
 * begins with `prefix` and whose remainder (the slug with `prefix` removed) is
 * absent from `activeSuffixes`. The skill-seed flow uses this to drop stale
 * `skills/<id>` slugs after a skill is uninstalled or disabled, because skills
 * share the concept-page collection instead of living in a dedicated one.
 *
 * The collection is scrolled in pages of 256 points, for at most 10,000 pages;
 * points beyond that bound are not examined. All stale ids are then removed
 * with a single `delete` call (`wait: true`). When nothing is stale no delete
 * is issued, so repeated calls with the same inputs are idempotent.
 *
 * If the attempt fails with a collection-missing error, the ready flag is
 * cleared, the collection is ensured again, and the prune is retried once.
 * Any other error, or a failure of the retry, propagates to the caller.
 *
 * @param prefix - Slug prefix that scopes the prune.
 * @param activeSuffixes - Suffixes (slug minus `prefix`) that must be kept.
 */
export async function pruneSlugsWithPrefixExcept(
  prefix: string,
  activeSuffixes: readonly string[],
): Promise<void> {
  await ensureConceptPageCollection();

  const qdrant = getClient();
  const keep = new Set(activeSuffixes);
  const pageSize = 256;
  const maxPages = 10_000;

  // Scan the collection page by page and gather the ids of stale points.
  const findStalePointIds = async (): Promise<Array<string | number>> => {
    const stale: Array<string | number> = [];
    let cursor: string | number | undefined = undefined;
    let pagesRead = 0;
    while (pagesRead < maxPages) {
      pagesRead += 1;
      const page = await qdrant.scroll(MEMORY_V2_COLLECTION, {
        limit: pageSize,
        with_payload: true,
        with_vector: false,
        ...(cursor !== undefined ? { offset: cursor } : {}),
      });
      for (const point of page.points) {
        const slug = (point.payload as { slug?: unknown } | null)?.slug;
        const isStale =
          typeof slug === "string" &&
          slug.startsWith(prefix) &&
          !keep.has(slug.slice(prefix.length));
        if (isStale) stale.push(point.id);
      }
      const next = page.next_page_offset;
      // A null/undefined cursor marks the final page.
      if (next == null) break;
      cursor = typeof next === "string" ? next : (next as number);
    }
    return stale;
  };

  // One full pass: scan, then delete whatever turned out to be stale.
  const pruneOnce = async (): Promise<void> => {
    const staleIds = await findStalePointIds();
    if (staleIds.length > 0) {
      await qdrant.delete(MEMORY_V2_COLLECTION, {
        wait: true,
        points: staleIds,
      });
    }
  };

  try {
    await pruneOnce();
  } catch (err) {
    if (!isCollectionMissing(err)) throw err;
    // The collection vanished underneath us: rebuild it and try exactly once more.
    _collectionReady = false;
    await ensureConceptPageCollection();
    await pruneOnce();
  }
}
292
+
293
/**
 * Best-effort removal of the legacy `memory_v2_skills` Qdrant collection.
 *
 * Skill embeddings are stored next to concept pages in
 * `memory_v2_concept_pages` under the `skills/<id>` slug prefix, so the
 * dedicated collection is dead weight on installs upgraded from the
 * split-collection era.
 *
 * Never throws: when the collection does not exist the function returns
 * silently, a successful delete is logged at info level, and any error (for
 * example Qdrant being temporarily unavailable) is logged as a warning and
 * swallowed.
 */
export async function dropLegacySkillsCollection(): Promise<void> {
  const legacyCollection = "memory_v2_skills";
  try {
    const qdrant = getClient();
    const { exists } = await qdrant.collectionExists(legacyCollection);
    if (exists) {
      await qdrant.deleteCollection(legacyCollection);
      log.info("Deleted legacy memory_v2_skills Qdrant collection");
    }
  } catch (err) {
    log.warn(
      { err },
      "Failed to drop legacy memory_v2_skills collection — non-fatal",
    );
  }
}
315
+
218
316
  /**
219
317
  * Run separate dense and sparse queries against the concept-page collection
220
318
  * and return per-channel scores per slug. Callers fuse these — typically via
@@ -437,4 +535,5 @@ function pointIdForSlug(slug: string): string {
437
535
  export function _resetMemoryV2QdrantForTests(): void {
438
536
  _client = null;
439
537
  _collectionReady = false;
538
+ _collectionReadyPromise = null;
440
539
  }
@@ -0,0 +1,177 @@
1
+ /** Memory v2 cross-encoder rerank — `(query, page-preview)` pairs scored by a local model. */
2
+
3
+ import { createHash } from "node:crypto";
4
+
5
+ import type { AssistantConfig } from "../../config/types.js";
6
+ import { getLogger } from "../../util/logger.js";
7
+ import { getWorkspaceDir } from "../../util/platform.js";
8
+ import { getOrCreateRerankBackend } from "../rerank-local.js";
9
+ import { readPage } from "./page-store.js";
10
+
11
+ const log = getLogger("memory-v2-reranker");
12
+
13
/**
 * Maximum number of preview characters sent to the cross-encoder per page.
 * bge-reranker-base has a ~512-token context; capping the input bounds the
 * payload.
 */
const PASSAGE_CHAR_CAP = 240;

/** Scores cached for one `(query, candidate set)` pair. */
interface CacheEntry {
  /** Rerank score per slug. */
  scores: Map<string, number>;
  /** Millisecond timestamp of the last write or cache hit. */
  ts: number;
}

/** Entries older than this (in milliseconds) are dropped by `evictExpired`. */
const CACHE_TTL_MS = 2 * 60 * 1000;
/** Size the cache is trimmed back to by `evictExpired`. */
const CACHE_MAX_ENTRIES = 64;
/** Insertion-ordered cache; the first key is the least recently written. */
const cache = new Map<string, CacheEntry>();

/** Matches one ATX-style heading line at the very start of the text. */
const LEADING_HEADING_LINE = /^#+[ \t].*(?:\n|$)/;

/**
 * Build the cache key for a query scored against a candidate set.
 *
 * The slugs are sorted on a copy, so the key is independent of candidate order
 * and the caller's array is left untouched.
 *
 * @returns Hex-encoded SHA-256 of the query and the sorted slugs, NUL-separated.
 */
function cacheKey(query: string, slugs: readonly string[]): string {
  const sorted = [...slugs].sort().join("\0");
  return createHash("sha256").update(`${query}\0${sorted}`).digest("hex");
}

/**
 * Trim the cache in two passes: first drop entries whose age exceeds
 * `CACHE_TTL_MS`, then, if the cache is still above `CACHE_MAX_ENTRIES`, drop
 * the oldest-inserted keys until it fits.
 *
 * @param now - Current time in milliseconds, compared against each entry's `ts`.
 */
function evictExpired(now: number): void {
  // Deleting entries while iterating a Map is well-defined.
  for (const [key, entry] of cache) {
    if (now - entry.ts > CACHE_TTL_MS) cache.delete(key);
  }
  const overflow = cache.size - CACHE_MAX_ENTRIES;
  if (overflow <= 0) return;
  let dropped = 0;
  for (const key of cache.keys()) {
    if (dropped >= overflow) break;
    cache.delete(key);
    dropped += 1;
  }
}

/**
 * Build the text scored by the cross-encoder for one page: the slug, a
 * newline, then a whitespace-collapsed preview of the page's first prose
 * paragraph capped at `PASSAGE_CHAR_CAP` characters.
 *
 * Leading heading lines are skipped before the first paragraph is chosen, so a
 * page laid out as `# Title`, blank line, prose yields the prose rather than
 * the bare heading. A page that consists only of headings keeps its first
 * heading paragraph as the preview, so the passage is never emptied by the
 * heading skip.
 *
 * @param slug - Page slug, emitted verbatim as the first line.
 * @param body - Page body (markdown).
 */
function buildPassage(slug: string, body: string): string {
  const trimmed = body.replace(/^\s+/, "");

  // Peel heading lines (and the blank space after them) off the front.
  let prose = trimmed;
  while (LEADING_HEADING_LINE.test(prose)) {
    prose = prose.replace(LEADING_HEADING_LINE, "").replace(/^\s+/, "");
  }
  // Heading-only page: fall back to the untouched text.
  const source = prose.length > 0 ? prose : trimmed;

  const blank = source.search(/\n\s*\n/);
  const para = blank === -1 ? source : source.slice(0, blank);
  const compact = para.trim().replace(/\s+/g, " ").slice(0, PASSAGE_CHAR_CAP);
  return `${slug}\n${compact}`;
}
52
+
53
/**
 * Score every candidate page against one or more queries with the local
 * cross-encoder, using each page's first-paragraph preview as the passage.
 *
 * The result has one `Map<slug, score>` per query, in the order of `queries`.
 * Scores are clamped to `[0, 1]`; any further normalisation or boost math is
 * left to the caller.
 *
 * Batching: all queries that miss the cache are scored in a single backend
 * call, laid out query-major (first uncached query × every passage, then the
 * next query, and so on).
 *
 * Caching: each query is looked up independently. A hit is re-inserted with a
 * fresh timestamp so frequently used entries survive eviction, and the caller
 * receives a copy of the cached scores. A whitespace-only query resolves to an
 * empty Map without touching the cache or the backend.
 *
 * Failure handling: a page that fails to load, or for which no page is
 * returned, is left out of the scored set. If no page is usable, or the
 * backend call throws, every uncached query gets an empty Map so callers can
 * fall back to pure fused scores.
 *
 * @param queries - Query strings to score.
 * @param candidates - Slugs of the candidate pages shared by all queries.
 * @param config - Assistant config; `memory.v2.rerank` supplies `model` and `dtype`.
 */
export async function rerankCandidates(
  queries: readonly string[],
  candidates: readonly string[],
  config: AssistantConfig,
): Promise<Array<Map<string, number>>> {
  if (queries.length === 0) return [];
  if (candidates.length === 0) return queries.map(() => new Map());

  const now = Date.now();
  evictExpired(now);

  // One slot per query; `null` marks a query that still needs the backend.
  const slots: Array<Map<string, number> | null> = queries.map(() => null);
  const pending: number[] = [];
  queries.forEach((query, idx) => {
    if (query.trim().length === 0) {
      slots[idx] = new Map();
      return;
    }
    const key = cacheKey(query, candidates);
    const hit = cache.get(key);
    if (!hit) {
      pending.push(idx);
      return;
    }
    // Delete-then-set moves the entry to the back of the insertion order.
    cache.delete(key);
    cache.set(key, { ...hit, ts: now });
    slots[idx] = new Map(hit.scores);
  });

  const toOutput = (): Array<Map<string, number>> =>
    slots.map((slot) => slot ?? new Map());
  // Resolve every still-pending query to an empty Map and build the output.
  const giveUpOnPending = (): Array<Map<string, number>> => {
    for (const idx of pending) slots[idx] = new Map();
    return toOutput();
  };

  if (pending.length === 0) return toOutput();

  const workspaceDir = getWorkspaceDir();
  const loaded = await Promise.all(
    candidates.map((slug) =>
      readPage(workspaceDir, slug).catch((err) => {
        log.debug({ err, slug }, "Reranker skipping page that failed to load");
        return null;
      }),
    ),
  );

  // Keep passages and their slugs in lock-step so scores can be mapped back.
  const passages: string[] = [];
  const scoredSlugs: string[] = [];
  candidates.forEach((slug, idx) => {
    const page = loaded[idx];
    if (!page) return;
    passages.push(buildPassage(slug, page.body));
    scoredSlugs.push(slug);
  });

  if (passages.length === 0) return giveUpOnPending();

  // Query-major pair layout: each pending query is repeated once per passage.
  const pairQueries = pending.flatMap((qi) => passages.map(() => queries[qi]));
  const pairPassages = pending.flatMap(() => passages);

  const { model, dtype } = config.memory.v2.rerank;
  let rawScores: number[];
  try {
    rawScores = await getOrCreateRerankBackend(model, dtype).score(
      pairQueries,
      pairPassages,
    );
  } catch (err) {
    log.warn(
      { err, model, n: pairPassages.length },
      "Rerank backend failed; falling back to pure fused scores",
    );
    return giveUpOnPending();
  }

  for (const [row, qi] of pending.entries()) {
    const base = row * passages.length;
    const scored = new Map<string, number>();
    for (const [col, slug] of scoredSlugs.entries()) {
      const value = rawScores[base + col];
      if (typeof value !== "number" || Number.isNaN(value)) continue;
      // Sigmoid output should already lie in [0, 1]; clamp defensively.
      scored.set(slug, Math.max(0, Math.min(1, value)));
    }
    slots[qi] = scored;
    cache.set(cacheKey(queries[qi], candidates), {
      scores: new Map(scored),
      ts: now,
    });
  }

  return toOutput();
}
173
+
174
/**
 * @internal Test-only hook: empties the module-level rerank cache so that one
 * test case cannot observe entries written by another.
 */
export function _resetRerankCacheForTests(): void {
  cache.clear();
}