@vellumai/assistant 0.7.2 → 0.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (347)
  1. package/ARCHITECTURE.md +16 -1
  2. package/docs/architecture/memory.md +5 -2
  3. package/node_modules/@vellumai/gateway-client/src/ipc-client.ts +13 -4
  4. package/node_modules/@vellumai/skill-host-contracts/src/assistant-event.ts +0 -9
  5. package/node_modules/@vellumai/slack-text/src/index.test.ts +18 -35
  6. package/node_modules/@vellumai/slack-text/src/index.ts +2 -48
  7. package/openapi.yaml +449 -22
  8. package/package.json +1 -1
  9. package/src/__tests__/app-control-flow.test.ts +21 -11
  10. package/src/__tests__/assistant-event-hub.test.ts +48 -0
  11. package/src/__tests__/assistant-event.test.ts +0 -10
  12. package/src/__tests__/assistant-events-sse-hardening.test.ts +2 -7
  13. package/src/__tests__/assistant-feature-flags-integration.test.ts +18 -0
  14. package/src/__tests__/auto-analysis-end-to-end.test.ts +62 -1
  15. package/src/__tests__/background-workers-disk-pressure.test.ts +268 -0
  16. package/src/__tests__/call-conversation-messages.test.ts +8 -2
  17. package/src/__tests__/channel-inbound-disk-pressure.test.ts +537 -0
  18. package/src/__tests__/channel-readiness-service.test.ts +4 -2
  19. package/src/__tests__/config-loader-backfill.test.ts +379 -0
  20. package/src/__tests__/config-schema.test.ts +1 -0
  21. package/src/__tests__/config-watcher-cleanup-throttle.test.ts +18 -9
  22. package/src/__tests__/config-watcher.test.ts +140 -69
  23. package/src/__tests__/context-search-agent-runner.test.ts +61 -3
  24. package/src/__tests__/context-search-conversations-source.test.ts +0 -24
  25. package/src/__tests__/context-search-fanout.test.ts +0 -1
  26. package/src/__tests__/context-search-memory-source.test.ts +3 -7
  27. package/src/__tests__/context-search-memory-v2-source.test.ts +0 -2
  28. package/src/__tests__/context-search-pkb-source.test.ts +0 -1
  29. package/src/__tests__/context-search-workspace-source.test.ts +0 -1
  30. package/src/__tests__/conversation-abort-tool-results.test.ts +6 -0
  31. package/src/__tests__/conversation-agent-loop-disk-pressure.test.ts +223 -0
  32. package/src/__tests__/conversation-agent-loop.test.ts +454 -5
  33. package/src/__tests__/conversation-error.test.ts +150 -3
  34. package/src/__tests__/conversation-process-callsite.test.ts +43 -0
  35. package/src/__tests__/conversation-provider-retry-repair.test.ts +6 -0
  36. package/src/__tests__/conversation-runtime-assembly.test.ts +65 -0
  37. package/src/__tests__/conversation-slash-unknown.test.ts +6 -0
  38. package/src/__tests__/conversation-speed-override.test.ts +0 -3
  39. package/src/__tests__/conversation-store.test.ts +0 -18
  40. package/src/__tests__/conversation-surfaces-app-control.test.ts +15 -4
  41. package/src/__tests__/conversation-surfaces-data-persist.test.ts +404 -0
  42. package/src/__tests__/conversation-tool-setup-app-refresh.test.ts +2 -5
  43. package/src/__tests__/conversation-workspace-injection.test.ts +6 -0
  44. package/src/__tests__/conversation-workspace-tool-tracking.test.ts +6 -0
  45. package/src/__tests__/credentials-cli.test.ts +7 -0
  46. package/src/__tests__/cu-unified-flow.test.ts +176 -10
  47. package/src/__tests__/date-context.test.ts +164 -2
  48. package/src/__tests__/disk-pressure-guard.test.ts +262 -0
  49. package/src/__tests__/disk-pressure-lifecycle.test.ts +168 -0
  50. package/src/__tests__/disk-pressure-policy.test.ts +241 -0
  51. package/src/__tests__/disk-pressure-routes.test.ts +379 -0
  52. package/src/__tests__/disk-pressure-tools.test.ts +277 -0
  53. package/src/__tests__/disk-usage.test.ts +150 -0
  54. package/src/__tests__/events-client-registration.test.ts +52 -0
  55. package/src/__tests__/events-dev-bypass-actor.test.ts +162 -0
  56. package/src/__tests__/file-write-tool.test.ts +4 -10
  57. package/src/__tests__/filing-service.test.ts +3 -4
  58. package/src/__tests__/heartbeat-disk-pressure.test.ts +183 -0
  59. package/src/__tests__/heartbeat-service.test.ts +260 -11
  60. package/src/__tests__/host-app-control-proxy.test.ts +195 -25
  61. package/src/__tests__/host-bash-proxy.test.ts +227 -34
  62. package/src/__tests__/host-bash-routes.test.ts +178 -13
  63. package/src/__tests__/host-cu-proxy.test.ts +210 -3
  64. package/src/__tests__/host-cu-routes-targeted.test.ts +141 -12
  65. package/src/__tests__/host-file-proxy-targeted.test.ts +48 -9
  66. package/src/__tests__/host-file-proxy.test.ts +268 -6
  67. package/src/__tests__/host-file-routes-targeted.test.ts +175 -17
  68. package/src/__tests__/host-transfer-proxy-targeted.test.ts +408 -59
  69. package/src/__tests__/host-transfer-routes-targeted.test.ts +232 -17
  70. package/src/__tests__/http-user-message-parity.test.ts +107 -1
  71. package/src/__tests__/injector-chain.test.ts +18 -6
  72. package/src/__tests__/injector-disk-pressure.test.ts +224 -0
  73. package/src/__tests__/managed-profile-guard.test.ts +18 -0
  74. package/src/__tests__/mcp-abort-signal.test.ts +130 -0
  75. package/src/__tests__/memory-admin-recall.test.ts +3 -11
  76. package/src/__tests__/memory-retrieval-pipeline.test.ts +22 -1
  77. package/src/__tests__/normalize-onboarding.test.ts +180 -0
  78. package/src/__tests__/oauth-connect-routes.test.ts +316 -0
  79. package/src/__tests__/oauth-provider-seed-logos.test.ts +24 -2
  80. package/src/__tests__/onboarding-persona-write.test.ts +308 -0
  81. package/src/__tests__/openai-provider.test.ts +45 -8
  82. package/src/__tests__/persist-onboarding-artifacts.test.ts +44 -64
  83. package/src/__tests__/platform-callback-registration.test.ts +21 -4
  84. package/src/__tests__/platform.test.ts +2 -1
  85. package/src/__tests__/playbook-execution.test.ts +0 -43
  86. package/src/__tests__/plugin-tool-contribution.test.ts +47 -0
  87. package/src/__tests__/prechat-onboarding-contract.test.ts +214 -27
  88. package/src/__tests__/provider-tool-name.test.ts +23 -0
  89. package/src/__tests__/relay-server.test.ts +15 -4
  90. package/src/__tests__/runtime-events-sse.test.ts +4 -8
  91. package/src/__tests__/scheduler-disk-pressure.test.ts +148 -0
  92. package/src/__tests__/secret-ingress-http.test.ts +0 -1
  93. package/src/__tests__/suggestion-routes.test.ts +46 -0
  94. package/src/__tests__/twilio-validation.test.ts +2 -2
  95. package/src/__tests__/workspace-migration-065-bump-stale-heartbeat-interval.test.ts +122 -0
  96. package/src/__tests__/workspace-migration-066-seed-heartbeat-callsite-cost-default.test.ts +285 -0
  97. package/src/__tests__/workspace-migration-068-release-notes-local-timezone.test.ts +90 -0
  98. package/src/__tests__/workspace-migration-safe-storage-limits-release.test.ts +90 -0
  99. package/src/approvals/guardian-decision-primitive.ts +13 -0
  100. package/src/approvals/guardian-request-resolvers.ts +16 -17
  101. package/src/backup/snapshot-lock.ts +2 -27
  102. package/src/bundler/compiler-tools.ts +3 -2
  103. package/src/calls/call-conversation-messages.ts +46 -10
  104. package/src/cli/commands/__tests__/webhooks.test.ts +0 -4
  105. package/src/cli/commands/bash.ts +35 -108
  106. package/src/cli/commands/contacts.ts +64 -25
  107. package/src/cli/commands/credentials.ts +56 -0
  108. package/src/cli/commands/memory-v2.ts +7 -6
  109. package/src/cli/commands/oauth/__tests__/connect.test.ts +437 -1
  110. package/src/cli/commands/oauth/connect.ts +127 -1
  111. package/src/cli/commands/platform/__tests__/callback-routes-list.test.ts +0 -3
  112. package/src/cli/commands/platform/__tests__/connect.test.ts +7 -1
  113. package/src/cli/commands/platform/__tests__/disconnect.test.ts +7 -1
  114. package/src/cli/commands/platform/__tests__/status.test.ts +103 -6
  115. package/src/cli/commands/platform/index.ts +16 -7
  116. package/src/cli/commands/status.ts +57 -0
  117. package/src/cli/program.ts +4 -2
  118. package/src/config/assistant-feature-flags.ts +13 -3
  119. package/src/config/bundled-skills/messaging/tools/messaging-analyze-style.ts +4 -3
  120. package/src/config/bundled-skills/phone-calls/references/TROUBLESHOOTING.md +13 -7
  121. package/src/config/bundled-skills/playbooks/tools/playbook-create.ts +2 -2
  122. package/src/config/bundled-skills/playbooks/tools/playbook-delete.ts +2 -2
  123. package/src/config/bundled-skills/playbooks/tools/playbook-list.ts +2 -2
  124. package/src/config/bundled-skills/playbooks/tools/playbook-update.ts +2 -2
  125. package/src/config/env.ts +0 -8
  126. package/src/config/feature-flag-registry.json +27 -3
  127. package/src/config/loader.ts +127 -8
  128. package/src/config/schemas/__tests__/memory-v2.test.ts +10 -5
  129. package/src/config/schemas/call-site-catalog.ts +14 -0
  130. package/src/config/schemas/channels.ts +0 -5
  131. package/src/config/schemas/heartbeat.ts +1 -1
  132. package/src/config/schemas/llm.ts +2 -0
  133. package/src/config/schemas/memory-lifecycle.ts +13 -0
  134. package/src/config/schemas/memory-v2.ts +75 -11
  135. package/src/config/schemas/platform.ts +43 -3
  136. package/src/config/schemas/services.ts +28 -0
  137. package/src/config/seed-inference-profiles.ts +230 -33
  138. package/src/contacts/contact-store.ts +0 -25
  139. package/src/daemon/__tests__/conversation-tool-setup.test.ts +86 -25
  140. package/src/daemon/assistant-attachments.ts +4 -4
  141. package/src/daemon/config-watcher.ts +85 -57
  142. package/src/daemon/conversation-agent-loop-handlers.ts +6 -0
  143. package/src/daemon/conversation-agent-loop.ts +170 -33
  144. package/src/daemon/conversation-error.ts +87 -15
  145. package/src/daemon/conversation-lifecycle.ts +1 -3
  146. package/src/daemon/conversation-process.ts +8 -0
  147. package/src/daemon/conversation-runtime-assembly.ts +26 -0
  148. package/src/daemon/conversation-store.ts +2 -2
  149. package/src/daemon/conversation-surfaces.ts +195 -15
  150. package/src/daemon/conversation-tool-setup.ts +57 -14
  151. package/src/daemon/conversation.ts +17 -22
  152. package/src/daemon/date-context.ts +71 -22
  153. package/src/daemon/disk-pressure-background-gate.ts +73 -0
  154. package/src/daemon/disk-pressure-guard.ts +343 -0
  155. package/src/daemon/disk-pressure-policy.ts +163 -0
  156. package/src/daemon/handlers/shared.ts +0 -1
  157. package/src/daemon/handlers/skills.ts +3 -4
  158. package/src/daemon/host-app-control-proxy.ts +137 -41
  159. package/src/daemon/host-bash-proxy.ts +46 -21
  160. package/src/daemon/host-cu-proxy.ts +49 -3
  161. package/src/daemon/host-file-proxy.ts +43 -7
  162. package/src/daemon/host-transfer-proxy.ts +95 -4
  163. package/src/daemon/lifecycle.ts +79 -28
  164. package/src/daemon/meet-host-supervisor.ts +4 -4
  165. package/src/daemon/meet-manifest-loader.ts +0 -1
  166. package/src/daemon/memory-v2-startup.ts +14 -4
  167. package/src/daemon/message-protocol.ts +3 -0
  168. package/src/daemon/message-types/conversations.ts +4 -0
  169. package/src/daemon/message-types/disk-pressure.ts +9 -0
  170. package/src/daemon/message-types/messages.ts +3 -0
  171. package/src/daemon/profiler-run-store.ts +5 -5
  172. package/src/daemon/tool-setup-types.ts +2 -2
  173. package/src/documents/document-store.ts +85 -0
  174. package/src/filing/filing-service.ts +30 -5
  175. package/src/heartbeat/__tests__/heartbeat-feed-event.test.ts +9 -16
  176. package/src/heartbeat/__tests__/heartbeat-run-store.test.ts +36 -0
  177. package/src/heartbeat/heartbeat-run-store.ts +13 -0
  178. package/src/heartbeat/heartbeat-service.ts +205 -31
  179. package/src/home/feed-scheduler.ts +18 -0
  180. package/src/inbound/platform-callback-registration.ts +8 -15
  181. package/src/ipc/__tests__/clients-list-ipc.test.ts +169 -0
  182. package/src/ipc/assistant-server.ts +56 -2
  183. package/src/ipc/gateway-client.ts +37 -3
  184. package/src/live-voice/live-voice-archive.ts +4 -4
  185. package/src/live-voice/protocol.ts +5 -7
  186. package/src/media/image-service.ts +1 -7
  187. package/src/memory/__tests__/fixtures/memory-v2-activation-fixtures.ts +21 -13
  188. package/src/memory/__tests__/jobs-worker-v2-schedule.test.ts +52 -22
  189. package/src/memory/__tests__/memory-v2-activation-log-store.test.ts +0 -6
  190. package/src/memory/__tests__/memory-v2-concept-frequency.test.ts +272 -0
  191. package/src/memory/admin.ts +5 -9
  192. package/src/memory/context-search/agent-runner.ts +19 -2
  193. package/src/memory/context-search/sources/conversations.ts +2 -11
  194. package/src/memory/context-search/sources/memory-v2.ts +5 -4
  195. package/src/memory/context-search/sources/memory.ts +0 -1
  196. package/src/memory/context-search/types.ts +0 -1
  197. package/src/memory/conversation-crud.ts +4 -12
  198. package/src/memory/db-init.ts +2 -0
  199. package/src/memory/embedding-runtime-manager.ts +119 -5
  200. package/src/memory/graph/__tests__/conversation-graph-memory-v2-routing.test.ts +32 -21
  201. package/src/memory/graph/conversation-graph-memory.ts +42 -54
  202. package/src/memory/graph/extraction.ts +1 -3
  203. package/src/memory/graph/graph-search.test.ts +10 -67
  204. package/src/memory/graph/graph-search.ts +1 -20
  205. package/src/memory/graph/retriever.test.ts +6 -0
  206. package/src/memory/graph/retriever.ts +6 -10
  207. package/src/memory/indexer.ts +54 -45
  208. package/src/memory/job-handlers/backfill.ts +2 -11
  209. package/src/memory/job-handlers/cleanup.ts +43 -0
  210. package/src/memory/job-handlers/embedding.ts +6 -8
  211. package/src/memory/job-handlers/summarization.ts +2 -7
  212. package/src/memory/jobs-store.ts +48 -0
  213. package/src/memory/jobs-worker.ts +81 -43
  214. package/src/memory/memory-v2-activation-log-store.ts +32 -14
  215. package/src/memory/memory-v2-concept-frequency.ts +169 -0
  216. package/src/memory/migrations/239-trace-events-created-at-index.ts +18 -0
  217. package/src/memory/migrations/index.ts +1 -0
  218. package/src/memory/pkb/pkb-search.test.ts +6 -0
  219. package/src/memory/qdrant-client.ts +0 -13
  220. package/src/memory/rerank-local.ts +374 -0
  221. package/src/memory/search/semantic.ts +6 -67
  222. package/src/memory/trace-event-store.ts +1 -17
  223. package/src/memory/v2/__tests__/activation.test.ts +311 -250
  224. package/src/memory/v2/__tests__/consolidation-job.test.ts +40 -8
  225. package/src/memory/v2/__tests__/injection.test.ts +157 -167
  226. package/src/memory/v2/__tests__/prompts-consolidation.test.ts +61 -2
  227. package/src/memory/v2/__tests__/qdrant.test.ts +16 -0
  228. package/src/memory/v2/__tests__/reranker.test.ts +338 -0
  229. package/src/memory/v2/__tests__/sim.test.ts +5 -199
  230. package/src/memory/v2/__tests__/skill-store.test.ts +71 -65
  231. package/src/memory/v2/__tests__/static-context.test.ts +76 -1
  232. package/src/memory/v2/activation.ts +149 -156
  233. package/src/memory/v2/consolidation-job.ts +62 -12
  234. package/src/memory/v2/injection.ts +47 -60
  235. package/src/memory/v2/prompts/consolidation.ts +36 -1
  236. package/src/memory/v2/qdrant.ts +99 -0
  237. package/src/memory/v2/reranker.ts +177 -0
  238. package/src/memory/v2/sim.ts +10 -84
  239. package/src/memory/v2/skill-content.ts +4 -3
  240. package/src/memory/v2/skill-store.ts +82 -59
  241. package/src/memory/v2/static-context.ts +22 -0
  242. package/src/memory/v2/types.ts +10 -10
  243. package/src/notifications/copy-composer.ts +13 -0
  244. package/src/notifications/signal.ts +4 -0
  245. package/src/oauth/AGENTS.md +3 -1
  246. package/src/oauth/__tests__/oauth-connect-state.test.ts +137 -0
  247. package/src/oauth/connect-orchestrator.ts +2 -0
  248. package/src/oauth/connection-resolver.test.ts +66 -1
  249. package/src/oauth/connection-resolver.ts +55 -1
  250. package/src/oauth/oauth-connect-state.ts +77 -0
  251. package/src/oauth/seed-providers.ts +58 -1
  252. package/src/plugins/defaults/injectors.ts +35 -2
  253. package/src/plugins/defaults/memory-retrieval.ts +5 -6
  254. package/src/plugins/types.ts +7 -0
  255. package/src/proactive-artifact/aux-message-injector.ts +74 -0
  256. package/src/proactive-artifact/decision.test.ts +226 -0
  257. package/src/proactive-artifact/decision.ts +165 -0
  258. package/src/proactive-artifact/index.ts +7 -0
  259. package/src/proactive-artifact/job.test.ts +867 -0
  260. package/src/proactive-artifact/job.ts +352 -0
  261. package/src/proactive-artifact/message-copy.ts +41 -0
  262. package/src/proactive-artifact/trigger-state.test.ts +277 -0
  263. package/src/proactive-artifact/trigger-state.ts +119 -0
  264. package/src/prompts/normalize-onboarding.ts +80 -0
  265. package/src/prompts/persona-resolver.ts +101 -9
  266. package/src/prompts/system-prompt.ts +21 -7
  267. package/src/prompts/templates/BOOTSTRAP.md +13 -5
  268. package/src/providers/__tests__/retry-callsite.test.ts +222 -1
  269. package/src/providers/model-intents.ts +7 -0
  270. package/src/providers/openrouter/client.ts +8 -0
  271. package/src/providers/retry.ts +50 -0
  272. package/src/providers/types.ts +1 -0
  273. package/src/runtime/__tests__/agent-wake.test.ts +456 -3
  274. package/src/runtime/agent-wake.ts +238 -100
  275. package/src/runtime/assistant-event-hub.ts +36 -6
  276. package/src/runtime/assistant-event.ts +0 -1
  277. package/src/runtime/auth/__tests__/route-policy.test.ts +64 -0
  278. package/src/runtime/auth/route-policy.ts +14 -1
  279. package/src/runtime/auth/same-actor.ts +216 -0
  280. package/src/runtime/channel-retry-sweep.ts +65 -1
  281. package/src/runtime/guardian-reply-router.ts +10 -0
  282. package/src/runtime/local-actor-identity.ts +52 -11
  283. package/src/runtime/pending-interactions.ts +8 -0
  284. package/src/runtime/routes/__tests__/client-routes.test.ts +155 -0
  285. package/src/runtime/routes/__tests__/conversation-query-routes.test.ts +0 -5
  286. package/src/runtime/routes/__tests__/heartbeat-routes.test.ts +1 -1
  287. package/src/runtime/routes/client-routes.ts +20 -2
  288. package/src/runtime/routes/contact-routes.ts +0 -25
  289. package/src/runtime/routes/conversation-routes.ts +35 -26
  290. package/src/runtime/routes/debug-bash-routes.ts +163 -0
  291. package/src/runtime/routes/disk-pressure-routes.ts +121 -0
  292. package/src/runtime/routes/document-pdf-renderer.ts +6 -2
  293. package/src/runtime/routes/documents-routes.ts +2 -75
  294. package/src/runtime/routes/events-routes.ts +41 -9
  295. package/src/runtime/routes/host-bash-routes.ts +23 -3
  296. package/src/runtime/routes/host-cu-routes.ts +33 -6
  297. package/src/runtime/routes/host-file-routes.ts +32 -6
  298. package/src/runtime/routes/host-transfer-routes.ts +79 -16
  299. package/src/runtime/routes/identity-routes.ts +7 -138
  300. package/src/runtime/routes/inbound-message-handler.ts +77 -12
  301. package/src/runtime/routes/inbound-stages/guardian-reply-intercept.ts +3 -0
  302. package/src/runtime/routes/index.ts +6 -0
  303. package/src/runtime/routes/memory-item-routes.test.ts +41 -15
  304. package/src/runtime/routes/memory-v2-routes.ts +33 -0
  305. package/src/runtime/routes/oauth-connect-routes.ts +153 -0
  306. package/src/runtime/verification-outbound-actions.ts +4 -4
  307. package/src/schedule/run-script.ts +37 -5
  308. package/src/schedule/scheduler.ts +20 -1
  309. package/src/security/encrypted-store.ts +2 -0
  310. package/src/security/secure-keys.ts +55 -0
  311. package/src/skills/remote-skill-policy.ts +4 -10
  312. package/src/subagent/index.ts +1 -7
  313. package/src/subagent/manager.ts +1 -15
  314. package/src/tasks/task-runner.ts +0 -1
  315. package/src/tasks/task-store.ts +0 -3
  316. package/src/tools/background-tool-registry.ts +17 -3
  317. package/src/tools/host-filesystem/edit.test.ts +151 -0
  318. package/src/tools/host-filesystem/edit.ts +43 -1
  319. package/src/tools/host-filesystem/read.test.ts +129 -0
  320. package/src/tools/host-filesystem/read.ts +43 -1
  321. package/src/tools/host-filesystem/transfer.test.ts +127 -2
  322. package/src/tools/host-filesystem/transfer.ts +56 -11
  323. package/src/tools/host-filesystem/write.test.ts +134 -0
  324. package/src/tools/host-filesystem/write.ts +43 -1
  325. package/src/tools/host-terminal/host-shell.ts +13 -6
  326. package/src/tools/mcp/mcp-tool-factory.ts +2 -1
  327. package/src/tools/memory/register.test.ts +12 -9
  328. package/src/tools/memory/register.ts +1 -2
  329. package/src/tools/provider-tool-name.ts +28 -0
  330. package/src/tools/registry.ts +30 -9
  331. package/src/tools/terminal/shell.ts +9 -1
  332. package/src/tools/tool-approval-handler.ts +31 -6
  333. package/src/tools/types.ts +24 -2
  334. package/src/tts/provider-catalog.ts +3 -5
  335. package/src/util/disk-usage.ts +138 -0
  336. package/src/util/platform.ts +21 -11
  337. package/src/util/process-liveness.ts +26 -0
  338. package/src/workspace/heartbeat-service.ts +19 -0
  339. package/src/workspace/migrations/065-bump-stale-heartbeat-interval.ts +60 -0
  340. package/src/workspace/migrations/066-seed-heartbeat-callsite-cost-default.ts +146 -0
  341. package/src/workspace/migrations/067-release-notes-safe-storage-limits.ts +72 -0
  342. package/src/workspace/migrations/068-release-notes-local-timezone.ts +65 -0
  343. package/src/workspace/migrations/registry.ts +8 -0
  344. package/src/__tests__/conversation-tool-setup-memory-scope.test.ts +0 -167
  345. package/src/memory/v2/__tests__/skill-qdrant.test.ts +0 -657
  346. package/src/memory/v2/skill-qdrant.ts +0 -404
  347. package/src/signals/bash.ts +0 -198
@@ -12,20 +12,37 @@ export interface MemoryV2ConceptRowRecord {
12
12
  simUser: number;
13
13
  simAssistant: number;
14
14
  simNow: number;
15
+ /**
16
+ * Cross-encoder rerank delta in raw rerank space (`alpha · r_norm_u`)
17
+ * for the user channel. Zero when rerank is disabled or the slug fell
18
+ * outside the unified top-K-by-pre-rerank-A_o window. Applied
19
+ * additively to A_o weighted by `c_user` — `simUser` itself is the
20
+ * raw fused score and never carries the boost. Stored as a JSON field,
21
+ * so older log rows pre-date this addition and decode with `undefined`;
22
+ * readers should fall back to 0.
23
+ */
24
+ simUserRerankBoost: number;
25
+ /**
26
+ * Cross-encoder rerank delta for the assistant channel. Same semantics
27
+ * as `simUserRerankBoost`, weighted by `c_assistant` when applied to
28
+ * A_o. The NOW channel intentionally bypasses rerank, so there is no
29
+ * `simNowRerankBoost`.
30
+ */
31
+ simAssistantRerankBoost: number;
32
+ /**
33
+ * True when rerank ran and this slug landed in the unified
34
+ * top-K-by-pre-rerank-A_o pool. Distinguishes "cross-encoder evaluated
35
+ * this and chose 0" from "rerank skipped this slug" so the inspector
36
+ * can keep the rerank rows visible at `+0.000` instead of silently
37
+ * dropping them. Older log rows pre-date this field and decode with
38
+ * `undefined`; readers should fall back to `false`.
39
+ */
40
+ inRerankPool: boolean;
15
41
  spreadContribution: number;
16
42
  source: "prior_state" | "ann_top50" | "both";
17
43
  status: "in_context" | "injected" | "not_injected" | "page_missing";
18
44
  }
19
45
 
20
- export interface MemoryV2SkillRowRecord {
21
- id: string;
22
- activation: number;
23
- simUser: number;
24
- simAssistant: number;
25
- simNow: number;
26
- status: "injected" | "not_injected";
27
- }
28
-
29
46
  export interface MemoryV2ConfigSnapshot {
30
47
  d: number;
31
48
  c_user: number;
@@ -34,7 +51,6 @@ export interface MemoryV2ConfigSnapshot {
34
51
  k: number;
35
52
  hops: number;
36
53
  top_k: number;
37
- top_k_skills: number;
38
54
  epsilon: number;
39
55
  }
40
56
 
@@ -43,7 +59,6 @@ export interface RecordMemoryV2ActivationLogParams {
43
59
  turn: number;
44
60
  mode: "context-load" | "per-turn";
45
61
  concepts: MemoryV2ConceptRowRecord[];
46
- skills: MemoryV2SkillRowRecord[];
47
62
  config: MemoryV2ConfigSnapshot;
48
63
  }
49
64
 
@@ -51,6 +66,11 @@ export function recordMemoryV2ActivationLog(
51
66
  params: RecordMemoryV2ActivationLogParams,
52
67
  ): void {
53
68
  const db = getDb();
69
+ // Skills now live as concept rows under `slug: "skills/<id>"`, so the
70
+ // separate `skills_json` column is always written empty. The column itself
71
+ // remains in the schema for backwards-compat with prior log rows; the
72
+ // reader drops it. A future migration can DROP the column once those rows
73
+ // age out of relevance.
54
74
  db.insert(memoryV2ActivationLogs)
55
75
  .values({
56
76
  id: uuid(),
@@ -59,7 +79,7 @@ export function recordMemoryV2ActivationLog(
59
79
  turn: params.turn,
60
80
  mode: params.mode,
61
81
  conceptsJson: JSON.stringify(params.concepts),
62
- skillsJson: JSON.stringify(params.skills),
82
+ skillsJson: "[]",
63
83
  configJson: JSON.stringify(params.config),
64
84
  createdAt: Date.now(),
65
85
  })
@@ -87,7 +107,6 @@ export interface MemoryV2ActivationLog {
87
107
  turn: number;
88
108
  mode: "context-load" | "per-turn";
89
109
  concepts: MemoryV2ConceptRowRecord[];
90
- skills: MemoryV2SkillRowRecord[];
91
110
  config: MemoryV2ConfigSnapshot;
92
111
  }
93
112
 
@@ -109,7 +128,6 @@ export function getMemoryV2ActivationLogByMessageIds(
109
128
  turn: row.turn,
110
129
  mode: row.mode as "context-load" | "per-turn",
111
130
  concepts: JSON.parse(row.conceptsJson) as MemoryV2ConceptRowRecord[],
112
- skills: JSON.parse(row.skillsJson) as MemoryV2SkillRowRecord[],
113
131
  config: JSON.parse(row.configJson) as MemoryV2ConfigSnapshot,
114
132
  };
115
133
  }
@@ -0,0 +1,169 @@
1
+ import type { MemoryV2ConceptRowRecord } from "./memory-v2-activation-log-store.js";
2
+ import { rawAll, rawGet } from "./raw-query.js";
3
+ import { listPages } from "./v2/page-store.js";
4
+
5
+ type ConceptStatus = MemoryV2ConceptRowRecord["status"];
6
+
7
+ export type ConceptFrequencyCounts = Record<ConceptStatus, number>;
8
+
9
+ export interface ConceptFrequencyRow {
10
+ slug: string;
11
+ counts: ConceptFrequencyCounts;
12
+ totalEvaluations: number;
13
+ lastInjectedAt: number | null;
14
+ /** Whether the slug currently has a markdown page on disk. */
15
+ onDisk: boolean;
16
+ }
17
+
18
+ export interface ConceptFrequencyResponse {
19
+ filters: {
20
+ conversationId: string | null;
21
+ sinceMs: number | null;
22
+ };
23
+ totals: {
24
+ /** Activation log rows scanned (turns of evaluation in the window). */
25
+ logCount: number;
26
+ /** Sum of per-row concept evaluations across all log rows in the window. */
27
+ conceptOccurrences: number;
28
+ };
29
+ /** Per-slug aggregates, sorted by `totalEvaluations` desc, then slug asc. */
30
+ concepts: ConceptFrequencyRow[];
31
+ /**
32
+ * Slugs present on disk that never appeared in any activation log row in
33
+ * the window — i.e. retrieval never even scored them as a candidate.
34
+ */
35
+ neverEvaluatedSlugs: string[];
36
+ }
37
+
38
+ export interface GetConceptFrequencyFilters {
39
+ conversationId?: string;
40
+ sinceMs?: number;
41
+ }
42
+
43
+ interface AggRow {
44
+ slug: string | null;
45
+ status: ConceptStatus | string | null;
46
+ count: number;
47
+ last_seen: number;
48
+ }
49
+
50
+ const ZERO_COUNTS: ConceptFrequencyCounts = {
51
+ injected: 0,
52
+ in_context: 0,
53
+ not_injected: 0,
54
+ page_missing: 0,
55
+ };
56
+
57
+ interface CountRow {
58
+ count: number;
59
+ }
60
+
61
+ export async function getConceptFrequencySummary(
62
+ workspaceDir: string,
63
+ filters: GetConceptFrequencyFilters = {},
64
+ ): Promise<ConceptFrequencyResponse> {
65
+ const conversationId = filters.conversationId ?? null;
66
+ const sinceMs = filters.sinceMs ?? null;
67
+
68
+ // Kick off the on-disk page walk in parallel with the (synchronous) SQL
69
+ // queries below — listPages does fs.readdir, rawAll/rawGet are sync.
70
+ const onDiskSlugsPromise = listPages(workspaceDir);
71
+
72
+ const aggRows = rawAll<AggRow>(
73
+ `SELECT
74
+ json_extract(c.value, '$.slug') AS slug,
75
+ json_extract(c.value, '$.status') AS status,
76
+ COUNT(*) AS count,
77
+ MAX(l.created_at) AS last_seen
78
+ FROM memory_v2_activation_logs l, json_each(l.concepts_json) c
79
+ WHERE (? IS NULL OR l.conversation_id = ?)
80
+ AND (? IS NULL OR l.created_at >= ?)
81
+ GROUP BY slug, status`,
82
+ conversationId,
83
+ conversationId,
84
+ sinceMs,
85
+ sinceMs,
86
+ );
87
+
88
+ const logCountRow = rawGet<CountRow>(
89
+ `SELECT COUNT(*) AS count
90
+ FROM memory_v2_activation_logs
91
+ WHERE (? IS NULL OR conversation_id = ?)
92
+ AND (? IS NULL OR created_at >= ?)`,
93
+ conversationId,
94
+ conversationId,
95
+ sinceMs,
96
+ sinceMs,
97
+ );
98
+
99
+ const bySlug = new Map<string, ConceptFrequencyRow>();
100
+ let conceptOccurrences = 0;
101
+
102
+ for (const row of aggRows) {
103
+ if (!row.slug) continue;
104
+ let entry = bySlug.get(row.slug);
105
+ if (!entry) {
106
+ entry = {
107
+ slug: row.slug,
108
+ counts: { ...ZERO_COUNTS },
109
+ totalEvaluations: 0,
110
+ lastInjectedAt: null,
111
+ onDisk: false,
112
+ };
113
+ bySlug.set(row.slug, entry);
114
+ }
115
+
116
+ switch (row.status) {
117
+ case "injected":
118
+ entry.counts.injected += row.count;
119
+ entry.lastInjectedAt =
120
+ entry.lastInjectedAt === null
121
+ ? row.last_seen
122
+ : Math.max(entry.lastInjectedAt, row.last_seen);
123
+ break;
124
+ case "in_context":
125
+ entry.counts.in_context += row.count;
126
+ break;
127
+ case "not_injected":
128
+ entry.counts.not_injected += row.count;
129
+ break;
130
+ case "page_missing":
131
+ entry.counts.page_missing += row.count;
132
+ break;
133
+ default:
134
+ // Forward-compat: unknown status values are ignored, not summed into
135
+ // totalEvaluations. The activation pipeline produces a closed enum.
136
+ continue;
137
+ }
138
+ entry.totalEvaluations += row.count;
139
+ conceptOccurrences += row.count;
140
+ }
141
+
142
+ const onDiskSlugs = new Set(await onDiskSlugsPromise);
143
+ for (const entry of bySlug.values()) {
144
+ entry.onDisk = onDiskSlugs.has(entry.slug);
145
+ }
146
+
147
+ const neverEvaluatedSlugs: string[] = [];
148
+ for (const slug of onDiskSlugs) {
149
+ if (!bySlug.has(slug)) neverEvaluatedSlugs.push(slug);
150
+ }
151
+ neverEvaluatedSlugs.sort();
152
+
153
+ const concepts = [...bySlug.values()].sort((a, b) => {
154
+ if (b.totalEvaluations !== a.totalEvaluations) {
155
+ return b.totalEvaluations - a.totalEvaluations;
156
+ }
157
+ return a.slug.localeCompare(b.slug);
158
+ });
159
+
160
+ return {
161
+ filters: { conversationId, sinceMs },
162
+ totals: {
163
+ logCount: logCountRow?.count ?? 0,
164
+ conceptOccurrences,
165
+ },
166
+ concepts,
167
+ neverEvaluatedSlugs,
168
+ };
169
+ }
@@ -0,0 +1,18 @@
1
+ import type { DrizzleDb } from "../db-connection.js";
2
+ import { getSqliteFrom } from "../db-connection.js";
3
+ import { withCrashRecovery } from "./validate-migration-state.js";
4
+
5
+ const CHECKPOINT_KEY = "migration_trace_events_created_at_index_v1";
6
+
7
+ /**
8
+ * Add an index on `trace_events.created_at` so the periodic prune job
9
+ * can locate expired rows without a full table scan.
10
+ */
11
+ export function migrateTraceEventsCreatedAtIndex(database: DrizzleDb): void {
12
+ withCrashRecovery(database, CHECKPOINT_KEY, () => {
13
+ const raw = getSqliteFrom(database);
14
+ raw.exec(
15
+ `CREATE INDEX IF NOT EXISTS idx_trace_events_created_at ON trace_events(created_at)`,
16
+ );
17
+ });
18
+ }
@@ -200,6 +200,7 @@ export {
200
200
  migrateHeartbeatRuns,
201
201
  } from "./237-heartbeat-runs.js";
202
202
  export { migrateScheduleRetryPolicy } from "./238-schedule-retry-policy.js";
203
+ export { migrateTraceEventsCreatedAtIndex } from "./239-trace-events-created-at-index.js";
203
204
  export {
204
205
  MIGRATION_REGISTRY,
205
206
  type MigrationRegistryEntry,
@@ -1,6 +1,12 @@
1
1
  import { beforeEach, describe, expect, mock, test } from "bun:test";
2
2
 
3
3
  import { makeMockLogger } from "../../__tests__/helpers/mock-logger.js";
4
+ import { _setOverridesForTesting } from "../../config/assistant-feature-flags.js";
5
+
6
+ // This test exercises the v1 PKB search path. The `memory-v2-enabled` flag
7
+ // (registry default `true`) makes pkb-search short-circuit to keep traffic
8
+ // off the legacy collection — disable it so the v1 path stays under test.
9
+ _setOverridesForTesting({ "memory-v2-enabled": false });
4
10
 
5
11
  mock.module("../../util/logger.js", () => ({
6
12
  getLogger: () => makeMockLogger(),
@@ -388,7 +388,6 @@ export class VellumQdrantClient {
388
388
  limit: number,
389
389
  targetTypes: Array<"segment" | "item" | "summary" | "media">,
390
390
  excludeMessageIds?: string[],
391
- scopeIds?: string[],
392
391
  ): Promise<QdrantSearchResult[]> {
393
392
  const mustConditions: Array<Record<string, unknown>> = [
394
393
  {
@@ -415,18 +414,6 @@ export class VellumQdrantClient {
415
414
  });
416
415
  }
417
416
 
418
- // Scope filtering: accept points whose memory_scope_id matches one of the
419
- // allowed scopes, OR points that lack the field entirely (legacy data).
420
- // Post-query DB filtering remains as defense-in-depth for legacy points.
421
- if (scopeIds && scopeIds.length > 0) {
422
- mustConditions.push({
423
- should: [
424
- { key: "memory_scope_id", match: { any: scopeIds } },
425
- { is_empty: { key: "memory_scope_id" } },
426
- ],
427
- });
428
- }
429
-
430
417
  const mustNotConditions: Array<Record<string, unknown>> = [
431
418
  { key: "_meta", match: { value: true } },
432
419
  ];
@@ -0,0 +1,374 @@
1
+ /** Local cross-encoder rerank backend — drives the rerank-worker subprocess. */
2
+ import { existsSync } from "node:fs";
3
+
4
+ import type { RerankDtype } from "../config/schemas/memory-v2.js";
5
+ import { getLogger } from "../util/logger.js";
6
+ import { getEmbeddingModelsDir } from "../util/platform.js";
7
+ import { PromiseGuard } from "../util/promise-guard.js";
8
+ import { EmbeddingRuntimeManager } from "./embedding-runtime-manager.js";
9
+
10
+ const log = getLogger("memory-rerank-local");
11
+
12
/** A single newline-delimited JSON message read from the rerank worker's stdout. */
interface WorkerResponse {
  /** Request id echoed back by the worker; used to match a reply to its pending request. */
  id?: number;
  /** Lifecycle marker; `"ready"` and `"error"` are the values handled during startup. */
  type?: string;
  /** Scores for a request; `score()` requires exactly one per submitted passage. */
  scores?: number[];
  /** Error text reported by the worker (startup failure or failed request). */
  error?: string;
}

/**
 * Cross-encoder rerank backend that delegates scoring to a `rerank-worker`
 * subprocess spoken to over newline-delimited JSON on stdin/stdout.
 *
 * The worker is spawned lazily on the first `score()` call. Requests are
 * correlated with replies through a numeric `id`. `dispose()` is graceful:
 * the worker is only killed once no call is active, no reply is outstanding
 * and no startup handshake is in flight.
 */
export class LocalRerankBackend {
  readonly model: string;
  readonly dtype: RerankDtype;

  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  private workerProc: any = null;
  /**
   * True only after the current `workerProc` has reported `ready`.
   * `workerProc` itself is assigned at spawn time, so it cannot be used on its
   * own to decide whether requests may be sent.
   */
  private workerReady = false;
  private stdoutBuffer = "";
  private requestCounter = 0;
  private pendingRequests = new Map<
    number,
    { resolve: (response: WorkerResponse) => void }
  >();
  private stdoutReaderActive = false;
  private activeRequests = 0;
  private disposeRequested = false;

  private readyResolve: (() => void) | null = null;
  private readyReject: ((err: Error) => void) | null = null;

  private readonly initGuard = new PromiseGuard<void>();

  constructor(model: string, dtype: RerankDtype) {
    this.model = model;
    this.dtype = dtype;
  }

  /**
   * Score paired `(queries[i], passages[i])` tuples in one batched ONNX
   * inference call. Multiple distinct queries can ride in a single batch
   * so callers can score the user-channel and assistant-channel queries
   * against a shared candidate set in one tokenizer + forward pass.
   *
   * @param queries Query text for each pair; must have the same length as `passages`.
   * @param passages Passage text for each pair. An empty array short-circuits to `[]`.
   * @returns One score per pair, in input order.
   * @throws Error if the backend is being disposed, the array lengths differ,
   *   the worker cannot be started, or the worker replies with an error or a
   *   score list of the wrong length.
   */
  async score(queries: string[], passages: string[]): Promise<number[]> {
    if (this.disposeRequested) {
      throw new Error("Local rerank backend is shutting down");
    }
    if (passages.length === 0) return [];
    if (queries.length !== passages.length) {
      throw new Error(
        `Rerank backend got ${queries.length} queries for ${passages.length} passages`,
      );
    }

    this.activeRequests++;
    try {
      await this.ensureInitialized();
      const response = await this.sendRequest({ queries, passages });
      if (response.error) {
        throw new Error(`Rerank worker error: ${response.error}`);
      }
      if (!response.scores) {
        throw new Error("Rerank worker returned no scores");
      }
      if (response.scores.length !== passages.length) {
        throw new Error(
          `Rerank worker returned ${response.scores.length} scores for ${passages.length} passages`,
        );
      }
      return response.scores;
    } finally {
      this.activeRequests--;
      // A dispose() issued while this call was running is completed here.
      this.disposeIfIdle();
    }
  }

  /**
   * Request shutdown. New `score()` calls are rejected immediately; the worker
   * is killed as soon as the backend is idle (possibly right away).
   */
  dispose(): void {
    this.disposeRequested = true;
    this.disposeIfIdle();
  }

  /**
   * Write one request line to the worker and return a promise for its reply.
   * The promise never rejects: transport problems are reported as
   * `{ error }` responses so `score()` has a single failure path.
   */
  private sendRequest(payload: {
    queries: string[];
    passages: string[];
  }): Promise<WorkerResponse> {
    const id = ++this.requestCounter;
    return new Promise((resolve) => {
      const proc = this.workerProc;
      if (!proc) {
        resolve({ error: "Worker not initialized" });
        return;
      }
      this.pendingRequests.set(id, { resolve });
      try {
        proc.stdin.write(JSON.stringify({ id, ...payload }) + "\n");
      } catch (err) {
        // The request never reached the worker, so no reply will arrive.
        // Unregister it; a lingering entry would block disposeIfIdle().
        this.pendingRequests.delete(id);
        resolve({
          error: `Failed to write to rerank worker: ${
            err instanceof Error ? err.message : String(err)
          }`,
        });
        return;
      }
      try {
        proc.stdin.flush();
      } catch {
        // Worker may have exited — stdout reader cleanup resolves pending requests.
      }
    });
  }

  /**
   * Resolve once a worker is spawned AND has reported ready.
   *
   * Checking `workerProc` alone is not enough: it is assigned before the
   * handshake, so a caller arriving mid-startup would send a request that is
   * never answered if that startup then fails. Such callers instead join the
   * in-flight initialization through `initGuard`.
   */
  private async ensureInitialized(): Promise<void> {
    if (this.workerProc && this.workerReady) return;
    await this.initGuard.run(() => this.initialize());
  }

  /** Make sure the embedding runtime is installed, then spawn the worker. */
  private async initialize(): Promise<void> {
    log.info({ model: this.model }, "Initializing local rerank backend");

    const runtimeManager = new EmbeddingRuntimeManager();
    if (!runtimeManager.isReady()) {
      log.info("Embedding runtime not yet available, waiting for download...");
      await runtimeManager.ensureInstalled();
    }

    const bunPath = runtimeManager.getBunPath();
    const workerPath = runtimeManager.getRerankWorkerPath();

    if (!bunPath) {
      throw new Error("Local rerank backend unavailable: no bun binary found");
    }
    if (!existsSync(workerPath)) {
      throw new Error(
        `Local rerank backend unavailable: worker script not found at ${workerPath}`,
      );
    }

    await this.startWorker(bunPath, workerPath);
  }

  /**
   * Spawn the worker subprocess and wait for its `ready` message.
   *
   * @throws Error carrying the worker's stderr (or the handshake error) when
   *   the worker fails, exits, or times out before becoming ready.
   */
  private async startWorker(
    bunPath: string,
    workerPath: string,
  ): Promise<void> {
    const embeddingModelsDir = getEmbeddingModelsDir();
    const modelCacheDir = `${embeddingModelsDir}/model-cache`;

    log.info(
      { bunPath, workerPath, model: this.model, dtype: this.dtype },
      "Spawning rerank worker process",
    );

    const proc = Bun.spawn({
      cmd: [
        bunPath,
        "--smol",
        workerPath,
        this.model,
        modelCacheDir,
        this.dtype,
      ],
      stdin: "pipe",
      stdout: "pipe",
      stderr: "pipe",
      cwd: embeddingModelsDir,
    });

    this.workerProc = proc;
    this.workerReady = false;
    this.startStdoutReader();

    try {
      await this.waitForReady();
    } catch (err) {
      this.workerProc = null;
      this.workerReady = false;
      this.stdoutReaderActive = false;
      // Drop any partial line from the failed worker so it cannot be glued
      // onto the first line of a later worker.
      this.stdoutBuffer = "";
      // Defensive: nothing should be pending during startup, but never leave
      // a caller waiting on a worker that will not answer.
      this.failPendingRequests("Rerank worker failed to start");
      try {
        proc.kill();
      } catch {
        /* may already be dead */
      }
      const exitCode = await proc.exited.catch(() => undefined);
      const stderr = await new Response(proc.stderr).text().catch(() => "");
      if (stderr.trim()) {
        log.warn({ stderr: stderr.trim(), exitCode }, "Rerank worker stderr");
      }
      throw new Error(
        `Rerank worker exited (code ${exitCode ?? "unknown"}): ${
          stderr.trim() || (err instanceof Error ? err.message : String(err))
        }`,
      );
    }

    // The stdout reader may already have torn this worker down if it died
    // right after reporting ready; only mark it usable if it is still current.
    this.workerReady = this.workerProc === proc;

    this.drainStderr(proc.stderr);
    log.info(
      { pid: proc.pid, model: this.model },
      "Rerank worker process started",
    );
    // Complete a dispose() that was requested while the worker was starting.
    this.disposeIfIdle();
  }

  /** Continuously read worker stderr and forward it to the debug log. */
  private drainStderr(stderr: ReadableStream<Uint8Array>): void {
    const reader = stderr.getReader();
    const decoder = new TextDecoder();
    void (async () => {
      try {
        while (true) {
          const { done, value } = await reader.read();
          if (done) break;
          const text = decoder.decode(value, { stream: true }).trim();
          if (text) log.debug({ workerStderr: text }, "Rerank worker stderr");
        }
      } catch {
        /* expected on shutdown */
      }
    })();
  }

  /**
   * Start the background loop that reads worker stdout, feeds it to
   * `processStdoutBuffer`, and tears the worker state down when the stream
   * ends while that worker is still the current one.
   */
  private startStdoutReader(): void {
    if (this.stdoutReaderActive || !this.workerProc) return;
    this.stdoutReaderActive = true;

    const proc = this.workerProc;
    const reader = proc.stdout.getReader();
    const decoder = new TextDecoder();

    void (async () => {
      try {
        while (true) {
          const { done, value } = await reader.read();
          if (done) break;
          // Keep draining a superseded worker's pipe, but never let its
          // output reach the buffer shared with the current worker.
          if (this.workerProc !== proc) continue;
          this.stdoutBuffer += decoder.decode(value, { stream: true });
          this.processStdoutBuffer();
        }
      } catch {
        /* reader cancelled or stream errored */
      }

      if (this.workerProc === proc) {
        this.failPendingRequests("Rerank worker process exited unexpectedly");
        this.workerProc = null;
        this.workerReady = false;
        this.stdoutReaderActive = false;
        this.stdoutBuffer = "";
        this.initGuard.reset();
        // stdout can end or error while the process is still alive; it is
        // unusable without a reply channel, so do not leave it orphaned.
        try {
          proc.kill();
        } catch {
          /* already exited */
        }
      }
    })();
  }

  /** Resolve every outstanding request with `{ error: message }` and clear the table. */
  private failPendingRequests(message: string): void {
    for (const pending of this.pendingRequests.values()) {
      pending.resolve({ error: message });
    }
    this.pendingRequests.clear();
  }

  /**
   * Consume every complete line in `stdoutBuffer`, leaving any trailing
   * partial line for the next chunk. Lines that are not JSON objects are
   * ignored.
   */
  private processStdoutBuffer(): void {
    let idx: number;
    while ((idx = this.stdoutBuffer.indexOf("\n")) !== -1) {
      const line = this.stdoutBuffer.slice(0, idx);
      this.stdoutBuffer = this.stdoutBuffer.slice(idx + 1);
      if (!line.trim()) continue;

      let parsed: unknown;
      try {
        parsed = JSON.parse(line);
      } catch {
        continue;
      }
      // JSON.parse also accepts `null`, numbers, strings and arrays. Reading
      // `.type` off `null` would throw inside the reader loop and be mistaken
      // for a worker exit, so skip anything that is not a plain object.
      if (
        typeof parsed !== "object" ||
        parsed === null ||
        Array.isArray(parsed)
      ) {
        continue;
      }
      const msg = parsed as WorkerResponse;

      if (msg.type === "ready") {
        this.readyResolve?.();
        this.readyResolve = null;
        this.readyReject = null;
        continue;
      }
      if (msg.type === "error" && this.readyReject) {
        this.readyReject(
          new Error(msg.error ?? "Worker initialization failed"),
        );
        this.readyResolve = null;
        this.readyReject = null;
        continue;
      }

      if (msg.id !== undefined) {
        const pending = this.pendingRequests.get(msg.id);
        if (pending) {
          this.pendingRequests.delete(msg.id);
          pending.resolve(msg);
          this.disposeIfIdle();
        }
      }
    }
  }

  /**
   * Promise that settles when the worker reports `ready` (resolve), reports a
   * startup `error`, exits first, or exceeds the startup timeout (reject).
   */
  private waitForReady(): Promise<void> {
    return new Promise<void>((resolve, reject) => {
      // First-call timeout. Generous because the first run downloads the
      // ONNX weights (~280 MB to ~1 GB depending on model) before loading.
      const timeout = setTimeout(() => {
        this.readyResolve = null;
        this.readyReject = null;
        reject(new Error("Rerank worker timed out waiting for model to load"));
      }, 120_000);

      this.readyResolve = () => {
        clearTimeout(timeout);
        resolve();
      };
      this.readyReject = (err: Error) => {
        clearTimeout(timeout);
        reject(err);
      };

      this.workerProc?.exited.then(() => {
        // Only relevant while the handshake is still outstanding; after
        // `ready` the callbacks are cleared and this is a no-op.
        if (this.readyResolve) {
          clearTimeout(timeout);
          this.readyResolve = null;
          this.readyReject = null;
          reject(
            new Error("Rerank worker process exited before becoming ready"),
          );
        }
      });
    });
  }

  /**
   * Kill the worker if disposal was requested and nothing is in flight:
   * no active `score()` call, no pending reply, no startup handshake.
   */
  private disposeIfIdle(): void {
    if (!this.disposeRequested) return;
    if (this.activeRequests > 0) return;
    if (this.pendingRequests.size > 0) return;
    if (this.readyResolve || this.readyReject) return;

    const proc = this.workerProc;
    this.workerProc = null;
    this.workerReady = false;
    this.stdoutReaderActive = false;
    this.stdoutBuffer = "";
    this.initGuard.reset();

    if (!proc) return;

    try {
      proc.kill();
    } catch {
      /* may already be exiting */
    }
  }
}
343
+
344
+ // ── Module-level singleton management ─────────────────────────────────
345
+
346
let _backend: LocalRerankBackend | null = null;

/**
 * Return the shared rerank backend for `(model, dtype)`.
 *
 * The cached instance is reused when both values match. Otherwise the cached
 * instance (if any) is asked to dispose itself and a fresh backend takes its
 * place.
 */
export function getOrCreateRerankBackend(
  model: string,
  dtype: RerankDtype,
): LocalRerankBackend {
  const cached = _backend;
  if (cached !== null) {
    const reusable = cached.model === model && cached.dtype === dtype;
    if (reusable) return cached;

    try {
      cached.dispose();
    } catch {
      /* best effort */
    }
  }

  const created = new LocalRerankBackend(model, dtype);
  _backend = created;
  return created;
}
363
+
364
/**
 * @internal Test-only: dispose the cached backend (best effort) and forget it,
 * so the next `getOrCreateRerankBackend` call builds a new instance.
 */
export function _resetRerankBackendForTests(): void {
  const previous = _backend;
  _backend = null;
  if (previous === null) return;

  try {
    previous.dispose();
  } catch {
    /* best effort */
  }
}