@vellumai/assistant 0.5.16 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (407) hide show
  1. package/ARCHITECTURE.md +1 -1
  2. package/Dockerfile +0 -3
  3. package/knip.json +2 -1
  4. package/openapi.yaml +660 -80
  5. package/package.json +1 -1
  6. package/src/__tests__/actor-token-service.test.ts +68 -0
  7. package/src/__tests__/agent-loop.test.ts +0 -32
  8. package/src/__tests__/always-loaded-tools-guard.test.ts +2 -2
  9. package/src/__tests__/anthropic-provider.test.ts +57 -3
  10. package/src/__tests__/app-compiler.test.ts +120 -0
  11. package/src/__tests__/assistant-feature-flags-integration.test.ts +2 -2
  12. package/src/__tests__/call-conversation-messages.test.ts +2 -6
  13. package/src/__tests__/call-domain.test.ts +2 -6
  14. package/src/__tests__/call-pointer-messages.test.ts +2 -14
  15. package/src/__tests__/call-recovery.test.ts +2 -6
  16. package/src/__tests__/call-routes-http.test.ts +2 -6
  17. package/src/__tests__/call-store.test.ts +2 -6
  18. package/src/__tests__/cancel-resolves-conversation-key.test.ts +2 -6
  19. package/src/__tests__/canonical-guardian-store.test.ts +2 -6
  20. package/src/__tests__/channel-delivery-store.test.ts +2 -6
  21. package/src/__tests__/channel-retry-sweep.test.ts +2 -6
  22. package/src/__tests__/checker.test.ts +25 -3
  23. package/src/__tests__/clawhub.test.ts +54 -24
  24. package/src/__tests__/cli-command-risk-guard.test.ts +14 -0
  25. package/src/__tests__/cli-memory.test.ts +74 -69
  26. package/src/__tests__/config-schema.test.ts +1 -1
  27. package/src/__tests__/config-set-platform-guard.test.ts +302 -0
  28. package/src/__tests__/confirmation-request-guardian-bridge.test.ts +2 -6
  29. package/src/__tests__/contacts-tools.test.ts +31 -0
  30. package/src/__tests__/context-overflow-reducer.test.ts +86 -0
  31. package/src/__tests__/context-token-estimator.test.ts +175 -10
  32. package/src/__tests__/conversation-agent-loop-overflow.test.ts +9 -0
  33. package/src/__tests__/conversation-agent-loop.test.ts +9 -0
  34. package/src/__tests__/conversation-attachments.test.ts +2 -6
  35. package/src/__tests__/conversation-attention-store.test.ts +2 -6
  36. package/src/__tests__/conversation-clear-safety.test.ts +2 -6
  37. package/src/__tests__/conversation-delete-schedule-cleanup.test.ts +4 -10
  38. package/src/__tests__/conversation-disk-view-integration.test.ts +2 -6
  39. package/src/__tests__/conversation-disk-view.test.ts +2 -6
  40. package/src/__tests__/conversation-error.test.ts +33 -2
  41. package/src/__tests__/conversation-fork-crud.test.ts +2 -6
  42. package/src/__tests__/conversation-history-web-search.test.ts +5 -0
  43. package/src/__tests__/conversation-load-history-repair.test.ts +5 -1
  44. package/src/__tests__/conversation-media-retry.test.ts +91 -0
  45. package/src/__tests__/conversation-starter-routes.test.ts +20 -11
  46. package/src/__tests__/conversation-store.test.ts +2 -6
  47. package/src/__tests__/conversation-usage.test.ts +2 -6
  48. package/src/__tests__/conversation-wipe.test.ts +11 -408
  49. package/src/__tests__/credential-execution-feature-gates.test.ts +3 -3
  50. package/src/__tests__/credential-execution-shell-lockdown.test.ts +2 -2
  51. package/src/__tests__/credential-security-e2e.test.ts +2 -0
  52. package/src/__tests__/followup-tools.test.ts +2 -6
  53. package/src/__tests__/graph-extraction-event-date.test.ts +186 -0
  54. package/src/__tests__/guardian-action-conversation-turn.test.ts +2 -6
  55. package/src/__tests__/guardian-action-followup-executor.test.ts +2 -6
  56. package/src/__tests__/guardian-action-followup-store.test.ts +2 -6
  57. package/src/__tests__/guardian-action-grant-mint-consume.test.ts +2 -6
  58. package/src/__tests__/guardian-action-late-reply.test.ts +2 -6
  59. package/src/__tests__/guardian-action-store.test.ts +2 -6
  60. package/src/__tests__/guardian-binding-drift-heal.test.ts +2 -6
  61. package/src/__tests__/guardian-decision-primitive-canonical.test.ts +8 -8
  62. package/src/__tests__/guardian-dispatch.test.ts +2 -6
  63. package/src/__tests__/guardian-grant-minting.test.ts +2 -14
  64. package/src/__tests__/guardian-principal-id-roundtrip.test.ts +2 -6
  65. package/src/__tests__/guardian-routing-invariants.test.ts +192 -6
  66. package/src/__tests__/guardian-routing-state.test.ts +2 -6
  67. package/src/__tests__/guardian-verification-voice-binding.test.ts +2 -6
  68. package/src/__tests__/inbound-invite-redemption.test.ts +2 -6
  69. package/src/__tests__/injection-block.test.ts +154 -0
  70. package/src/__tests__/install-meta.test.ts +506 -0
  71. package/src/__tests__/install-skill-routing.test.ts +292 -0
  72. package/src/__tests__/invite-redemption-service.test.ts +2 -6
  73. package/src/__tests__/invite-routes-http.test.ts +2 -6
  74. package/src/__tests__/jobs-store-qdrant-breaker.test.ts +2 -14
  75. package/src/__tests__/list-messages-attachments.test.ts +2 -6
  76. package/src/__tests__/llm-context-route-provider.test.ts +2 -6
  77. package/src/__tests__/llm-request-log-turn-query.test.ts +2 -6
  78. package/src/__tests__/llm-usage-store.test.ts +2 -6
  79. package/src/__tests__/log-export-workspace.test.ts +2 -6
  80. package/src/__tests__/managed-store.test.ts +38 -11
  81. package/src/__tests__/memory-jobs-worker-backoff.test.ts +2 -8
  82. package/src/__tests__/memory-recall-log-store.test.ts +2 -6
  83. package/src/__tests__/memory-upsert-concurrency.test.ts +4 -112
  84. package/src/__tests__/non-member-access-request.test.ts +2 -6
  85. package/src/__tests__/notification-guardian-path.test.ts +2 -6
  86. package/src/__tests__/oauth-cli.test.ts +364 -2
  87. package/src/__tests__/oauth2-gateway-transport.test.ts +18 -3
  88. package/src/__tests__/outlook-attachments.test.ts +301 -0
  89. package/src/__tests__/outlook-automation-tools.test.ts +425 -0
  90. package/src/__tests__/outlook-categories.test.ts +212 -0
  91. package/src/__tests__/outlook-client-automation.test.ts +246 -0
  92. package/src/__tests__/outlook-compose-tools.test.ts +325 -0
  93. package/src/__tests__/outlook-declutter-tools.test.ts +585 -0
  94. package/src/__tests__/outlook-email-watcher.test.ts +322 -0
  95. package/src/__tests__/outlook-follow-up.test.ts +196 -0
  96. package/src/__tests__/outlook-messaging-provider.test.ts +498 -3
  97. package/src/__tests__/outlook-trash.test.ts +77 -0
  98. package/src/__tests__/outlook-unsubscribe.test.ts +250 -0
  99. package/src/__tests__/platform-callback-registration.test.ts +4 -4
  100. package/src/__tests__/playbook-execution.test.ts +76 -80
  101. package/src/__tests__/playbook-tools.test.ts +5 -7
  102. package/src/__tests__/provider-error-scenarios.test.ts +21 -0
  103. package/src/__tests__/rebuild-index-graph-nodes.test.ts +273 -0
  104. package/src/__tests__/registry.test.ts +2 -2
  105. package/src/__tests__/require-fresh-approval.test.ts +64 -2
  106. package/src/__tests__/runtime-events-sse-parity.test.ts +2 -6
  107. package/src/__tests__/runtime-events-sse.test.ts +2 -6
  108. package/src/__tests__/schedule-store.test.ts +2 -6
  109. package/src/__tests__/schedule-tools.test.ts +2 -6
  110. package/src/__tests__/scheduler-recurrence.test.ts +1 -5
  111. package/src/__tests__/scoped-approval-grants.test.ts +2 -6
  112. package/src/__tests__/scoped-grant-security-matrix.test.ts +2 -6
  113. package/src/__tests__/search-skills-unified.test.ts +421 -0
  114. package/src/__tests__/secret-onetime-send.test.ts +2 -0
  115. package/src/__tests__/send-endpoint-busy.test.ts +2 -6
  116. package/src/__tests__/sequence-store.test.ts +2 -6
  117. package/src/__tests__/server-history-render.test.ts +2 -6
  118. package/src/__tests__/skill-feature-flags-integration.test.ts +38 -31
  119. package/src/__tests__/skill-feature-flags.test.ts +6 -6
  120. package/src/__tests__/skill-load-feature-flag.test.ts +11 -11
  121. package/src/__tests__/skill-memory.test.ts +140 -98
  122. package/src/__tests__/skills-uninstall.test.ts +2 -2
  123. package/src/__tests__/skills.test.ts +1 -1
  124. package/src/__tests__/slack-inbound-verification.test.ts +2 -6
  125. package/src/__tests__/task-compiler.test.ts +2 -6
  126. package/src/__tests__/task-management-tools.test.ts +2 -6
  127. package/src/__tests__/task-memory-cleanup.test.ts +173 -229
  128. package/src/__tests__/task-runner.test.ts +2 -6
  129. package/src/__tests__/task-scheduler.test.ts +2 -6
  130. package/src/__tests__/test-preload.ts +3 -0
  131. package/src/__tests__/tool-approval-handler.test.ts +2 -6
  132. package/src/__tests__/tool-grant-request-escalation.test.ts +2 -6
  133. package/src/__tests__/tool-side-effects-slack-dm.test.ts +276 -0
  134. package/src/__tests__/trust-store.test.ts +1 -1
  135. package/src/__tests__/trusted-contact-inline-approval-integration.test.ts +2 -6
  136. package/src/__tests__/trusted-contact-lifecycle-notifications.test.ts +2 -6
  137. package/src/__tests__/trusted-contact-multichannel.test.ts +2 -6
  138. package/src/__tests__/trusted-contact-verification.test.ts +2 -6
  139. package/src/__tests__/turn-boundary-resolution.test.ts +2 -6
  140. package/src/__tests__/usage-cache-backfill-migration.test.ts +1 -6
  141. package/src/__tests__/usage-routes.test.ts +2 -6
  142. package/src/__tests__/verification-control-plane-policy.test.ts +0 -2
  143. package/src/__tests__/voice-invite-redemption.test.ts +2 -6
  144. package/src/__tests__/voice-scoped-grant-consumer.test.ts +2 -6
  145. package/src/__tests__/voice-session-bridge.test.ts +2 -6
  146. package/src/__tests__/volume-security-guard.test.ts +2 -0
  147. package/src/__tests__/workspace-lifecycle.test.ts +29 -1
  148. package/src/__tests__/workspace-migration-009-backfill-conversation-disk-view.test.ts +2 -6
  149. package/src/__tests__/workspace-migration-013-repair-conversation-disk-view.test.ts +2 -6
  150. package/src/__tests__/workspace-migration-026-backfill-install-meta.test.ts +558 -0
  151. package/src/__tests__/workspace-policy.test.ts +1 -1
  152. package/src/agent/attachments.ts +7 -2
  153. package/src/agent/image-optimize.ts +165 -0
  154. package/src/agent/loop.ts +1 -15
  155. package/src/bundler/app-compiler.ts +179 -2
  156. package/src/bundler/package-resolver.ts +3 -5
  157. package/src/cli/__tests__/notifications.test.ts +1 -2
  158. package/src/cli/cli-memory.ts +67 -64
  159. package/src/cli/commands/avatar.ts +3 -3
  160. package/src/cli/commands/config.ts +26 -13
  161. package/src/cli/commands/doctor.ts +2 -2
  162. package/src/cli/commands/memory.ts +41 -55
  163. package/src/cli/commands/oauth/__tests__/connect.test.ts +2 -2
  164. package/src/cli/commands/oauth/__tests__/disconnect.test.ts +2 -2
  165. package/src/cli/commands/oauth/__tests__/mode.test.ts +8 -1
  166. package/src/cli/commands/oauth/__tests__/status.test.ts +2 -2
  167. package/src/cli/commands/oauth/connect.ts +11 -6
  168. package/src/cli/commands/oauth/mode.ts +7 -0
  169. package/src/cli/commands/oauth/shared.ts +39 -3
  170. package/src/cli/commands/platform/__tests__/connect.test.ts +1 -1
  171. package/src/cli/commands/platform/__tests__/disconnect.test.ts +1 -1
  172. package/src/cli/commands/platform/__tests__/status.test.ts +5 -5
  173. package/src/cli/commands/platform/index.ts +16 -16
  174. package/src/cli/commands/skills.ts +88 -16
  175. package/src/cli/commands/trust.ts +2 -2
  176. package/src/cli/lib/daemon-credential-client.ts +2 -3
  177. package/src/config/bundled-skills/acp/TOOLS.json +1 -1
  178. package/src/config/bundled-skills/contacts/SKILL.md +0 -1
  179. package/src/config/bundled-skills/contacts/TOOLS.json +0 -8
  180. package/src/config/bundled-skills/contacts/tools/contact-upsert.ts +0 -4
  181. package/src/config/bundled-skills/gmail/SKILL.md +2 -10
  182. package/src/config/bundled-skills/google-calendar/SKILL.md +1 -9
  183. package/src/config/bundled-skills/messaging/SKILL.md +10 -18
  184. package/src/config/bundled-skills/messaging/tools/messaging-analyze-style.ts +40 -33
  185. package/src/config/bundled-skills/outlook/SKILL.md +189 -0
  186. package/src/config/bundled-skills/outlook/TOOLS.json +530 -0
  187. package/src/config/bundled-skills/outlook/tools/outlook-attachments.ts +85 -0
  188. package/src/config/bundled-skills/outlook/tools/outlook-categories.ts +77 -0
  189. package/src/config/bundled-skills/outlook/tools/outlook-draft.ts +84 -0
  190. package/src/config/bundled-skills/outlook/tools/outlook-follow-up.ts +94 -0
  191. package/src/config/bundled-skills/outlook/tools/outlook-forward.ts +49 -0
  192. package/src/config/bundled-skills/outlook/tools/outlook-outreach-scan.ts +237 -0
  193. package/src/config/bundled-skills/outlook/tools/outlook-rules.ts +161 -0
  194. package/src/config/bundled-skills/outlook/tools/outlook-send-draft.ts +32 -0
  195. package/src/config/bundled-skills/outlook/tools/outlook-sender-digest.ts +272 -0
  196. package/src/config/bundled-skills/outlook/tools/outlook-trash.ts +29 -0
  197. package/src/config/bundled-skills/outlook/tools/outlook-unsubscribe.ts +129 -0
  198. package/src/config/bundled-skills/outlook/tools/outlook-vacation.ts +87 -0
  199. package/src/config/bundled-skills/outlook/tools/shared.ts +20 -0
  200. package/src/config/bundled-skills/outlook-calendar/SKILL.md +51 -0
  201. package/src/config/bundled-skills/outlook-calendar/TOOLS.json +221 -0
  202. package/src/config/bundled-skills/outlook-calendar/calendar-client.ts +252 -0
  203. package/src/config/bundled-skills/outlook-calendar/tools/outlook-calendar-check-availability.ts +53 -0
  204. package/src/config/bundled-skills/outlook-calendar/tools/outlook-calendar-create-event.ts +74 -0
  205. package/src/config/bundled-skills/outlook-calendar/tools/outlook-calendar-get-event.ts +18 -0
  206. package/src/config/bundled-skills/outlook-calendar/tools/outlook-calendar-list-events.ts +46 -0
  207. package/src/config/bundled-skills/outlook-calendar/tools/outlook-calendar-rsvp.ts +36 -0
  208. package/src/config/bundled-skills/outlook-calendar/tools/shared.ts +17 -0
  209. package/src/config/bundled-skills/outlook-calendar/types.ts +120 -0
  210. package/src/config/bundled-skills/playbooks/tools/playbook-create.ts +47 -40
  211. package/src/config/bundled-skills/playbooks/tools/playbook-delete.ts +16 -29
  212. package/src/config/bundled-skills/playbooks/tools/playbook-list.ts +16 -18
  213. package/src/config/bundled-skills/playbooks/tools/playbook-update.ts +39 -47
  214. package/src/config/bundled-skills/slack/SKILL.md +1 -7
  215. package/src/config/bundled-tool-registry.ts +56 -4
  216. package/src/config/env-registry.ts +15 -8
  217. package/src/config/feature-flag-registry.json +21 -124
  218. package/src/config/schemas/platform.ts +8 -0
  219. package/src/config/schemas/timeouts.ts +1 -1
  220. package/src/config/skills.ts +18 -7
  221. package/src/context/token-estimator.ts +25 -18
  222. package/src/context/window-manager.ts +6 -2
  223. package/src/credential-execution/process-manager.ts +3 -1
  224. package/src/daemon/context-overflow-reducer.ts +46 -2
  225. package/src/daemon/conversation-agent-loop-handlers.ts +123 -82
  226. package/src/daemon/conversation-agent-loop.ts +96 -61
  227. package/src/daemon/conversation-error.ts +31 -8
  228. package/src/daemon/conversation-lifecycle.ts +33 -0
  229. package/src/daemon/conversation-media-retry.ts +85 -7
  230. package/src/daemon/conversation-notifiers.ts +4 -1
  231. package/src/daemon/conversation-runtime-assembly.ts +5 -0
  232. package/src/daemon/conversation.ts +41 -2
  233. package/src/daemon/daemon-control.ts +8 -2
  234. package/src/daemon/handlers/shared.ts +22 -12
  235. package/src/daemon/handlers/skills.ts +416 -202
  236. package/src/daemon/lifecycle.ts +40 -1
  237. package/src/daemon/main.ts +5 -1
  238. package/src/daemon/message-types/conversations.ts +4 -1
  239. package/src/daemon/message-types/messages.ts +3 -1
  240. package/src/daemon/message-types/skills.ts +97 -36
  241. package/src/daemon/providers-setup.ts +5 -0
  242. package/src/daemon/server.ts +11 -2
  243. package/src/daemon/tool-side-effects.ts +27 -5
  244. package/src/heartbeat/heartbeat-service.ts +1 -0
  245. package/src/hooks/cli.ts +2 -2
  246. package/src/hooks/runner.ts +15 -38
  247. package/src/inbound/platform-callback-registration.ts +14 -14
  248. package/src/memory/admin.ts +11 -45
  249. package/src/memory/conversation-bootstrap.ts +2 -0
  250. package/src/memory/conversation-crud.ts +242 -348
  251. package/src/memory/conversation-group-migration.ts +157 -0
  252. package/src/memory/conversation-queries.ts +4 -2
  253. package/src/memory/db-init.ts +30 -3
  254. package/src/memory/embed.ts +73 -0
  255. package/src/memory/embedding-backend.ts +8 -14
  256. package/src/memory/embedding-runtime-manager.ts +12 -114
  257. package/src/memory/fingerprint.ts +2 -2
  258. package/src/memory/graph/bootstrap.ts +512 -0
  259. package/src/memory/graph/capability-seed.ts +297 -0
  260. package/src/memory/graph/consolidation.ts +691 -0
  261. package/src/memory/graph/conversation-graph-memory.ts +630 -0
  262. package/src/memory/graph/decay.test.ts +208 -0
  263. package/src/memory/graph/decay.ts +195 -0
  264. package/src/memory/graph/extraction-job.ts +69 -0
  265. package/src/memory/graph/extraction.test.ts +936 -0
  266. package/src/memory/graph/extraction.ts +1254 -0
  267. package/src/memory/graph/graph-search.ts +266 -0
  268. package/src/memory/graph/image-ref-utils.ts +29 -0
  269. package/src/memory/graph/injection.test.ts +513 -0
  270. package/src/memory/graph/injection.ts +439 -0
  271. package/src/memory/graph/inspect.ts +534 -0
  272. package/src/memory/graph/narrative.ts +267 -0
  273. package/src/memory/graph/pattern-scan.ts +269 -0
  274. package/src/memory/graph/retriever.ts +1008 -0
  275. package/src/memory/graph/scoring.test.ts +548 -0
  276. package/src/memory/graph/scoring.ts +232 -0
  277. package/src/memory/graph/serendipity.ts +65 -0
  278. package/src/memory/graph/store.test.ts +1050 -0
  279. package/src/memory/graph/store.ts +699 -0
  280. package/src/memory/graph/tool-handlers.ts +426 -0
  281. package/src/memory/graph/tools.ts +141 -0
  282. package/src/memory/graph/triggers.test.ts +487 -0
  283. package/src/memory/graph/triggers.ts +223 -0
  284. package/src/memory/graph/types.ts +271 -0
  285. package/src/memory/group-crud.ts +191 -0
  286. package/src/memory/indexer.ts +37 -19
  287. package/src/memory/job-handlers/cleanup.ts +0 -53
  288. package/src/memory/job-handlers/conversation-starters.ts +91 -53
  289. package/src/memory/job-handlers/embedding.ts +5 -31
  290. package/src/memory/job-handlers/index-maintenance.ts +23 -11
  291. package/src/memory/job-handlers/summarization.ts +32 -17
  292. package/src/memory/job-utils.ts +1 -1
  293. package/src/memory/jobs-store.ts +50 -70
  294. package/src/memory/jobs-worker.ts +147 -112
  295. package/src/memory/message-content.ts +1 -0
  296. package/src/memory/migrations/202-memory-graph-tables.ts +130 -0
  297. package/src/memory/migrations/203-drop-memory-items-tables.ts +23 -0
  298. package/src/memory/migrations/204-rename-memory-graph-type-values.ts +46 -0
  299. package/src/memory/migrations/205-memory-graph-image-refs.ts +11 -0
  300. package/src/memory/migrations/index.ts +4 -0
  301. package/src/memory/migrations/registry.ts +8 -0
  302. package/src/memory/qdrant-client.ts +44 -17
  303. package/src/memory/schema/index.ts +1 -0
  304. package/src/memory/schema/memory-graph.ts +139 -0
  305. package/src/memory/search/semantic.ts +47 -91
  306. package/src/memory/task-memory-cleanup.ts +28 -50
  307. package/src/messaging/providers/outlook/adapter.ts +8 -1
  308. package/src/messaging/providers/outlook/client.ts +299 -0
  309. package/src/messaging/providers/outlook/types.ts +118 -0
  310. package/src/notifications/adapters/macos.ts +1 -0
  311. package/src/notifications/copy-composer.ts +9 -0
  312. package/src/notifications/signal.ts +16 -0
  313. package/src/oauth/seed-providers.ts +2 -1
  314. package/src/permissions/checker.ts +24 -3
  315. package/src/permissions/defaults.ts +4 -4
  316. package/src/permissions/workspace-policy.ts +1 -1
  317. package/src/playbooks/playbook-compiler.ts +19 -18
  318. package/src/playbooks/types.ts +4 -3
  319. package/src/prompts/system-prompt.ts +3 -29
  320. package/src/providers/anthropic/client.ts +47 -19
  321. package/src/providers/gemini/client.ts +1 -1
  322. package/src/providers/openai/client.ts +1 -1
  323. package/src/providers/registry.ts +1 -1
  324. package/src/providers/retry.ts +19 -3
  325. package/src/runtime/actor-trust-resolver.ts +5 -1
  326. package/src/runtime/auth/route-policy.ts +7 -0
  327. package/src/runtime/guardian-reply-router.ts +5 -1
  328. package/src/runtime/http-server.ts +23 -3
  329. package/src/runtime/middleware/auth.ts +20 -0
  330. package/src/runtime/routes/attachment-routes.test.ts +106 -0
  331. package/src/runtime/routes/attachment-routes.ts +106 -16
  332. package/src/runtime/routes/brain-graph-routes.ts +21 -22
  333. package/src/runtime/routes/btw-routes.ts +8 -0
  334. package/src/runtime/routes/conversation-management-routes.ts +2 -0
  335. package/src/runtime/routes/conversation-starter-routes.ts +2 -2
  336. package/src/runtime/routes/debug-routes.ts +1 -1
  337. package/src/runtime/routes/global-search-routes.ts +21 -19
  338. package/src/runtime/routes/group-routes.ts +207 -0
  339. package/src/runtime/routes/guardian-action-routes.ts +21 -10
  340. package/src/runtime/routes/guardian-bootstrap-routes.ts +23 -19
  341. package/src/runtime/routes/inbound-message-handler.ts +19 -0
  342. package/src/runtime/routes/inbound-stages/guardian-activation-intercept.test.ts +292 -0
  343. package/src/runtime/routes/inbound-stages/guardian-activation-intercept.ts +207 -0
  344. package/src/runtime/routes/memory-item-routes.test.ts +2 -14
  345. package/src/runtime/routes/memory-item-routes.ts +341 -388
  346. package/src/runtime/routes/schedule-routes.ts +2 -0
  347. package/src/runtime/routes/skills-routes.ts +103 -37
  348. package/src/runtime/routes/work-items-routes.test.ts +2 -6
  349. package/src/schedule/scheduler.ts +8 -1
  350. package/src/security/oauth2.ts +1 -1
  351. package/src/security/secure-keys.ts +4 -8
  352. package/src/shared/provider-env-vars.ts +19 -0
  353. package/src/skills/catalog-cache.ts +5 -0
  354. package/src/skills/catalog-install.ts +15 -14
  355. package/src/skills/clawhub.ts +134 -154
  356. package/src/skills/install-meta.ts +208 -0
  357. package/src/skills/managed-store.ts +27 -16
  358. package/src/skills/skill-memory.ts +152 -77
  359. package/src/skills/skillssh-registry.ts +19 -17
  360. package/src/tasks/task-runner.ts +3 -1
  361. package/src/telemetry/usage-telemetry-reporter.test.ts +3 -5
  362. package/src/tools/browser/runtime-check.ts +3 -1
  363. package/src/tools/memory/register.ts +63 -46
  364. package/src/tools/permission-checker.ts +7 -1
  365. package/src/tools/shared/filesystem/image-read.ts +22 -85
  366. package/src/tools/terminal/safe-env.ts +1 -0
  367. package/src/tools/tool-manifest.ts +3 -3
  368. package/src/util/browser.ts +25 -10
  369. package/src/util/bun-runtime.ts +172 -0
  370. package/src/watcher/providers/outlook-calendar.ts +343 -0
  371. package/src/watcher/providers/outlook.ts +198 -0
  372. package/src/workspace/migrations/025-remove-oauth-app-setup-skills.ts +76 -0
  373. package/src/workspace/migrations/026-backfill-install-meta.ts +325 -0
  374. package/src/workspace/migrations/027-remove-orphaned-optimized-images-cache.ts +42 -0
  375. package/src/workspace/migrations/registry.ts +6 -0
  376. package/src/__tests__/context-memory-e2e.test.ts +0 -415
  377. package/src/__tests__/journal-context.test.ts +0 -268
  378. package/src/__tests__/memory-context-benchmark.benchmark.test.ts +0 -297
  379. package/src/__tests__/memory-lifecycle-e2e.test.ts +0 -459
  380. package/src/__tests__/memory-query-builder.test.ts +0 -59
  381. package/src/__tests__/memory-recall-quality.test.ts +0 -1046
  382. package/src/__tests__/memory-regressions.experimental.test.ts +0 -629
  383. package/src/__tests__/memory-regressions.test.ts +0 -3696
  384. package/src/__tests__/memory-retrieval.benchmark.test.ts +0 -295
  385. package/src/daemon/conversation-memory.ts +0 -207
  386. package/src/memory/conversation-starters-cadence.ts +0 -74
  387. package/src/memory/items-extractor.ts +0 -860
  388. package/src/memory/job-handlers/batch-extraction.ts +0 -753
  389. package/src/memory/job-handlers/extraction.ts +0 -40
  390. package/src/memory/job-handlers/journal-carry-forward.test.ts +0 -355
  391. package/src/memory/job-handlers/journal-carry-forward.ts +0 -255
  392. package/src/memory/journal-memory.ts +0 -224
  393. package/src/memory/query-builder.ts +0 -47
  394. package/src/memory/query-expansion.ts +0 -83
  395. package/src/memory/retriever.test.ts +0 -1592
  396. package/src/memory/retriever.ts +0 -1331
  397. package/src/memory/search/formatting.test.ts +0 -140
  398. package/src/memory/search/formatting.ts +0 -262
  399. package/src/memory/search/mmr.ts +0 -139
  400. package/src/memory/search/ranking.ts +0 -15
  401. package/src/memory/search/staleness.ts +0 -40
  402. package/src/memory/search/tier-classifier.ts +0 -18
  403. package/src/memory/search/types.ts +0 -121
  404. package/src/prompts/journal-context.ts +0 -154
  405. package/src/tools/memory/definitions.ts +0 -69
  406. package/src/tools/memory/handlers.test.ts +0 -562
  407. package/src/tools/memory/handlers.ts +0 -434
@@ -1,1046 +0,0 @@
1
- /**
2
- * Memory Recall Quality Fixtures
3
- *
4
- * Fixture-driven tests that guard recall quality: preference recall,
5
- * contradiction suppression, stale-memory filtering, and importance ranking.
6
- * These tests fail if memory quality degrades — they act as guardrails
7
- * before any retrieval or ranking changes.
8
- */
9
- import {
10
- afterAll,
11
- beforeAll,
12
- beforeEach,
13
- describe,
14
- expect,
15
- mock,
16
- test,
17
- } from "bun:test";
18
-
19
- mock.module("../util/logger.js", () => ({
20
- getLogger: () =>
21
- new Proxy({} as Record<string, unknown>, {
22
- get: () => () => {},
23
- }),
24
- }));
25
-
26
- // Stub the local embedding backend so the real ONNX model (2.5 GB RSS) never
27
- // loads — avoids a Bun v1.3.9 panic on process exit.
28
- mock.module("../memory/embedding-local.js", () => ({
29
- LocalEmbeddingBackend: class {
30
- readonly provider = "local" as const;
31
- readonly model: string;
32
- constructor(model: string) {
33
- this.model = model;
34
- }
35
- async embed(texts: string[]): Promise<number[][]> {
36
- return texts.map(() => new Array(384).fill(0));
37
- }
38
- },
39
- }));
40
-
41
- // Dynamic Qdrant mock: tests can push results to be returned by searchWithFilter/hybridSearch
42
- let mockQdrantResults: Array<{
43
- id: string;
44
- score: number;
45
- payload: Record<string, unknown>;
46
- }> = [];
47
-
48
- mock.module("../memory/qdrant-client.js", () => ({
49
- getQdrantClient: () => ({
50
- searchWithFilter: async () => mockQdrantResults,
51
- hybridSearch: async () => mockQdrantResults,
52
- upsertPoints: async () => {},
53
- deletePoints: async () => {},
54
- }),
55
- initQdrantClient: () => {},
56
- }));
57
-
58
- import { DEFAULT_CONFIG } from "../config/defaults.js";
59
-
60
- const TEST_CONFIG = {
61
- ...DEFAULT_CONFIG,
62
- memory: {
63
- ...DEFAULT_CONFIG.memory,
64
- extraction: {
65
- ...DEFAULT_CONFIG.memory.extraction,
66
- useLLM: false,
67
- },
68
- embeddings: {
69
- ...DEFAULT_CONFIG.memory.embeddings,
70
- required: false,
71
- },
72
- },
73
- };
74
-
75
- mock.module("../config/loader.js", () => ({
76
- loadConfig: () => TEST_CONFIG,
77
- getConfig: () => TEST_CONFIG,
78
- invalidateConfigCache: () => {},
79
- }));
80
-
81
- import { getDb, initializeDb, resetDb } from "../memory/db.js";
82
- import { buildMemoryRecall } from "../memory/retriever.js";
83
- import {
84
- conversations,
85
- memoryItems,
86
- memoryItemSources,
87
- messages,
88
- } from "../memory/schema.js";
89
-
90
- // ---------------------------------------------------------------------------
91
- // Helpers
92
- // ---------------------------------------------------------------------------
93
-
94
- /** Insert a standard conversation + message row for fixture setup. */
95
- function insertConversation(
96
- db: ReturnType<typeof getDb>,
97
- id: string,
98
- createdAt: number,
99
- contextCompactedMessageCount = 0,
100
- ) {
101
- db.insert(conversations)
102
- .values({
103
- id,
104
- title: null,
105
- createdAt,
106
- updatedAt: createdAt,
107
- totalInputTokens: 0,
108
- totalOutputTokens: 0,
109
- totalEstimatedCost: 0,
110
- contextSummary: null,
111
- contextCompactedMessageCount,
112
- contextCompactedAt: null,
113
- })
114
- .run();
115
- }
116
-
117
- function insertMessage(
118
- db: ReturnType<typeof getDb>,
119
- id: string,
120
- conversationId: string,
121
- role: string,
122
- text: string,
123
- createdAt: number,
124
- ) {
125
- db.insert(messages)
126
- .values({
127
- id,
128
- conversationId,
129
- role,
130
- content: JSON.stringify([{ type: "text", text }]),
131
- createdAt,
132
- })
133
- .run();
134
- }
135
-
136
- function insertSegment(
137
- db: ReturnType<typeof getDb>,
138
- id: string,
139
- messageId: string,
140
- conversationId: string,
141
- role: string,
142
- text: string,
143
- createdAt: number,
144
- ) {
145
- db.run(`
146
- INSERT INTO memory_segments (
147
- id, message_id, conversation_id, role, segment_index, text, token_estimate, created_at, updated_at
148
- ) VALUES (
149
- '${id}', '${messageId}', '${conversationId}', '${role}', 0, '${text.replace(
150
- /'/g,
151
- "''",
152
- )}', ${Math.ceil(text.split(/\s+/).length * 1.3)}, ${createdAt}, ${createdAt}
153
- )
154
- `);
155
- }
156
-
157
- function insertItem(
158
- db: ReturnType<typeof getDb>,
159
- opts: {
160
- id: string;
161
- kind: string;
162
- subject: string;
163
- statement: string;
164
- status?: string;
165
- confidence?: number;
166
- importance?: number;
167
- accessCount?: number;
168
- firstSeenAt: number;
169
- lastSeenAt?: number;
170
- },
171
- ) {
172
- db.insert(memoryItems)
173
- .values({
174
- id: opts.id,
175
- kind: opts.kind,
176
- subject: opts.subject,
177
- statement: opts.statement,
178
- status: opts.status ?? "active",
179
- confidence: opts.confidence ?? 0.8,
180
- importance: opts.importance ?? 0.6,
181
- accessCount: opts.accessCount ?? 0,
182
- fingerprint: `fp-${opts.id}`,
183
- firstSeenAt: opts.firstSeenAt,
184
- lastSeenAt: opts.lastSeenAt ?? opts.firstSeenAt,
185
- lastUsedAt: null,
186
- })
187
- .run();
188
- }
189
-
190
- function insertItemSource(
191
- db: ReturnType<typeof getDb>,
192
- itemId: string,
193
- messageId: string,
194
- createdAt: number,
195
- ) {
196
- db.insert(memoryItemSources)
197
- .values({
198
- memoryItemId: itemId,
199
- messageId,
200
- evidence: `evidence for ${itemId}`,
201
- createdAt,
202
- })
203
- .run();
204
- }
205
-
206
- /**
207
- * Assert that at least `minFound` of the `expectedSubstrings` appear in `text`.
208
- * This is a deterministic precision@k-style check: given a list of expected
209
- * items and the injected recall text, verify enough of them were recalled.
210
- */
211
- function assertPrecisionAtK(
212
- text: string,
213
- expectedSubstrings: string[],
214
- minFound: number,
215
- label?: string,
216
- ) {
217
- const found = expectedSubstrings.filter((s) => text.includes(s));
218
- const precision = found.length / expectedSubstrings.length;
219
- if (found.length < minFound) {
220
- const prefix = label ? `[${label}] ` : "";
221
- throw new Error(
222
- `${prefix}precision@${expectedSubstrings.length} too low: ` +
223
- `found ${found.length}/${expectedSubstrings.length} (${(
224
- precision * 100
225
- ).toFixed(0)}%), ` +
226
- `need at least ${minFound}. ` +
227
- `Missing: ${expectedSubstrings
228
- .filter((s) => !text.includes(s))
229
- .join(", ")}`,
230
- );
231
- }
232
- }
233
-
234
- // ---------------------------------------------------------------------------
235
- // Suite
236
- // ---------------------------------------------------------------------------
237
-
238
- describe("Memory Recall Quality", () => {
239
- beforeAll(() => {
240
- initializeDb();
241
- });
242
-
243
- beforeEach(() => {
244
- const db = getDb();
245
- db.run("DELETE FROM memory_item_sources");
246
- db.run("DELETE FROM memory_embeddings");
247
- db.run("DELETE FROM memory_items");
248
-
249
- db.run("DELETE FROM memory_segments");
250
- db.run("DELETE FROM messages");
251
- db.run("DELETE FROM conversations");
252
- db.run("DELETE FROM memory_jobs");
253
- db.run("DELETE FROM memory_checkpoints");
254
- mockQdrantResults = [];
255
- });
256
-
257
- afterAll(() => {
258
- resetDb();
259
- });
260
-
261
- // -------------------------------------------------------------------------
262
- // Preference Recall
263
- // -------------------------------------------------------------------------
264
-
265
- describe("preference recall", () => {
266
- test("preferences are recalled when querying about user preferences", async () => {
267
- const db = getDb();
268
- const now = 1_700_000_000_000;
269
- insertConversation(db, "conv-pref", now, 3);
270
- insertMessage(
271
- db,
272
- "msg-pref-1",
273
- "conv-pref",
274
- "user",
275
- "I prefer dark mode and concise answers",
276
- now,
277
- );
278
- insertMessage(
279
- db,
280
- "msg-pref-2",
281
- "conv-pref",
282
- "user",
283
- "My favorite editor is Neovim",
284
- now + 1000,
285
- );
286
- insertMessage(
287
- db,
288
- "msg-fact-1",
289
- "conv-pref",
290
- "user",
291
- "The server runs on port 3000",
292
- now + 2000,
293
- );
294
-
295
- insertSegment(
296
- db,
297
- "seg-pref-1",
298
- "msg-pref-1",
299
- "conv-pref",
300
- "user",
301
- "I prefer dark mode and concise answers",
302
- now,
303
- );
304
- insertSegment(
305
- db,
306
- "seg-pref-2",
307
- "msg-pref-2",
308
- "conv-pref",
309
- "user",
310
- "My favorite editor is Neovim",
311
- now + 1000,
312
- );
313
- insertSegment(
314
- db,
315
- "seg-fact-1",
316
- "msg-fact-1",
317
- "conv-pref",
318
- "user",
319
- "The server runs on port 3000",
320
- now + 2000,
321
- );
322
-
323
- // Also insert items so the pipeline has structured data to inject
324
- insertItem(db, {
325
- id: "item-pref-dark",
326
- kind: "preference",
327
- subject: "display preference",
328
- statement: "User prefers dark mode and concise answers",
329
- importance: 0.8,
330
- firstSeenAt: now,
331
- });
332
- insertItemSource(db, "item-pref-dark", "msg-pref-1", now);
333
- insertItem(db, {
334
- id: "item-pref-editor",
335
- kind: "preference",
336
- subject: "editor preference",
337
- statement: "User favorite editor is Neovim",
338
- importance: 0.8,
339
- firstSeenAt: now + 1000,
340
- });
341
- insertItemSource(db, "item-pref-editor", "msg-pref-2", now + 1000);
342
-
343
- // Mock Qdrant to return both preference items as high-scoring results
344
- mockQdrantResults = [
345
- {
346
- id: "emb-pref-dark",
347
- score: 0.92,
348
- payload: {
349
- target_type: "item",
350
- target_id: "item-pref-dark",
351
- text: "User prefers dark mode and concise answers",
352
- kind: "preference",
353
- status: "active",
354
- created_at: now,
355
- last_seen_at: now,
356
- },
357
- },
358
- {
359
- id: "emb-pref-editor",
360
- score: 0.88,
361
- payload: {
362
- target_type: "item",
363
- target_id: "item-pref-editor",
364
- text: "User favorite editor is Neovim",
365
- kind: "preference",
366
- status: "active",
367
- created_at: now + 1000,
368
- last_seen_at: now + 1000,
369
- },
370
- },
371
- ];
372
-
373
- const recall = await buildMemoryRecall(
374
- "what are my preferences",
375
- "conv-pref",
376
- TEST_CONFIG,
377
- );
378
-
379
- expect(recall.enabled).toBe(true);
380
- // With high-scoring Qdrant results, items should be injected
381
- expect(recall.semanticHits).toBeGreaterThan(0);
382
- expect(recall.injectedText).toContain("dark mode");
383
- expect(recall.injectedText).toContain("Neovim");
384
- });
385
-
386
- test("high-importance preferences outrank low-importance facts in recall", async () => {
387
- const db = getDb();
388
- const now = 1_700_000_100_000;
389
- insertConversation(db, "conv-rank", now, 2);
390
-
391
- // High-importance preference
392
- insertMessage(
393
- db,
394
- "msg-hi",
395
- "conv-rank",
396
- "user",
397
- "I strongly prefer TypeScript over JavaScript",
398
- now,
399
- );
400
- insertSegment(
401
- db,
402
- "seg-hi",
403
- "msg-hi",
404
- "conv-rank",
405
- "user",
406
- "I strongly prefer TypeScript over JavaScript",
407
- now,
408
- );
409
- insertItem(db, {
410
- id: "item-hi-pref",
411
- kind: "preference",
412
- subject: "language preference",
413
- statement: "User strongly prefers TypeScript over JavaScript",
414
- importance: 0.9,
415
- firstSeenAt: now,
416
- });
417
- insertItemSource(db, "item-hi-pref", "msg-hi", now);
418
-
419
- // Low-importance project fact
420
- insertMessage(
421
- db,
422
- "msg-lo",
423
- "conv-rank",
424
- "user",
425
- "The default port is 8080",
426
- now + 1000,
427
- );
428
- insertSegment(
429
- db,
430
- "seg-lo",
431
- "msg-lo",
432
- "conv-rank",
433
- "user",
434
- "The default port is 8080",
435
- now + 1000,
436
- );
437
- insertItem(db, {
438
- id: "item-lo-fact",
439
- kind: "project",
440
- subject: "default port",
441
- statement: "The default port is 8080",
442
- importance: 0.3,
443
- firstSeenAt: now + 1000,
444
- });
445
- insertItemSource(db, "item-lo-fact", "msg-lo", now + 1000);
446
-
447
- // Mock Qdrant to return both items — the high-importance one with a higher score
448
- mockQdrantResults = [
449
- {
450
- id: "emb-hi-pref",
451
- score: 0.95,
452
- payload: {
453
- target_type: "item",
454
- target_id: "item-hi-pref",
455
- text: "User strongly prefers TypeScript over JavaScript",
456
- kind: "preference",
457
- status: "active",
458
- created_at: now,
459
- last_seen_at: now,
460
- },
461
- },
462
- {
463
- id: "emb-lo-fact",
464
- score: 0.7,
465
- payload: {
466
- target_type: "item",
467
- target_id: "item-lo-fact",
468
- text: "The default port is 8080",
469
- kind: "project",
470
- status: "active",
471
- created_at: now + 1000,
472
- last_seen_at: now + 1000,
473
- },
474
- },
475
- ];
476
-
477
- const recall = await buildMemoryRecall(
478
- "TypeScript preference language",
479
- "conv-rank",
480
- TEST_CONFIG,
481
- );
482
-
483
- expect(recall.enabled).toBe(true);
484
- // High-importance preference should be injected
485
- expect(recall.injectedText).toContain("TypeScript");
486
- });
487
- });
488
-
489
- // -------------------------------------------------------------------------
490
- // Contradiction / Superseding Suppression
491
- // -------------------------------------------------------------------------
492
-
493
- describe("supersession suppression", () => {
494
- test("superseded memory items do not appear in recall", async () => {
495
- const db = getDb();
496
- const now = 1_700_000_200_000;
497
- insertConversation(db, "conv-contra", now, 1);
498
-
499
- // New preference (active, supersedes the old one)
500
- insertMessage(
501
- db,
502
- "msg-new-pref",
503
- "conv-contra",
504
- "user",
505
- "I now prefer neovim with LazyVim for editing code",
506
- now,
507
- );
508
- insertSegment(
509
- db,
510
- "seg-new-pref",
511
- "msg-new-pref",
512
- "conv-contra",
513
- "user",
514
- "I now prefer neovim with LazyVim for editing code",
515
- now,
516
- );
517
- insertItem(db, {
518
- id: "item-new-pref",
519
- kind: "preference",
520
- subject: "editor preference",
521
- statement: "User now prefers neovim with LazyVim for editing code",
522
- status: "active",
523
- importance: 0.8,
524
- firstSeenAt: now,
525
- });
526
- insertItemSource(db, "item-new-pref", "msg-new-pref", now);
527
-
528
- // Old preference (superseded by new one via supersession chain)
529
- insertItem(db, {
530
- id: "item-old-pref",
531
- kind: "preference",
532
- subject: "editor preference",
533
- statement: "User prefers vim for editing code",
534
- status: "superseded",
535
- importance: 0.8,
536
- firstSeenAt: now - 50_000,
537
- });
538
-
539
- const recall = await buildMemoryRecall(
540
- "editor preference",
541
- "conv-contra",
542
- TEST_CONFIG,
543
- );
544
-
545
- // Qdrant is mocked empty; superseded items should not leak into injected text
546
- expect(recall.injectedText).not.toContain("vim for editing code");
547
- });
548
-
549
- test("only active items are included in recall (superseded excluded)", async () => {
550
- const db = getDb();
551
- const now = 1_700_000_250_000;
552
- insertConversation(db, "conv-entity-status", now, 1);
553
-
554
- insertMessage(
555
- db,
556
- "msg-entity-active",
557
- "conv-entity-status",
558
- "user",
559
- "We decided to use PostgreSQL as the database",
560
- now,
561
- );
562
- insertSegment(
563
- db,
564
- "seg-entity-active",
565
- "msg-entity-active",
566
- "conv-entity-status",
567
- "user",
568
- "We decided to use PostgreSQL as the database",
569
- now,
570
- );
571
- insertItem(db, {
572
- id: "item-active-db",
573
- kind: "decision",
574
- subject: "database choice",
575
- statement: "Team decided to use PostgreSQL as the primary database",
576
- status: "active",
577
- importance: 0.8,
578
- firstSeenAt: now,
579
- });
580
- insertItemSource(db, "item-active-db", "msg-entity-active", now);
581
-
582
- // Superseded item (should not appear)
583
- insertItem(db, {
584
- id: "item-superseded-db",
585
- kind: "decision",
586
- subject: "database choice",
587
- statement: "Team decided to use MySQL as the primary database",
588
- status: "superseded",
589
- importance: 0.8,
590
- firstSeenAt: now - 100_000,
591
- });
592
-
593
- const recall = await buildMemoryRecall(
594
- "database choice decision",
595
- "conv-entity-status",
596
- TEST_CONFIG,
597
- );
598
-
599
- // Qdrant is mocked empty; superseded MySQL item should not leak.
600
- expect(recall.injectedText).not.toContain("MySQL");
601
- });
602
-
603
- test("invalidated items are excluded from recall", async () => {
604
- const db = getDb();
605
- const now = 1_700_000_275_000;
606
- insertConversation(db, "conv-invalid-status", now, 1);
607
- insertMessage(
608
- db,
609
- "msg-invalid-status",
610
- "conv-invalid-status",
611
- "user",
612
- "Framework preference is React for this codebase.",
613
- now,
614
- );
615
- insertSegment(
616
- db,
617
- "seg-invalid-status",
618
- "msg-invalid-status",
619
- "conv-invalid-status",
620
- "user",
621
- "Framework preference is React for this codebase.",
622
- now,
623
- );
624
-
625
- insertItem(db, {
626
- id: "item-framework-active",
627
- kind: "preference",
628
- subject: "framework preference",
629
- statement: "Framework preference is React for this codebase",
630
- status: "active",
631
- importance: 0.9,
632
- firstSeenAt: now,
633
- });
634
- insertItemSource(db, "item-framework-active", "msg-invalid-status", now);
635
-
636
- // Invalidated item (should not appear in recall)
637
- insertItem(db, {
638
- id: "item-framework-invalidated",
639
- kind: "preference",
640
- subject: "framework preference",
641
- statement: "Framework preference is Angular for this codebase",
642
- status: "invalidated",
643
- importance: 0.9,
644
- firstSeenAt: now - 50_000,
645
- });
646
-
647
- // Mock Qdrant to return the active item as a semantic search result
648
- mockQdrantResults = [
649
- {
650
- id: "emb-framework-active",
651
- score: 0.92,
652
- payload: {
653
- target_type: "item",
654
- target_id: "item-framework-active",
655
- text: "Framework preference is React for this codebase",
656
- kind: "preference",
657
- status: "active",
658
- created_at: now,
659
- last_seen_at: now,
660
- },
661
- },
662
- ];
663
-
664
- const recall = await buildMemoryRecall(
665
- "framework preference",
666
- "conv-invalid-status",
667
- TEST_CONFIG,
668
- );
669
- // Active item should be injected via semantic search; invalidated item should not leak
670
- expect(recall.injectedText).toContain("React");
671
- expect(recall.injectedText).not.toContain("Angular");
672
- });
673
- });
674
-
675
- // -------------------------------------------------------------------------
676
- // Stale Memory Suppression
677
- // -------------------------------------------------------------------------
678
-
679
- describe("stale memory suppression", () => {
680
- test("recently mentioned memories outrank old memories via recency scoring", async () => {
681
- const db = getDb();
682
- const now = Date.now();
683
- const oneMonthAgo = now - 30 * 24 * 60 * 60 * 1000;
684
- insertConversation(db, "conv-stale", now, 2);
685
-
686
- // Recent mention
687
- insertMessage(
688
- db,
689
- "msg-recent",
690
- "conv-stale",
691
- "user",
692
- "We are using Bun as our runtime environment",
693
- now - 1000,
694
- );
695
- insertSegment(
696
- db,
697
- "seg-recent",
698
- "msg-recent",
699
- "conv-stale",
700
- "user",
701
- "We are using Bun as our runtime environment",
702
- now - 1000,
703
- );
704
-
705
- // Old mention (same topic)
706
- insertMessage(
707
- db,
708
- "msg-old",
709
- "conv-stale",
710
- "user",
711
- "We are using Node as our runtime environment",
712
- oneMonthAgo,
713
- );
714
- insertSegment(
715
- db,
716
- "seg-old",
717
- "msg-old",
718
- "conv-stale",
719
- "user",
720
- "We are using Node as our runtime environment",
721
- oneMonthAgo,
722
- );
723
-
724
- // Add items and mock Qdrant with the recent item scoring higher
725
- insertItem(db, {
726
- id: "item-bun-runtime",
727
- kind: "project",
728
- subject: "runtime environment",
729
- statement: "We are using Bun as our runtime environment",
730
- importance: 0.7,
731
- firstSeenAt: now - 1000,
732
- });
733
- insertItemSource(db, "item-bun-runtime", "msg-recent", now - 1000);
734
-
735
- mockQdrantResults = [
736
- {
737
- id: "emb-bun-runtime",
738
- score: 0.9,
739
- payload: {
740
- target_type: "item",
741
- target_id: "item-bun-runtime",
742
- text: "We are using Bun as our runtime environment",
743
- kind: "project",
744
- status: "active",
745
- created_at: now - 1000,
746
- last_seen_at: now - 1000,
747
- },
748
- },
749
- ];
750
-
751
- const recall = await buildMemoryRecall(
752
- "runtime environment",
753
- "conv-stale",
754
- TEST_CONFIG,
755
- );
756
-
757
- expect(recall.enabled).toBe(true);
758
- // Recent Bun item should be injected, old Node reference should not
759
- expect(recall.injectedText).toContain("Bun");
760
- });
761
-
762
- test("frequently accessed items surface via semantic search", async () => {
763
- const db = getDb();
764
- const now = 1_700_000_400_000;
765
- insertConversation(db, "conv-access", now, 2);
766
-
767
- // Frequently accessed item with segment
768
- insertMessage(
769
- db,
770
- "msg-freq",
771
- "conv-access",
772
- "user",
773
- "User timezone is America/Los_Angeles",
774
- now,
775
- );
776
- insertSegment(
777
- db,
778
- "seg-freq",
779
- "msg-freq",
780
- "conv-access",
781
- "user",
782
- "User timezone is America/Los_Angeles",
783
- now,
784
- );
785
- insertItem(db, {
786
- id: "item-freq",
787
- kind: "identity",
788
- subject: "timezone",
789
- statement: "User timezone is America/Los_Angeles",
790
- importance: 0.5,
791
- accessCount: 20,
792
- firstSeenAt: now,
793
- });
794
- insertItemSource(db, "item-freq", "msg-freq", now);
795
-
796
- // Rarely accessed item with segment
797
- insertMessage(
798
- db,
799
- "msg-rare",
800
- "conv-access",
801
- "user",
802
- "User timezone offset is UTC-8",
803
- now + 1000,
804
- );
805
- insertSegment(
806
- db,
807
- "seg-rare",
808
- "msg-rare",
809
- "conv-access",
810
- "user",
811
- "User timezone offset is UTC-8",
812
- now + 1000,
813
- );
814
- insertItem(db, {
815
- id: "item-rare",
816
- kind: "identity",
817
- subject: "timezone offset",
818
- statement: "User timezone offset is UTC-8",
819
- importance: 0.5,
820
- accessCount: 0,
821
- firstSeenAt: now + 1000,
822
- });
823
- insertItemSource(db, "item-rare", "msg-rare", now + 1000);
824
-
825
- // Mock Qdrant with the frequently accessed item scoring higher
826
- mockQdrantResults = [
827
- {
828
- id: "emb-freq",
829
- score: 0.92,
830
- payload: {
831
- target_type: "item",
832
- target_id: "item-freq",
833
- text: "User timezone is America/Los_Angeles",
834
- kind: "identity",
835
- status: "active",
836
- created_at: now,
837
- last_seen_at: now,
838
- },
839
- },
840
- {
841
- id: "emb-rare",
842
- score: 0.75,
843
- payload: {
844
- target_type: "item",
845
- target_id: "item-rare",
846
- text: "User timezone offset is UTC-8",
847
- kind: "identity",
848
- status: "active",
849
- created_at: now + 1000,
850
- last_seen_at: now + 1000,
851
- },
852
- },
853
- ];
854
-
855
- const recall = await buildMemoryRecall(
856
- "timezone",
857
- "conv-access",
858
- TEST_CONFIG,
859
- );
860
-
861
- expect(recall.enabled).toBe(true);
862
- // Frequently accessed timezone item should be in injected text
863
- expect(recall.injectedText).toContain("America/Los_Angeles");
864
- });
865
- });
866
-
867
- // -------------------------------------------------------------------------
868
- // Multi-source recall consistency
869
- // -------------------------------------------------------------------------
870
-
871
- describe("multi-source recall", () => {
872
- test("segments are surfaced via semantic search alongside items", async () => {
873
- const db = getDb();
874
- const now = 1_700_000_500_000;
875
- insertConversation(db, "conv-multi", now, 1);
876
-
877
- // Segment (also indexed in Qdrant via item source)
878
- insertMessage(
879
- db,
880
- "msg-seg",
881
- "conv-multi",
882
- "user",
883
- "Deploy to staging before production always",
884
- now,
885
- );
886
- insertSegment(
887
- db,
888
- "seg-deploy",
889
- "msg-seg",
890
- "conv-multi",
891
- "user",
892
- "Deploy to staging before production always",
893
- now,
894
- );
895
-
896
- // Item (constraint kind)
897
- insertItem(db, {
898
- id: "item-deploy-rule",
899
- kind: "constraint",
900
- subject: "deployment rule",
901
- statement: "Always deploy to staging before production",
902
- importance: 0.9,
903
- firstSeenAt: now,
904
- });
905
- insertItemSource(db, "item-deploy-rule", "msg-seg", now);
906
-
907
- // Mock Qdrant to return the deployment rule item
908
- mockQdrantResults = [
909
- {
910
- id: "emb-deploy-rule",
911
- score: 0.91,
912
- payload: {
913
- target_type: "item",
914
- target_id: "item-deploy-rule",
915
- text: "Always deploy to staging before production",
916
- kind: "constraint",
917
- status: "active",
918
- created_at: now,
919
- last_seen_at: now,
920
- },
921
- },
922
- ];
923
-
924
- const recall = await buildMemoryRecall(
925
- "deployment staging production",
926
- "conv-multi",
927
- TEST_CONFIG,
928
- );
929
-
930
- expect(recall.enabled).toBe(true);
931
- // Deployment rule should be injected
932
- expect(recall.injectedText).toContain("staging");
933
- });
934
-
935
- test("recall with no matching content returns empty injection", async () => {
936
- const db = getDb();
937
- const now = 1_700_000_600_000;
938
- insertConversation(db, "conv-empty", now);
939
-
940
- const recall = await buildMemoryRecall(
941
- "completely unrelated xyzzy topic",
942
- "conv-empty",
943
- TEST_CONFIG,
944
- );
945
-
946
- expect(recall.injectedText).toBe("");
947
- expect(recall.injectedTokens).toBe(0);
948
- });
949
- });
950
-
951
- // -------------------------------------------------------------------------
952
- // Precision@K helpers
953
- // -------------------------------------------------------------------------
954
-
955
- describe("precision@k assertions", () => {
956
- test("assertPrecisionAtK passes when enough expected items are found", () => {
957
- const text = "item-a is here, item-b is here, item-c is here";
958
- assertPrecisionAtK(text, ["item-a", "item-b", "item-c"], 3);
959
- assertPrecisionAtK(text, ["item-a", "item-b", "item-c", "item-d"], 3);
960
- });
961
-
962
- test("assertPrecisionAtK fails when too few expected items are found", () => {
963
- const text = "only item-a is here";
964
- expect(() => {
965
- assertPrecisionAtK(
966
- text,
967
- ["item-a", "item-b", "item-c"],
968
- 2,
969
- "test-label",
970
- );
971
- }).toThrow(
972
- /precision@3 too low.*found 1\/3.*need at least 2.*Missing: item-b, item-c/,
973
- );
974
- });
975
-
976
- test("precision@k guard verifies pipeline completes with seeded items", async () => {
977
- const db = getDb();
978
- const now = 1_700_000_700_000;
979
- insertConversation(db, "conv-pk", now, 3);
980
-
981
- const prefs = [
982
- {
983
- msg: "msg-pk-1",
984
- item: "item-pk-1",
985
- text: "I prefer dark mode over light mode",
986
- },
987
- {
988
- msg: "msg-pk-2",
989
- item: "item-pk-2",
990
- text: "I like using TypeScript for all projects",
991
- },
992
- {
993
- msg: "msg-pk-3",
994
- item: "item-pk-3",
995
- text: "I prefer tabs over spaces for indentation",
996
- },
997
- ];
998
-
999
- for (let i = 0; i < prefs.length; i++) {
1000
- const p = prefs[i]!;
1001
- const t = now + i * 1000;
1002
- insertMessage(db, p.msg, "conv-pk", "user", p.text, t);
1003
- insertItem(db, {
1004
- id: p.item,
1005
- kind: "preference",
1006
- subject: `preference-${i}`,
1007
- statement: p.text,
1008
- importance: 0.8,
1009
- firstSeenAt: t,
1010
- });
1011
- insertItemSource(db, p.item, p.msg, t);
1012
- }
1013
-
1014
- // Mock Qdrant to return all three preference items
1015
- mockQdrantResults = prefs.map((p, i) => ({
1016
- id: `emb-pk-${i}`,
1017
- score: 0.9 - i * 0.05,
1018
- payload: {
1019
- target_type: "item",
1020
- target_id: p.item,
1021
- text: p.text,
1022
- kind: "preference",
1023
- status: "active",
1024
- created_at: now + i * 1000,
1025
- last_seen_at: now + i * 1000,
1026
- },
1027
- }));
1028
-
1029
- const recall = await buildMemoryRecall(
1030
- "what do I prefer",
1031
- "conv-pk",
1032
- TEST_CONFIG,
1033
- );
1034
-
1035
- // Semantic search returns all three preference items which pass
1036
- // tier classification and are injected.
1037
- expect(recall.enabled).toBe(true);
1038
- assertPrecisionAtK(
1039
- recall.injectedText,
1040
- ["dark mode", "TypeScript", "tabs"],
1041
- 2,
1042
- "preference recall precision",
1043
- );
1044
- });
1045
- });
1046
- });