@vellumai/assistant 0.4.49 → 0.4.50

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (239) hide show
  1. package/ARCHITECTURE.md +24 -33
  2. package/README.md +3 -3
  3. package/docs/architecture/memory.md +180 -119
  4. package/package.json +2 -2
  5. package/src/__tests__/agent-loop.test.ts +3 -1
  6. package/src/__tests__/anthropic-provider.test.ts +114 -23
  7. package/src/__tests__/approval-cascade.test.ts +1 -15
  8. package/src/__tests__/approval-routes-http.test.ts +2 -0
  9. package/src/__tests__/assistant-feature-flag-guard.test.ts +0 -23
  10. package/src/__tests__/canonical-guardian-store.test.ts +95 -0
  11. package/src/__tests__/checker.test.ts +13 -0
  12. package/src/__tests__/config-schema.test.ts +1 -68
  13. package/src/__tests__/context-memory-e2e.test.ts +11 -100
  14. package/src/__tests__/conversation-routes-guardian-reply.test.ts +8 -0
  15. package/src/__tests__/conversation-routes-slash-commands.test.ts +1 -0
  16. package/src/__tests__/credential-security-e2e.test.ts +1 -0
  17. package/src/__tests__/credential-vault-unit.test.ts +4 -0
  18. package/src/__tests__/credential-vault.test.ts +13 -1
  19. package/src/__tests__/cu-unified-flow.test.ts +532 -0
  20. package/src/__tests__/date-context.test.ts +93 -77
  21. package/src/__tests__/deterministic-verification-control-plane.test.ts +64 -0
  22. package/src/__tests__/guardian-routing-invariants.test.ts +93 -0
  23. package/src/__tests__/history-repair.test.ts +245 -0
  24. package/src/__tests__/host-cu-proxy.test.ts +165 -3
  25. package/src/__tests__/http-user-message-parity.test.ts +1 -0
  26. package/src/__tests__/invite-redemption-service.test.ts +65 -1
  27. package/src/__tests__/keychain-broker-client.test.ts +4 -4
  28. package/src/__tests__/memory-context-benchmark.benchmark.test.ts +56 -18
  29. package/src/__tests__/memory-lifecycle-e2e.test.ts +244 -387
  30. package/src/__tests__/memory-recall-quality.test.ts +244 -407
  31. package/src/__tests__/memory-regressions.experimental.test.ts +126 -101
  32. package/src/__tests__/memory-regressions.test.ts +477 -2841
  33. package/src/__tests__/memory-retrieval.benchmark.test.ts +33 -150
  34. package/src/__tests__/memory-upsert-concurrency.test.ts +5 -244
  35. package/src/__tests__/mime-builder.test.ts +28 -0
  36. package/src/__tests__/native-web-search.test.ts +1 -0
  37. package/src/__tests__/oauth-cli.test.ts +572 -5
  38. package/src/__tests__/oauth-store.test.ts +120 -6
  39. package/src/__tests__/qdrant-collection-migration.test.ts +53 -8
  40. package/src/__tests__/registry.test.ts +0 -1
  41. package/src/__tests__/relay-server.test.ts +46 -1
  42. package/src/__tests__/schedule-tools.test.ts +32 -0
  43. package/src/__tests__/script-proxy-certs.test.ts +1 -1
  44. package/src/__tests__/secret-onetime-send.test.ts +1 -0
  45. package/src/__tests__/secure-keys.test.ts +7 -2
  46. package/src/__tests__/send-endpoint-busy.test.ts +3 -0
  47. package/src/__tests__/session-abort-tool-results.test.ts +1 -14
  48. package/src/__tests__/session-agent-loop-overflow.test.ts +1583 -0
  49. package/src/__tests__/session-agent-loop.test.ts +19 -15
  50. package/src/__tests__/session-confirmation-signals.test.ts +1 -15
  51. package/src/__tests__/session-error.test.ts +124 -2
  52. package/src/__tests__/session-history-web-search.test.ts +918 -0
  53. package/src/__tests__/session-pre-run-repair.test.ts +1 -14
  54. package/src/__tests__/session-provider-retry-repair.test.ts +25 -28
  55. package/src/__tests__/session-queue.test.ts +37 -27
  56. package/src/__tests__/session-runtime-assembly.test.ts +54 -0
  57. package/src/__tests__/session-slash-known.test.ts +1 -15
  58. package/src/__tests__/session-slash-queue.test.ts +1 -15
  59. package/src/__tests__/session-slash-unknown.test.ts +1 -15
  60. package/src/__tests__/session-workspace-cache-state.test.ts +3 -33
  61. package/src/__tests__/session-workspace-injection.test.ts +3 -37
  62. package/src/__tests__/session-workspace-tool-tracking.test.ts +3 -37
  63. package/src/__tests__/skills-install-extract.test.ts +93 -0
  64. package/src/__tests__/skillssh-registry.test.ts +451 -0
  65. package/src/__tests__/trust-store.test.ts +15 -0
  66. package/src/__tests__/voice-invite-redemption.test.ts +32 -1
  67. package/src/agent/ax-tree-compaction.test.ts +51 -0
  68. package/src/agent/loop.ts +39 -12
  69. package/src/approvals/AGENTS.md +1 -1
  70. package/src/approvals/guardian-request-resolvers.ts +14 -2
  71. package/src/bundler/compiler-tools.ts +66 -2
  72. package/src/calls/call-domain.ts +132 -0
  73. package/src/calls/call-store.ts +6 -0
  74. package/src/calls/relay-server.ts +43 -5
  75. package/src/calls/relay-setup-router.ts +17 -1
  76. package/src/calls/twilio-config.ts +1 -1
  77. package/src/calls/types.ts +3 -1
  78. package/src/cli/commands/doctor.ts +4 -3
  79. package/src/cli/commands/mcp.ts +46 -59
  80. package/src/cli/commands/memory.ts +16 -165
  81. package/src/cli/commands/oauth/apps.ts +31 -2
  82. package/src/cli/commands/oauth/connections.ts +431 -97
  83. package/src/cli/commands/oauth/providers.ts +15 -1
  84. package/src/cli/commands/sessions.ts +5 -2
  85. package/src/cli/commands/skills.ts +173 -1
  86. package/src/cli/http-client.ts +0 -20
  87. package/src/cli/main-screen.tsx +2 -2
  88. package/src/cli/program.ts +5 -6
  89. package/src/cli.ts +4 -10
  90. package/src/config/bundled-skills/computer-use/TOOLS.json +1 -1
  91. package/src/config/bundled-skills/computer-use/tools/computer-use-observe.ts +12 -0
  92. package/src/config/bundled-tool-registry.ts +2 -5
  93. package/src/config/schema.ts +1 -12
  94. package/src/config/schemas/memory-lifecycle.ts +0 -9
  95. package/src/config/schemas/memory-processing.ts +0 -180
  96. package/src/config/schemas/memory-retrieval.ts +32 -104
  97. package/src/config/schemas/memory.ts +0 -10
  98. package/src/config/types.ts +0 -4
  99. package/src/context/window-manager.ts +4 -1
  100. package/src/daemon/config-watcher.ts +61 -3
  101. package/src/daemon/daemon-control.ts +1 -1
  102. package/src/daemon/date-context.ts +114 -31
  103. package/src/daemon/handlers/sessions.ts +18 -13
  104. package/src/daemon/handlers/skills.ts +20 -1
  105. package/src/daemon/history-repair.ts +72 -8
  106. package/src/daemon/host-cu-proxy.ts +55 -26
  107. package/src/daemon/lifecycle.ts +31 -3
  108. package/src/daemon/mcp-reload-service.ts +2 -2
  109. package/src/daemon/message-types/computer-use.ts +1 -12
  110. package/src/daemon/message-types/memory.ts +4 -16
  111. package/src/daemon/message-types/messages.ts +1 -0
  112. package/src/daemon/message-types/sessions.ts +4 -0
  113. package/src/daemon/server.ts +12 -1
  114. package/src/daemon/session-agent-loop-handlers.ts +38 -0
  115. package/src/daemon/session-agent-loop.ts +334 -48
  116. package/src/daemon/session-error.ts +89 -6
  117. package/src/daemon/session-history.ts +17 -7
  118. package/src/daemon/session-media-retry.ts +6 -2
  119. package/src/daemon/session-memory.ts +69 -149
  120. package/src/daemon/session-process.ts +10 -1
  121. package/src/daemon/session-runtime-assembly.ts +49 -19
  122. package/src/daemon/session-surfaces.ts +4 -1
  123. package/src/daemon/session-tool-setup.ts +7 -1
  124. package/src/daemon/session.ts +12 -2
  125. package/src/instrument.ts +61 -1
  126. package/src/memory/admin.ts +2 -191
  127. package/src/memory/canonical-guardian-store.ts +38 -2
  128. package/src/memory/conversation-crud.ts +0 -33
  129. package/src/memory/conversation-queries.ts +22 -3
  130. package/src/memory/db-init.ts +28 -0
  131. package/src/memory/embedding-backend.ts +84 -8
  132. package/src/memory/embedding-types.ts +9 -1
  133. package/src/memory/indexer.ts +7 -46
  134. package/src/memory/items-extractor.ts +274 -76
  135. package/src/memory/job-handlers/backfill.ts +2 -127
  136. package/src/memory/job-handlers/cleanup.ts +2 -16
  137. package/src/memory/job-handlers/extraction.ts +2 -138
  138. package/src/memory/job-handlers/index-maintenance.ts +1 -6
  139. package/src/memory/job-handlers/summarization.ts +3 -148
  140. package/src/memory/job-utils.ts +21 -59
  141. package/src/memory/jobs-store.ts +1 -159
  142. package/src/memory/jobs-worker.ts +9 -52
  143. package/src/memory/migrations/104-core-indexes.ts +3 -3
  144. package/src/memory/migrations/149-oauth-tables.ts +2 -0
  145. package/src/memory/migrations/150-oauth-apps-client-secret-path.ts +98 -0
  146. package/src/memory/migrations/151-oauth-providers-ping-url.ts +11 -0
  147. package/src/memory/migrations/152-memory-item-supersession.ts +44 -0
  148. package/src/memory/migrations/153-drop-entity-tables.ts +15 -0
  149. package/src/memory/migrations/154-drop-fts.ts +20 -0
  150. package/src/memory/migrations/155-drop-conflicts.ts +7 -0
  151. package/src/memory/migrations/156-call-session-invite-metadata.ts +24 -0
  152. package/src/memory/migrations/index.ts +7 -0
  153. package/src/memory/qdrant-client.ts +148 -51
  154. package/src/memory/raw-query.ts +1 -1
  155. package/src/memory/retriever.test.ts +294 -273
  156. package/src/memory/retriever.ts +421 -645
  157. package/src/memory/schema/calls.ts +2 -0
  158. package/src/memory/schema/memory-core.ts +3 -48
  159. package/src/memory/schema/oauth.ts +2 -0
  160. package/src/memory/search/formatting.ts +263 -176
  161. package/src/memory/search/lexical.ts +1 -254
  162. package/src/memory/search/ranking.ts +0 -455
  163. package/src/memory/search/semantic.ts +100 -14
  164. package/src/memory/search/staleness.ts +47 -0
  165. package/src/memory/search/tier-classifier.ts +21 -0
  166. package/src/memory/search/types.ts +15 -77
  167. package/src/memory/task-memory-cleanup.ts +4 -6
  168. package/src/messaging/providers/gmail/mime-builder.ts +17 -7
  169. package/src/oauth/byo-connection.test.ts +8 -1
  170. package/src/oauth/oauth-store.ts +113 -27
  171. package/src/oauth/seed-providers.ts +6 -0
  172. package/src/oauth/token-persistence.ts +11 -3
  173. package/src/permissions/defaults.ts +1 -0
  174. package/src/permissions/trust-store.ts +23 -1
  175. package/src/playbooks/playbook-compiler.ts +1 -1
  176. package/src/prompts/system-prompt.ts +18 -2
  177. package/src/providers/anthropic/client.ts +56 -126
  178. package/src/providers/types.ts +7 -1
  179. package/src/runtime/AGENTS.md +9 -0
  180. package/src/runtime/auth/route-policy.ts +6 -3
  181. package/src/runtime/guardian-reply-router.ts +24 -22
  182. package/src/runtime/http-server.ts +2 -2
  183. package/src/runtime/invite-redemption-service.ts +19 -1
  184. package/src/runtime/invite-service.ts +25 -0
  185. package/src/runtime/pending-interactions.ts +2 -2
  186. package/src/runtime/routes/brain-graph-routes.ts +10 -90
  187. package/src/runtime/routes/conversation-routes.ts +9 -1
  188. package/src/runtime/routes/inbound-stages/acl-enforcement.ts +21 -12
  189. package/src/runtime/routes/memory-item-routes.test.ts +754 -0
  190. package/src/runtime/routes/memory-item-routes.ts +503 -0
  191. package/src/runtime/routes/session-management-routes.ts +3 -3
  192. package/src/runtime/routes/settings-routes.ts +2 -2
  193. package/src/runtime/routes/trust-rules-routes.ts +14 -0
  194. package/src/runtime/routes/workspace-routes.ts +2 -1
  195. package/src/security/keychain-broker-client.ts +17 -4
  196. package/src/security/secure-keys.ts +25 -3
  197. package/src/security/token-manager.ts +36 -36
  198. package/src/skills/catalog-install.ts +74 -18
  199. package/src/skills/skillssh-registry.ts +503 -0
  200. package/src/tools/assets/search.ts +5 -1
  201. package/src/tools/computer-use/definitions.ts +0 -10
  202. package/src/tools/computer-use/registry.ts +1 -1
  203. package/src/tools/credentials/vault.ts +1 -3
  204. package/src/tools/memory/definitions.ts +4 -13
  205. package/src/tools/memory/handlers.test.ts +83 -103
  206. package/src/tools/memory/handlers.ts +50 -85
  207. package/src/tools/schedule/create.ts +8 -1
  208. package/src/tools/schedule/update.ts +8 -1
  209. package/src/tools/skills/load.ts +25 -2
  210. package/src/__tests__/clarification-resolver.test.ts +0 -193
  211. package/src/__tests__/conflict-intent-tokenization.test.ts +0 -160
  212. package/src/__tests__/conflict-policy.test.ts +0 -269
  213. package/src/__tests__/conflict-store.test.ts +0 -372
  214. package/src/__tests__/contradiction-checker.test.ts +0 -361
  215. package/src/__tests__/entity-extractor.test.ts +0 -211
  216. package/src/__tests__/entity-search.test.ts +0 -1117
  217. package/src/__tests__/profile-compiler.test.ts +0 -392
  218. package/src/__tests__/session-conflict-gate.test.ts +0 -1228
  219. package/src/__tests__/session-profile-injection.test.ts +0 -557
  220. package/src/config/bundled-skills/knowledge-graph/SKILL.md +0 -25
  221. package/src/config/bundled-skills/knowledge-graph/TOOLS.json +0 -66
  222. package/src/config/bundled-skills/knowledge-graph/tools/graph-query.ts +0 -211
  223. package/src/daemon/session-conflict-gate.ts +0 -167
  224. package/src/daemon/session-dynamic-profile.ts +0 -77
  225. package/src/memory/clarification-resolver.ts +0 -417
  226. package/src/memory/conflict-intent.ts +0 -205
  227. package/src/memory/conflict-policy.ts +0 -127
  228. package/src/memory/conflict-store.ts +0 -410
  229. package/src/memory/contradiction-checker.ts +0 -508
  230. package/src/memory/entity-extractor.ts +0 -535
  231. package/src/memory/format-recall.ts +0 -47
  232. package/src/memory/fts-reconciler.ts +0 -165
  233. package/src/memory/job-handlers/conflict.ts +0 -200
  234. package/src/memory/profile-compiler.ts +0 -195
  235. package/src/memory/recall-cache.ts +0 -117
  236. package/src/memory/search/entity.ts +0 -535
  237. package/src/memory/search/query-expansion.test.ts +0 -70
  238. package/src/memory/search/query-expansion.ts +0 -118
  239. package/src/runtime/routes/mcp-routes.ts +0 -20
package/ARCHITECTURE.md CHANGED
@@ -692,15 +692,10 @@ graph LR
692
692
  MSG["messages<br/>───────────────<br/>id, conversation_id (FK)<br/>role: user | assistant<br/>content: JSON array<br/>created_at"]
693
693
  TOOL["tool_invocations<br/>───────────────<br/>tool_name, input, result<br/>decision, risk_level<br/>duration_ms"]
694
694
  SEG["memory_segments<br/>───────────────<br/>Text chunks for retrieval<br/>Linked to messages<br/>token_estimate per segment"]
695
- FTS["memory_segment_fts<br/>───────────────<br/>FTS5 virtual table<br/>Auto-synced via triggers<br/>Powers lexical search"]
696
695
  ITEMS["memory_items<br/>───────────────<br/>Extracted facts/entities<br/>kind, subject, statement<br/>confidence, fingerprint (dedup)<br/>verification_state, scope_id<br/>first/last seen timestamps"]
697
- CONFLICTS["memory_item_conflicts<br/>───────────────<br/>Pending/resolved contradiction pairs<br/>existing_item_id + candidate_item_id<br/>clarification question + resolution note<br/>partial unique pending pair index"]
698
- ENTITIES["memory_entities<br/>───────────────<br/>Canonical entities + aliases<br/>mention_count, first/last seen<br/>Resolved across messages"]
699
- RELS["memory_entity_relations<br/>───────────────<br/>Directional entity edges<br/>Unique by source/target/relation<br/>first/last seen + evidence"]
700
- ITEM_ENTS["memory_item_entities<br/>───────────────<br/>Join table linking extracted<br/>memory_items to entities"]
701
696
  SUM["memory_summaries<br/>───────────────<br/>scope: conversation | weekly<br/>Compressed history for context<br/>window management"]
702
697
  EMB["memory_embeddings<br/>───────────────<br/>target: segment | item | summary<br/>provider + model metadata<br/>vector_json (float array)<br/>Powers semantic search"]
703
- JOBS["memory_jobs<br/>───────────────<br/>Async task queue<br/>Types: embed, extract,<br/>summarize, backfill,<br/>conflict resolution, cleanup<br/>Status: pending → running →<br/>completed | failed"]
698
+ JOBS["memory_jobs<br/>───────────────<br/>Async task queue<br/>Types: embed, extract,<br/>summarize, backfill, cleanup<br/>Status: pending → running →<br/>completed | failed"]
704
699
  ATT["attachments<br/>───────────────<br/>base64-encoded file data<br/>mime_type, size_bytes<br/>Linked to messages via<br/>message_attachments join"]
705
700
  REM["reminders<br/>───────────────<br/>One-time scheduled reminders<br/>label, message, fireAt<br/>mode: notify | execute<br/>status: pending → fired | cancelled<br/>routing_intent: single_channel |<br/>multi_channel | all_channels<br/>routing_hints_json (free-form)"]
706
701
  SCHED_JOBS["cron_jobs (recurrence schedules)<br/>───────────────<br/>Recurring schedule definitions<br/>cron_expression: cron or RRULE string<br/>schedule_syntax: 'cron' | 'rrule'<br/>timezone, message, next_run_at<br/>enabled, retry_count<br/>Legacy alias: scheduleJobs"]
@@ -940,8 +935,7 @@ graph TB
940
935
  end
941
936
 
942
937
  subgraph "Text Q&A Session"
943
- TEXT_TOOLS["Tools: sandbox file_* / bash,<br/>host_file_* / host_bash,<br/>ui_show, ...<br/>+ dynamically projected skill tools<br/>(browser_* via bundled browser skill)"]
944
- ESCALATE["computer_use_request_control<br/>(proxy tool)"]
938
+ TEXT_TOOLS["Tools: sandbox file_* / bash,<br/>host_file_* / host_bash,<br/>ui_show, ...<br/>+ dynamically projected skill tools<br/>(browser_* via bundled browser skill,<br/>computer_use_* via bundled computer-use skill)"]
945
939
  end
946
940
 
947
941
  SUBMIT --> SLASH_CHECK
@@ -953,22 +947,21 @@ graph TB
953
947
  CLASSIFIER -->|"text_qa"| QA_ROUTE
954
948
 
955
949
  QA_ROUTE --> TEXT_TOOLS
956
- TEXT_TOOLS -.->|"User explicitly requests<br/>computer control"| ESCALATE
957
- ESCALATE -.->|"Creates CU session<br/>via surfaceProxyResolver"| CU_ROUTE
950
+ TEXT_TOOLS -.->|"computer_use_* actions<br/>forwarded via HostCuProxy"| CU_ROUTE
958
951
  ```
959
952
 
960
953
  ### Action Execution Hierarchy
961
954
 
962
955
  The text_qa system prompt includes an action execution hierarchy that guides tool selection toward the least invasive method:
963
956
 
964
- | Priority | Method | Tool | When to use |
965
- | --------------- | ------------------------------ | ------------------------------------- | ----------------------------------------------------------- |
966
- | **BEST** | Sandboxed filesystem/shell | `file_*`, `bash` | Work that can stay isolated in sandbox filesystem |
967
- | **BETTER** | Explicit host filesystem/shell | `host_file_*`, `host_bash` | Host reads/writes/commands that must touch the real machine |
968
- | **GOOD** | Headless browser | `browser_*` (bundled `browser` skill) | Web automation, form filling, scraping (background) |
969
- | **LAST RESORT** | Foreground computer use | `computer_use_request_control` | Only on explicit user request ("go ahead", "take over") |
957
+ | Priority | Method | Tool | When to use |
958
+ | --------------- | ------------------------------ | ----------------------------------------------- | ----------------------------------------------------------- |
959
+ | **BEST** | Sandboxed filesystem/shell | `file_*`, `bash` | Work that can stay isolated in sandbox filesystem |
960
+ | **BETTER** | Explicit host filesystem/shell | `host_file_*`, `host_bash` | Host reads/writes/commands that must touch the real machine |
961
+ | **GOOD** | Headless browser | `browser_*` (bundled `browser` skill) | Web automation, form filling, scraping (background) |
962
+ | **LAST RESORT** | Foreground computer use | `computer_use_*` (bundled `computer-use` skill) | Only on explicit user request ("go ahead", "take over") |
970
963
 
971
- The `computer_use_request_control` tool is a core proxy tool available only to text_qa sessions. When invoked, the session's `surfaceProxyResolver` creates a CU session and sends a `task_routed` message to the client, effectively escalating from text_qa to foreground computer use. The CU session constructor sets `preactivatedSkillIds: ['computer-use']`, and its `getProjectedCuToolDefinitions()` calls `projectSkillTools()` to load the 12 `computer_use_*` action tools from the bundled `computer-use` skill (via TOOLS.json). These tools are not core-registered at daemon startup; they exist only within CU sessions through skill projection.
964
+ Computer-use tools are proxy tools provided by the bundled `computer-use` skill, preactivated via `preactivatedSkillIds` in desktop sessions. Each tool forwards actions to the connected macOS client via `HostCuProxy`, which handles request/resolve proxying, step counting, loop detection, and observation formatting within the unified agent loop. These tools are not core-registered at daemon startup; they exist only through skill projection.
972
965
 
973
966
  ### Sandbox Filesystem and Host Access
974
967
 
@@ -988,7 +981,7 @@ graph TB
988
981
  SBPL --> SB_FS["Sandbox filesystem root<br/>~/.vellum/workspace"]
989
982
  BWRAP --> SB_FS
990
983
 
991
- EXEC -->|"host_file_* / host_bash / computer_use_request_control"| HOST_TOOLS["Host-target tools<br/>(unchanged by backend choice)"]
984
+ EXEC -->|"host_file_* / host_bash"| HOST_TOOLS["Host-target tools<br/>(unchanged by backend choice)"]
992
985
  EXEC -->|"computer_use_* (skill-projected<br/>in CU sessions only)"| SKILL_CU_TOOLS["CU skill tools<br/>(bundled computer-use skill)"]
993
986
  HOST_TOOLS --> CHECK["Permission checker + trust-store"]
994
987
  SKILL_CU_TOOLS --> CHECK
@@ -1005,7 +998,7 @@ graph TB
1005
998
  - **Host tools unchanged**: `host_bash`, `host_file_read`, `host_file_write`, and `host_file_edit` always execute directly on the host regardless of which sandbox backend is active.
1006
999
  - Sandbox defaults: `file_*` and `bash` execute within `~/.vellum/workspace`.
1007
1000
  - Host access is explicit: `host_file_read`, `host_file_write`, `host_file_edit`, and `host_bash` are separate tools.
1008
- - Prompt defaults: host tools, `computer_use_request_control`, and `computer_use_*` skill-projected actions default to `ask` unless a trust rule allowlists/denylists them.
1001
+ - Prompt defaults: host tools and `computer_use_*` skill-projected actions default to `ask` unless a trust rule allowlists/denylists them.
1009
1002
  - Browser tool defaults: all `browser_*` tools are auto-allowed by default via seeded allow rules at priority 100, preserving the frictionless UX from when browser was a core tool.
1010
1003
  - Confirmation payloads include `executionTarget` (`sandbox` or `host`) so clients can label where the action will run.
1011
1004
 
@@ -1187,16 +1180,16 @@ skills/<skill-id>/
1187
1180
 
1188
1181
  The following capabilities ship as bundled skills in `assistant/src/config/bundled-skills/`:
1189
1182
 
1190
- | Skill ID | Tools | Purpose |
1191
- | --------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
1192
- | `browser` | `browser_navigate`, `browser_snapshot`, `browser_screenshot`, `browser_close`, `browser_click`, `browser_type`, `browser_press_key`, `browser_wait_for`, `browser_extract`, `browser_fill_credential` | Headless browser automation — web scraping, form filling, interaction (previously core-registered as `headless-browser`; now skill-provided with default allow rules) |
1193
- | `gmail` | Gmail search, archive, send, etc. | Email management via OAuth2 integration |
1194
- | `claude-code` | Claude Code tool | Delegate coding tasks to Claude Code subprocess |
1195
- | `computer-use` | `computer_use_click`, `computer_use_double_click`, `computer_use_right_click`, `computer_use_type_text`, `computer_use_key`, `computer_use_scroll`, `computer_use_drag`, `computer_use_open_app`, `computer_use_run_applescript`, `computer_use_wait`, `computer_use_done`, `computer_use_respond` | Computer-use action tools — internally preactivated by `ComputerUseSession` via `preactivatedSkillIds`; not user-invocable or model-discoverable in text sessions. Each wrapper script forwards to `forwardComputerUseProxyTool()` which uses the session's proxy resolver to send actions to the macOS client. |
1196
- | `weather` | `get-weather` | Fetch current weather data |
1197
- | `app-builder` | `app_create`, `app_list`, `app_query`, `app_update`, `app_delete`, `app_file_list`, `app_file_read`, `app_file_edit`, `app_file_write` | Dynamic app authoring — CRUD and file-level editing for persistent apps (activated via `skill_load app-builder`; `app_open` remains a core proxy tool) |
1198
- | `self-upgrade` | (instruction-only) | Self-improvement workflow |
1199
- | `start-the-day` | (instruction-only) | Morning briefing routine |
1183
+ | Skill ID | Tools | Purpose |
1184
+ | --------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
1185
+ | `browser` | `browser_navigate`, `browser_snapshot`, `browser_screenshot`, `browser_close`, `browser_click`, `browser_type`, `browser_press_key`, `browser_wait_for`, `browser_extract`, `browser_fill_credential` | Headless browser automation — web scraping, form filling, interaction (previously core-registered as `headless-browser`; now skill-provided with default allow rules) |
1186
+ | `gmail` | Gmail search, archive, send, etc. | Email management via OAuth2 integration |
1187
+ | `claude-code` | Claude Code tool | Delegate coding tasks to Claude Code subprocess |
1188
+ | `computer-use` | `computer_use_observe`, `computer_use_click`, `computer_use_type_text`, `computer_use_key`, `computer_use_scroll`, `computer_use_drag`, `computer_use_wait`, `computer_use_open_app`, `computer_use_run_applescript`, `computer_use_done`, `computer_use_respond` | Computer-use proxy tools — preactivated via `preactivatedSkillIds` in desktop sessions. Each tool forwards actions to the connected macOS client via `HostCuProxy`, which handles request/resolve proxying, step counting, loop detection, and observation formatting within the unified agent loop. |
1189
+ | `weather` | `get-weather` | Fetch current weather data |
1190
+ | `app-builder` | `app_create`, `app_list`, `app_query`, `app_update`, `app_delete`, `app_file_list`, `app_file_read`, `app_file_edit`, `app_file_write` | Dynamic app authoring — CRUD and file-level editing for persistent apps (activated via `skill_load app-builder`; `app_open` remains a core proxy tool) |
1191
+ | `self-upgrade` | (instruction-only) | Self-improvement workflow |
1192
+ | `start-the-day` | (instruction-only) | Morning briefing routine |
1200
1193
 
1201
1194
  ### Activation and Projection Flow
1202
1195
 
@@ -1240,7 +1233,7 @@ graph TB
1240
1233
  RESOLVE --> PROVIDER
1241
1234
  ```
1242
1235
 
1243
- **Internal preactivation**: Some bundled skills are preactivated programmatically rather than by user slash commands or model discovery. For example, `ComputerUseSession` sets `preactivatedSkillIds: ['computer-use']` in its constructor, causing `projectSkillTools()` to load the 12 `computer_use_*` tool definitions from the bundled skill's `TOOLS.json` on the first turn. These tools are never exposed in text sessions — they only appear in the CU session's agent loop.
1236
+ **Internal preactivation**: Some bundled skills are preactivated programmatically rather than by user slash commands or model discovery. For example, desktop sessions set `preactivatedSkillIds: ['computer-use']`, causing `projectSkillTools()` to load the 11 `computer_use_*` tool definitions from the bundled skill's `TOOLS.json` on the first turn. These proxy tools forward actions to the connected macOS client via `HostCuProxy`.
1244
1237
 
1245
1238
  ### Skill Tool Execution
1246
1239
 
@@ -1917,10 +1910,8 @@ Connected channels are resolved at signal emission time: vellum is always includ
1917
1910
  | User preferences | UserDefaults | plist | Foundation | Permanent |
1918
1911
  | Session logs | `~/Library/.../logs/session-*.json` | JSON per session | Swift Codable | Unbounded |
1919
1912
  | Conversations & messages | `~/.vellum/workspace/data/db/assistant.db` | SQLite + WAL | Drizzle ORM (Bun) | Permanent |
1920
- | Memory segments & FTS | `~/.vellum/workspace/data/db/assistant.db` | SQLite FTS5 | Drizzle ORM | Permanent |
1913
+ | Memory segments | `~/.vellum/workspace/data/db/assistant.db` | SQLite | Drizzle ORM | Permanent |
1921
1914
  | Extracted facts | `~/.vellum/workspace/data/db/assistant.db` | SQLite | Drizzle ORM | Permanent, deduped |
1922
- | Conflict lifecycle rows | `~/.vellum/workspace/data/db/assistant.db` | SQLite | Drizzle ORM | Pending until clarified, then retained as resolved history |
1923
- | Entity graph (entities/relations/item links) | `~/.vellum/workspace/data/db/assistant.db` | SQLite | Drizzle ORM | Permanent, deduped by unique relation edge |
1924
1915
  | Embeddings | `~/.vellum/workspace/data/db/assistant.db` | JSON float arrays | Drizzle ORM | Permanent |
1925
1916
  | Async job queue | `~/.vellum/workspace/data/db/assistant.db` | SQLite | Drizzle ORM | Completed jobs persist |
1926
1917
  | Attachments | `~/.vellum/workspace/data/db/assistant.db` | Base64 in SQLite | Drizzle ORM | Permanent |
package/README.md CHANGED
@@ -16,7 +16,7 @@ CLI / macOS app / iOS app
16
16
  │ ├── Google Gemini (secondary)
17
17
  │ └── Ollama (local models)
18
18
 
19
- ├── Memory System (FTS5 + Qdrant + Entity Graph)
19
+ ├── Memory System (Qdrant Hybrid Search)
20
20
  ├── Skill Tool System (bundled + managed + workspace)
21
21
  ├── Swarm Orchestration (DAG scheduler + worker pool)
22
22
  ├── Script Proxy (credential injection + MITM)
@@ -99,7 +99,7 @@ assistant/
99
99
  │ ├── daemon/ # Daemon server, session management
100
100
  │ ├── agent/ # Agent loop and LLM interaction
101
101
  │ ├── providers/ # LLM provider integrations (Anthropic, OpenAI, Gemini, Ollama)
102
- │ ├── memory/ # Conversation store, memory indexer, recall (FTS5 + Qdrant)
102
+ │ ├── memory/ # Conversation store, memory indexer, recall (Qdrant hybrid search)
103
103
  │ ├── skills/ # Skill catalog, loading, and tool factory
104
104
  │ ├── tools/ # Built-in tool definitions
105
105
  │ ├── swarm/ # Swarm orchestration (DAG scheduler, worker pool)
@@ -446,7 +446,7 @@ If no guardian binding exists, escalation fails closed — the message is denied
446
446
 
447
447
  ## Database
448
448
 
449
- SQLite via Drizzle ORM, stored at `~/.vellum/workspace/data/db/assistant.db`. Key tables include conversations, messages, tool invocations, attachments, memory segments (with FTS5), memory items, entities, reminders, and recurrence schedules (cron + RRULE).
449
+ SQLite via Drizzle ORM, stored at `~/.vellum/workspace/data/db/assistant.db`. Key tables include conversations, messages, tool invocations, attachments, memory segments, memory items, reminders, and recurrence schedules (cron + RRULE).
450
450
 
451
451
  > **Note:** The recurrence schedule system supports both cron expressions and iCalendar RRULE syntax. Use the `expression` field with an explicit `syntax` discriminator. See [`docs/architecture/scheduling.md`](docs/architecture/scheduling.md) for details.
452
452
 
@@ -12,52 +12,51 @@ graph TB
12
12
  INDEX["Memory Indexer"]
13
13
  SEGMENT["Split into segments<br/>→ memory_segments"]
14
14
  EXTRACT_JOB["Enqueue extract_items job<br/>→ memory_jobs"]
15
- CONFLICT_RESOLVE_JOB["Enqueue resolve_pending_conflicts_for_message<br/>(dedupe by type+message+scope)<br/>→ memory_jobs"]
16
15
  SUMMARY_JOB["Enqueue build_conversation_summary<br/>→ memory_jobs"]
17
16
  end
18
17
 
19
18
  subgraph "Background Worker (polls every 1.5s)"
20
19
  WORKER["MemoryJobsWorker"]
21
- EMBED_SEG["embed_segment<br/>→ memory_embeddings"]
22
- EMBED_ITEM["embed_item<br/>→ memory_embeddings"]
23
- EMBED_SUM["embed_summary<br/>→ memory_embeddings"]
24
- EXTRACT["extract_items<br/>→ memory_items +<br/>memory_item_sources"]
25
- CHECK_CONTRA["check_contradictions<br/>→ contradiction/update merge OR<br/>pending_clarification + memory_item_conflicts"]
26
- RESOLVE_PENDING["resolve_pending_conflicts_for_message<br/>message-scoped clarification resolution<br/>→ resolved conflict + item status updates"]
27
- CLEAN_CONFLICTS["cleanup_resolved_conflicts<br/>delete resolved conflict rows<br/>older than retention window"]
28
- CLEAN_SUPERSEDED["cleanup_stale_superseded_items<br/>delete stale superseded items<br/>and item embedding rows"]
29
- EXTRACT_ENTITIES["extract_entities<br/>→ memory_entities +<br/>memory_item_entities +<br/>memory_entity_relations"]
30
- BACKFILL_REL["backfill_entity_relations<br/>checkpointed message scan<br/>→ enqueue extract_entities"]
20
+ EMBED_SEG["embed_segment<br/>→ Qdrant (dense + sparse)"]
21
+ EMBED_ITEM["embed_item<br/>→ Qdrant (dense + sparse)"]
22
+ EMBED_SUM["embed_summary<br/>→ Qdrant (dense + sparse)"]
23
+ EXTRACT["extract_items<br/>→ memory_items +<br/>memory_item_sources<br/>(LLM-directed supersession)"]
24
+ CLEAN_SUPERSEDED["cleanup_stale_superseded_items<br/>delete stale superseded items<br/>and Qdrant vectors"]
31
25
  BUILD_SUM["build_conversation_summary<br/>→ memory_summaries"]
32
- WEEKLY["refresh_weekly_summary<br/>→ memory_summaries"]
33
26
  end
34
27
 
35
- subgraph "Embedding Providers"
28
+ subgraph "Embedding Provider Selection (selectEmbeddingBackend)"
29
+ PROVIDER_SELECT["Provider Selection<br/>auto: local → OpenAI → Gemini → Ollama<br/>or explicit config override"]
36
30
  LOCAL_EMB["Local (ONNX)<br/>bge-small-en-v1.5"]
37
31
  OAI_EMB["OpenAI<br/>text-embedding-3-small"]
38
32
  GEM_EMB["Gemini<br/>gemini-embedding-001"]
39
33
  OLL_EMB["Ollama<br/>nomic-embed-text"]
40
34
  end
41
35
 
36
+ subgraph "Sparse Embedding (in-process)"
37
+ SPARSE_GEN["generateSparseEmbedding()<br/>TF-IDF, FNV-1a hashing<br/>(no external calls)"]
38
+ end
39
+
40
+ subgraph "Qdrant Vector Store"
41
+ DENSE["Named vector: dense<br/>(cosine similarity)"]
42
+ SPARSE["Named vector: sparse<br/>(TF-IDF based)"]
43
+ RRF["Query API:<br/>Reciprocal Rank Fusion"]
44
+ end
45
+
42
46
  subgraph "Read Path (Memory Recall)"
47
+ NEEDS_MEM["needsMemory gate<br/>(skip short/empty/tool-result turns)"]
43
48
  QUERY["Recall Query Builder<br/>User request + compacted context summary"]
44
- CONFLICT_GATE["Soft Conflict Gate<br/>dismiss non-actionable conflicts (kind + statement + provenance policy)<br/>attempt internal resolution from user turn<br/>relevance-based; never produces user-facing prompts"]
45
- PROFILE_BUILD["Dynamic Profile Compiler<br/>active trusted profile memories<br/>user_confirmed > user_reported > assistant_inferred"]
46
- PROFILE_INJECT["Inject profile context block<br/>into runtime user tail<br/>(strict token cap)"]
47
49
  BUDGET["Dynamic Recall Budget<br/>computeRecallBudget()<br/>from prompt headroom"]
48
- LEX["Lexical Search<br/>FTS5 on memory_segment_fts"]
49
- SEM["Semantic Search<br/>Qdrant cosine similarity"]
50
- ENTITY_SEARCH["Entity Search<br/>Seed name/alias matching"]
51
- REL_EXPAND["Relation Expansion<br/>1-hop via memory_entity_relations<br/>→ neighbor item links"]
52
- DIRECT["Direct Item Search<br/>LIKE on subject/statement"]
50
+ EMBED_Q["Generate dense + sparse<br/>query embeddings"]
51
+ HYBRID["Hybrid Search<br/>dense + sparse RRF on Qdrant"]
52
+ RECENCY["Recency Search<br/>conversation-scoped, DB only"]
53
+ MERGE["Merge + Deduplicate<br/>weighted score combination"]
53
54
  SCOPE["Scope Filter<br/>scope_id filtering<br/>(strict | allow_global_fallback)<br/>Private threads: own scope + 'default'"]
54
- MERGE["RRF Merge<br/>+ Trust Weighting<br/>+ Freshness Decay"]
55
- CAPS["Source Caps<br/>bound per-source candidate count"]
56
- RERANK["LLM Re-ranking<br/>(Haiku, optional)"]
57
- TRIM["Token Trim<br/>maxInjectTokens override<br/>or static fallback"]
58
- INJECT["Attention-ordered<br/>Injection into prompt"]
59
- TELEMETRY["Emit memory_recalled<br/>hits + relation counters +<br/>ranking diagnostics"]
60
- STRIP_PROFILE["Strip injected dynamic profile block<br/>before persisting conversation history"]
55
+ TIER["Tier Classification<br/>score > 0.8 → tier 1<br/>score > 0.6 → tier 2<br/>below → dropped"]
56
+ STALE["Staleness Computation<br/>kind-specific lifetimes<br/>+ reinforcement from<br/>source conversation count"]
57
+ DEMOTE["Stale Demotion<br/>very_stale tier 1 → tier 2"]
58
+ INJECT["Two-Layer XML Injection<br/>budget-aware rendering"]
59
+ TELEMETRY["Emit memory_recalled<br/>tier counts + hybrid search ms +<br/>staleness stats"]
61
60
  end
62
61
 
63
62
  subgraph "Context Window Management"
@@ -83,49 +82,47 @@ graph TB
83
82
  STORE --> INDEX
84
83
  INDEX --> SEGMENT
85
84
  INDEX --> EXTRACT_JOB
86
- INDEX --> CONFLICT_RESOLVE_JOB
87
85
  INDEX --> SUMMARY_JOB
88
86
 
89
87
  WORKER --> EMBED_SEG
90
88
  WORKER --> EMBED_ITEM
91
89
  WORKER --> EMBED_SUM
92
90
  WORKER --> EXTRACT
93
- WORKER --> CHECK_CONTRA
94
- WORKER --> RESOLVE_PENDING
95
- WORKER --> CLEAN_CONFLICTS
96
91
  WORKER --> CLEAN_SUPERSEDED
97
- WORKER --> EXTRACT_ENTITIES
98
- WORKER --> BACKFILL_REL
99
92
  WORKER --> BUILD_SUM
100
- WORKER --> WEEKLY
101
- EXTRACT --> CHECK_CONTRA
102
- EXTRACT --> EXTRACT_ENTITIES
103
-
104
- EMBED_SEG --> OAI_EMB
105
- EMBED_SEG --> GEM_EMB
106
- EMBED_SEG --> OLL_EMB
107
-
108
- QUERY --> CONFLICT_GATE
109
- CONFLICT_GATE --> PROFILE_BUILD
110
- PROFILE_BUILD --> PROFILE_INJECT
111
- CONFLICT_GATE --> LEX
112
- CONFLICT_GATE --> SEM
113
- CONFLICT_GATE --> ENTITY_SEARCH
114
- CONFLICT_GATE --> DIRECT
115
- LEX --> SCOPE
116
- SEM --> SCOPE
117
- ENTITY_SEARCH --> REL_EXPAND
118
- REL_EXPAND --> SCOPE
119
- DIRECT --> SCOPE
93
+
94
+ EMBED_SEG --> PROVIDER_SELECT
95
+ EMBED_ITEM --> PROVIDER_SELECT
96
+ EMBED_SUM --> PROVIDER_SELECT
97
+ PROVIDER_SELECT --> LOCAL_EMB
98
+ PROVIDER_SELECT --> OAI_EMB
99
+ PROVIDER_SELECT --> GEM_EMB
100
+ PROVIDER_SELECT --> OLL_EMB
101
+ LOCAL_EMB --> DENSE
102
+ OAI_EMB --> DENSE
103
+ GEM_EMB --> DENSE
104
+ OLL_EMB --> DENSE
105
+ EMBED_SEG --> SPARSE_GEN
106
+ EMBED_ITEM --> SPARSE_GEN
107
+ EMBED_SUM --> SPARSE_GEN
108
+ SPARSE_GEN --> SPARSE
109
+
110
+ NEEDS_MEM --> QUERY
111
+ QUERY --> EMBED_Q
112
+ EMBED_Q --> PROVIDER_SELECT
113
+ EMBED_Q --> SPARSE_GEN
114
+ EMBED_Q --> HYBRID
115
+ HYBRID --> RRF
116
+ QUERY --> RECENCY
117
+ HYBRID --> SCOPE
118
+ RECENCY --> SCOPE
120
119
  SCOPE --> MERGE
121
- MERGE --> CAPS
122
- CAPS --> RERANK
123
- RERANK --> TRIM
124
- BUDGET --> TRIM
125
- TRIM --> INJECT
126
- PROFILE_INJECT --> INJECT
120
+ MERGE --> TIER
121
+ TIER --> STALE
122
+ STALE --> DEMOTE
123
+ BUDGET --> INJECT
124
+ DEMOTE --> INJECT
127
125
  INJECT --> TELEMETRY
128
- INJECT --> STRIP_PROFILE
129
126
 
130
127
  CTX --> COMPACT
131
128
  COMPACT --> GUARDS
@@ -158,92 +155,159 @@ The key distinction: normal compaction is a cost-optimized background process th
158
155
 
159
156
  ### Memory Retrieval Config Knobs (Defaults)
160
157
 
161
- | Config key | Default | Purpose |
162
- | --------------------------------------------------------- | ----------------------------------------------------------------: | ------------------------------------------------------------------------------------------------------------------ |
163
- | `memory.retrieval.dynamicBudget.enabled` | `true` | Toggle per-turn recall budget calculation from live prompt headroom. |
164
- | `memory.retrieval.dynamicBudget.minInjectTokens` | `1200` | Lower clamp for computed recall injection budget. |
165
- | `memory.retrieval.dynamicBudget.maxInjectTokens` | `10000` | Upper clamp for computed recall injection budget. |
166
- | `memory.retrieval.dynamicBudget.targetHeadroomTokens` | `10000` | Reserved headroom to keep free for response generation/tool traces. |
167
- | `memory.entity.extractRelations.enabled` | `true` | Enable relation edge extraction and persistence in `memory_entity_relations`. |
168
- | `memory.entity.extractRelations.backfillBatchSize` | `200` | Batch size for checkpointed `backfill_entity_relations` jobs. |
169
- | `memory.entity.relationRetrieval.enabled` | `true` | Enable one-hop relation expansion from matched seed entities at recall time. |
170
- | `memory.entity.relationRetrieval.maxSeedEntities` | `8` | Maximum matched seed entities from the query. |
171
- | `memory.entity.relationRetrieval.maxNeighborEntities` | `20` | Maximum unique neighbor entities expanded from relation edges. |
172
- | `memory.entity.relationRetrieval.maxEdges` | `40` | Maximum relation edges traversed during expansion. |
173
- | `memory.entity.relationRetrieval.neighborScoreMultiplier` | `0.7` | Downweight multiplier for relation-expanded candidates vs direct entity hits. |
174
- | `memory.conflicts.enabled` | `true` | Enable soft conflict gate for unresolved `memory_item_conflicts`. |
175
- | `memory.conflicts.resolverLlmTimeoutMs` | `12000` | Timeout bound for clarification resolver LLM fallback. |
176
- | `memory.conflicts.relevanceThreshold` | `0.3` | Similarity threshold for deciding whether a pending conflict is relevant to the current request. |
177
- | `memory.conflicts.gateMode` | `'soft'` | Conflict gate strategy. Currently only `'soft'` is supported (resolves conflicts internally without user prompts). |
178
- | `memory.conflicts.conflictableKinds` | `['preference', 'profile', 'constraint', 'instruction', 'style']` | Memory item kinds eligible for conflict detection. Items with kinds outside this list are auto-dismissed. |
179
- | `memory.profile.enabled` | `true` | Enable dynamic profile compilation from active trusted profile/preference/constraint/instruction memories. |
180
- | `memory.profile.maxInjectTokens` | `800` | Hard token cap enforced by `ProfileCompiler` when generating the runtime profile block. |
158
+ | Config key | Default | Purpose |
159
+ | ----------------------------------------------------- | ------------------------: | -------------------------------------------------------------------- |
160
+ | `memory.retrieval.dynamicBudget.enabled` | `true` | Toggle per-turn recall budget calculation from live prompt headroom. |
161
+ | `memory.retrieval.dynamicBudget.minInjectTokens` | `1200` | Lower clamp for computed recall injection budget. |
162
+ | `memory.retrieval.dynamicBudget.maxInjectTokens` | `10000` | Upper clamp for computed recall injection budget. |
163
+ | `memory.retrieval.dynamicBudget.targetHeadroomTokens` | `10000` | Reserved headroom to keep free for response generation/tool traces. |
164
+ | `memory.retrieval.maxInjectTokens` | `10000` | Static fallback when dynamic budget is disabled. |
165
+ | `memory.retrieval.scopePolicy` | `'allow_global_fallback'` | Scope filtering strategy: `'strict'` or `'allow_global_fallback'`. |
181
166
 
182
167
  ### Memory Recall Debugging Playbook
183
168
 
184
169
  1. Run a recall-heavy turn and inspect `memory_recalled` events in the client trace stream.
185
170
  2. Validate baseline counters:
186
- - `lexicalHits`, `semanticHits`, `recencyHits`, `entityHits`
187
- - `relationSeedEntityCount`, `relationTraversedEdgeCount`, `relationNeighborEntityCount`, `relationExpandedItemCount`
171
+ - `semanticHits`, `recencyHits`
172
+ - `tier1Count`, `tier2Count`
173
+ - `hybridSearchLatencyMs`
188
174
  - `mergedCount`, `selectedCount`, `injectedTokens`, `latencyMs`
189
175
  3. Cross-check context pressure with `context_compacted` events:
190
176
  - `previousEstimatedInputTokens` vs `estimatedInputTokens`
191
177
  - `summaryCalls`, `compactedMessages`
192
178
  4. If dynamic budget is enabled, verify `injectedTokens` stays within the configured min/max clamps for `dynamicBudget`.
193
- 5. Run `bun run src/index.ts memory status` and confirm cleanup pressure signals:
194
- - `Pending conflicts`, `Resolved conflicts`, `Oldest pending conflict age`
195
- - job queue counts for `cleanup_resolved_conflicts` / `cleanup_stale_superseded_items`
196
- 6. Before tuning ranking or relation settings, run:
179
+ 5. Inspect staleness distribution in debug logs:
180
+ - `fresh`, `aging`, `stale`, `very_stale` counts
181
+ - Check for unexpected tier demotions (very_stale tier 1 items demoted to tier 2)
182
+ 6. Before tuning ranking settings, run:
197
183
  - `cd assistant && bun test src/__tests__/context-memory-e2e.test.ts`
198
184
  - `cd assistant && bun test src/__tests__/memory-context-benchmark.benchmark.test.ts`
199
185
  - `cd assistant && bun test src/__tests__/memory-recall-quality.test.ts`
200
- - `cd assistant && bun test src/__tests__/memory-regressions.test.ts -t "relation"`
201
186
  7. After tuning, rerun the same suite and compare:
202
- - relation counters (coverage)
187
+ - tier counts (coverage)
203
188
  - selected count / injected tokens (budget safety)
204
189
  - latency and ordering regressions via top candidate snapshots
205
190
 
206
- ### Conflict Lifecycle and Profile Hygiene
191
+ ### Write Path — Extraction and Supersession
207
192
 
208
193
  ```mermaid
209
194
  stateDiagram-v2
210
- [*] --> ActiveItems : extract_items/check_contradictions
211
- ActiveItems --> PendingConflict : ambiguous_contradiction\n(candidate -> pending_clarification)
212
- PendingConflict --> PendingConflict : internal evaluation\n(relevance check, no user prompt)
213
- PendingConflict --> Dismissed : non-actionable\n(kind policy + transient statement filter)
214
- PendingConflict --> ResolvedKeepExisting : clarification resolver\n+ applyConflictResolution
215
- PendingConflict --> ResolvedKeepCandidate : clarification resolver\n+ applyConflictResolution
216
- PendingConflict --> ResolvedMerge : clarification resolver\n+ applyConflictResolution
217
- ResolvedKeepExisting --> CleanupConflicts : cleanup_resolved_conflicts
218
- ResolvedKeepCandidate --> CleanupConflicts : cleanup_resolved_conflicts
219
- ResolvedMerge --> CleanupConflicts : cleanup_resolved_conflicts
220
- ResolvedKeepExisting --> SupersededItems : candidate superseded
221
- ResolvedMerge --> SupersededItems : merged-from candidate superseded
222
- SupersededItems --> CleanupItems : cleanup_stale_superseded_items
195
+ [*] --> ActiveItem : extract_items\n(LLM or pattern-based)
196
+ ActiveItem --> Superseded : explicit supersession\n(overrideConfidence = "explicit"\n+ supersedes = oldItemId)
197
+ ActiveItem --> ActiveItem : tentative/inferred override\n(both items coexist)
198
+ ActiveItem --> Superseded : subject-match fallback\n(same kind + subject,\nno LLM-directed supersession)
199
+ Superseded --> Cleanup : cleanup_stale_superseded_items\n(delete from DB + Qdrant)
223
200
  ```
224
201
 
225
- ### Internal-Only Conflict Handling
202
+ **Item extraction** uses LLM-powered extraction (with pattern-based fallback) to identify memorable information from conversation messages. Each extracted item belongs to one of six kinds:
203
+
204
+ | Kind | Description | Base Lifetime |
205
+ | ------------ | ------------------------------------------------- | ------------- |
206
+ | `identity` | Personal info, facts, relationships | 6 months |
207
+ | `preference` | Likes, dislikes, preferred approaches/tools | 3 months |
208
+ | `constraint` | Rules, requirements, directives | 1 month |
209
+ | `project` | Project details, repos, tech stacks, action items | 2 weeks |
210
+ | `decision` | Choices made, approaches selected | 2 weeks |
211
+ | `event` | Deadlines, milestones, meetings, dates | 3 days |
212
+
213
+ **Supersession chains** replace the old conflict resolution system. When the LLM extracts a new item that updates an existing one, it sets `supersedes` to the old item's ID and `overrideConfidence` to one of three levels:
214
+
215
+ - `explicit` — Clear override signal (e.g. "I changed my mind about X"). The old item is marked `superseded` and removed from Qdrant.
216
+ - `tentative` — Ambiguous; both items coexist as active.
217
+ - `inferred` — Weak signal; both items coexist (logged for observability).
218
+
219
+ A fallback subject-match supersession also runs for items without LLM-directed supersession: when a new item has the same kind and the same subject as an existing item, the old item is marked superseded.
220
+
221
+ **Semantic density gating** skips extraction for messages that are too short, consist of low-value filler (e.g. "ok", "thanks", "got it"), or have fewer than 3 words.
222
+
223
+ ### Read Path — Hybrid Recall Pipeline
224
+
225
+ The recall pipeline runs on every turn that passes the `needsMemory` gate, which skips empty, very short, and tool-result-only turns. The pipeline is orchestrated by `buildMemoryRecall()` in `retriever.ts`:
226
+
227
+ 1. **Query construction** (`query-builder.ts`): Combines the user request text (up to 2000 chars) with any in-context session summary (up to 1200 chars).
228
+
229
+ 2. **Dense + sparse embedding generation**: The query is embedded using the configured embedding provider (auto-selection order: local → OpenAI → Gemini → Ollama). A TF-IDF sparse embedding is also generated in-process using FNV-1a hashing to a 30K vocabulary with sub-linear TF weighting and L2 normalization.
230
+
231
+ 3. **Hybrid search on Qdrant**: When both dense and sparse vectors are available, the pipeline uses Qdrant's query API with two prefetch stages (dense and sparse, each fetching up to 40 candidates) fused via Reciprocal Rank Fusion (RRF). Falls back to dense-only search when sparse vectors are unavailable.
232
+
233
+ 4. **Recency supplement**: A DB-only recency search fetches the 5 most recent segments from the current conversation, providing conversation-local context even when vector search misses.
234
+
235
+ 5. **Merge and deduplicate**: Hybrid and recency candidates are merged by key. Duplicate entries keep the highest scores from each source. A weighted final score is computed: `semantic * 0.7 + recency * 0.2 + confidence * 0.1`.
226
236
 
227
- Memory conflict resolution is entirely internal and non-interruptive. The conflict gate evaluates pending conflicts on each turn, dismisses non-actionable ones (based on kind policy, statement eligibility, coherence, and provenance), and attempts resolution when user input looks like a natural clarification. At no point does the conflict system produce user-facing clarification prompts, inject conflict instructions into the assistant's response, or block the user's request. The user is never aware that a conflict exists; the runtime response path always continues answering the user's actual request. This invariant is enforced across the conflict gate (`session-conflict-gate.ts`), session memory (`session-memory.ts`), session agent loop (`session-agent-loop.ts`), and runtime assembly (`session-runtime-assembly.ts`).
237
+ 6. **Tier classification** (`tier-classifier.ts`): Score-based, deterministic classification:
238
+ - `finalScore > 0.8` → **tier 1** (high relevance)
239
+ - `finalScore > 0.6` → **tier 2** (possibly relevant)
240
+ - `finalScore <= 0.6` → dropped
228
241
 
229
- Runtime profile flow (per turn):
242
+ 7. **Staleness computation** (`staleness.ts`): Each item candidate is annotated with a staleness level based on its age relative to a kind-specific base lifetime (see table above). Reinforcement extends the effective lifetime to `baseLifetime * (1 + 0.3 * (sourceConversationCount - 1))`, so items mentioned across multiple conversations age more slowly. Staleness levels, where `ratio` is the item's age divided by its effective lifetime:
243
+ - `ratio < 0.5` → `fresh`
244
+ - `ratio <= 1.0` → `aging`
245
+ - `ratio <= 2.0` → `stale`
246
+ - `ratio > 2.0` → `very_stale`
230
247
 
231
- 1. `ProfileCompiler` builds a trusted profile block from active `profile` / `preference` / `constraint` / `instruction` items under strict token cap.
232
- 2. Session injects that block only into runtime prompt state.
233
- 3. Session strips the injected profile block before persisting conversation history, so dynamic profile context never pollutes durable message rows.
248
+ 8. **Stale demotion**: `very_stale` tier 1 candidates are demoted to tier 2, preventing old information from occupying prime injection space.
234
249
 
235
- ### Provenance-Aware Memory Pipeline
250
+ 9. **Two-layer XML injection** (`formatting.ts`): Budget-aware rendering into four XML sections:
236
251
 
237
- Every persisted message carries provenance metadata (`provenanceTrustClass`, `provenanceSourceChannel`, etc.) derived from the `TrustContext` resolved by `trust-context-resolver.ts`. This metadata records the trust class of the actor who produced the message and through which channel, enabling downstream trust decisions without re-resolving identity at read time.
252
+ ```xml
253
+ <memory_context>
254
+
255
+ <user_identity>
256
+ <!-- identity-kind tier 1 items (plain statements) -->
257
+ </user_identity>
258
+
259
+ <relevant_context>
260
+ <!-- tier 1 items that are not identity, preference, or constraint kinds (episode-wrapped with source attribution) -->
261
+ </relevant_context>
262
+
263
+ <applicable_preferences>
264
+ <!-- preference/constraint tier 1 items (plain statements) -->
265
+ </applicable_preferences>
266
+
267
+ <possibly_relevant>
268
+ <!-- tier 2 items (episode-wrapped with staleness annotations) -->
269
+ </possibly_relevant>
270
+
271
+ </memory_context>
272
+ ```
273
+
274
+ Empty sections are omitted. Each section has a per-item token budget (150 tokens for tier 1, 100 for tier 2). Tier 1 sections consume budget first; tier 2 uses the remainder.
275
+
276
+ 10. **Injection strategy**: The rendered `<memory_context>` block is injected as a separate user + assistant acknowledgment message pair before the last user message (`injectMemoryRecallAsSeparateMessage`). This separates memory context from the user's actual query.
277
+
278
+ ### Internal-Only Trust Gating
279
+
280
+ **Provenance-aware pipeline**: Every persisted message carries provenance metadata (`provenanceTrustClass`, `provenanceSourceChannel`, etc.) derived from the `TrustContext` resolved by `trust-context-resolver.ts`.
238
281
 
239
282
  Two trust gates enforce trust-class-based access control over the memory pipeline:
240
283
 
241
- - **Write gate** (`indexer.ts`): The `extract_items` and `resolve_conflicts` jobs only run for messages from trusted actors (guardian or undefined provenance). Messages from untrusted actors (`trusted_contact`, `unknown`) are still segmented and embedded — so they appear in conversation context — but no profile extraction or conflict resolution is triggered. This prevents untrusted channels from injecting or mutating long-term memory items.
284
+ - **Write gate** (`indexer.ts`): The `extract_items` job only runs for messages from trusted actors (guardian or undefined provenance). Messages from untrusted actors (`trusted_contact`, `unknown`) are still segmented and embedded — so they appear in conversation context — but no item extraction is triggered. This prevents untrusted channels from injecting or mutating long-term memory items.
242
285
 
243
- - **Read gate** (`session-memory.ts`): When the current session's actor is untrusted, the memory recall pipeline returns a no-op context — no recall injection, no dynamic profile, no conflict resolution. This ensures untrusted actors cannot surface or exploit previously extracted memory.
286
+ - **Read gate** (`session-memory.ts`): When the current session's actor is untrusted, the memory recall pipeline returns a no-op context — no recall injection. This ensures untrusted actors cannot surface or exploit previously extracted memory.
244
287
 
245
288
  Trust policy is **cross-channel and trust-class-based**: decisions use `trustContext.trustClass`, not the channel string. Desktop sessions default to `trustClass: 'guardian'`. External channels (Telegram, WhatsApp, phone) provide explicit trust context via the resolver. Messages without provenance metadata are treated as trusted (guardian); all new messages carry provenance.
246
289
 
290
+ ### Embedding Backend Selection
291
+
292
+ The embedding backend is selected based on `memory.embeddings.provider` config:
293
+
294
+ - `auto` (default): Tries local → OpenAI → Gemini → Ollama, using the first available.
295
+ - `local`: ONNX-based local model (bge-small-en-v1.5). Lazy-loaded to avoid crashing in compiled binaries where onnxruntime-node is unavailable.
296
+ - `openai`: OpenAI text-embedding-3-small. Requires `apiKeys.openai`.
297
+ - `gemini`: Gemini gemini-embedding-001. Requires `apiKeys.gemini`. This is the only backend that supports multimodal embeddings (images, audio, video).
298
+ - `ollama`: Ollama nomic-embed-text. Requires Ollama to be configured.
299
+
300
+ An in-memory LRU vector cache (32 MB cap, keyed by `sha256(provider + model + content)`) avoids redundant embedding calls for identical content. Sparse embeddings are generated in-process (no external calls).
301
+
302
+ ### Graceful Degradation
303
+
304
+ When the embedding backend or Qdrant is unavailable:
305
+
306
+ - A **circuit breaker** on Qdrant (`qdrant-circuit-breaker.ts`) tracks consecutive failures and short-circuits search calls when the breaker is open.
307
+ - If embedding generation fails and `memory.embeddings.required` is `true`, recall returns an empty result with a degradation status (`embedding_generation_failed` or `embedding_provider_down`).
308
+ - If embeddings are optional (default), the pipeline falls back to recency-only search.
309
+ - Degradation status is reported to clients via `memory_status` events.
310
+
247
311
  ---
248
312
 
249
313
  ## Private Threads — Isolated Memory and Strict Side-Effect Controls
@@ -289,8 +353,6 @@ graph TB
289
353
 
290
354
  **Read fallback**: When recalling memories for a private thread, the retriever queries both the thread's own scope and the `'default'` scope. This ensures the assistant still has access to general knowledge (user profile, preferences, facts) learned in standard threads, while private-thread-specific memories take precedence in ranking. The fallback is implemented via `ScopePolicyOverride` with `fallbackToDefault: true`, which overrides the global scope policy on a per-call basis.
291
355
 
292
- **Profile compilation**: The `ProfileCompiler` also respects this dual-scope behavior for private threads — it includes profile/preference/constraint items from both the private scope and the default scope when building the runtime profile block.
293
-
294
356
  ### SessionMemoryPolicy
295
357
 
296
358
  The daemon derives a `SessionMemoryPolicy` from the conversation's `thread_type` and `memory_scope_id` when creating or restoring a session:
@@ -333,8 +395,7 @@ This ensures that file writes, bash commands, host operations, and other mutatin
333
395
  | `assistant/src/tools/executor.ts` | `forcePromptSideEffects` gate — promotes allow to prompt for side-effect tools |
334
396
  | `assistant/src/memory/search/types.ts` | `ScopePolicyOverride` interface for per-call scope control |
335
397
  | `assistant/src/memory/retriever.ts` | `buildScopeFilter()` — builds scope ID list from override or global config |
336
- | `assistant/src/memory/profile-compiler.ts` | Dual-scope profile compilation with `includeDefaultFallback` |
337
- | `assistant/src/daemon/session-memory.ts` | Wires `scopeId` and `includeDefaultFallback` into recall and profile compilation |
398
+ | `assistant/src/daemon/session-memory.ts` | Wires `scopeId` and `includeDefaultFallback` into recall |
338
399
 
339
400
  ---
340
401
 
@@ -387,7 +448,7 @@ graph TB
387
448
 
388
449
  ### Cache compatibility
389
450
 
390
- The Anthropic provider places `cache_control: { type: 'ephemeral' }` on the **last content block** of the last two user turns. Since workspace context is prepended (first block), the cache breakpoint correctly lands on the trailing user text or dynamic profile block. This is validated by dedicated cache-compatibility tests.
451
+ The Anthropic provider places `cache_control: { type: 'ephemeral' }` on the **last content block** of the last two user turns. Since workspace context is prepended (first block), the cache breakpoint correctly lands on the trailing user text block. This is validated by dedicated cache-compatibility tests.
391
452
 
392
453
  ### Key files
393
454
 
@@ -425,7 +486,7 @@ graph TB
425
486
 
426
487
  - **Fresh each turn**: `buildTemporalContext()` is called at the start of every agent loop invocation, ensuring the model always sees the current date even in long-running conversations.
427
488
  - **Clock source invariant**: Absolute time (`now`) always comes from the assistant host clock (`Date.now()`), never from channel/client clocks.
428
- - **Timezone precedence**: If `ui.userTimezone` is configured, temporal context uses it for local-date interpretation. Otherwise it falls back to dynamic profile memory, then assistant host timezone.
489
+ - **Timezone precedence**: If `ui.userTimezone` is configured, temporal context uses it for local-date interpretation. Otherwise it falls back to the memory-stored timezone, then to the assistant host timezone.
429
490
  - **Timezone-aware**: Uses `Intl.DateTimeFormat` APIs for DST-safe date arithmetic and timezone validation/canonicalization.
430
491
  - **Bounded output**: Hard-capped at 1500 characters and 14 horizon entries to prevent prompt bloat.
431
492
  - **Runtime-only**: The injected `<temporal_context>` block is stripped from `this.messages` after the agent loop completes via `stripTemporalContext`. It never persists in conversation history.