npm - @vellumai/assistant - Versions diffs - 0.4.49 → 0.4.50 - Mend

@vellumai/assistant 0.4.49 → 0.4.50

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (239)

package/ARCHITECTURE.md +24 -33
package/README.md +3 -3
package/docs/architecture/memory.md +180 -119
package/package.json +2 -2
package/src/__tests__/agent-loop.test.ts +3 -1
package/src/__tests__/anthropic-provider.test.ts +114 -23
package/src/__tests__/approval-cascade.test.ts +1 -15
package/src/__tests__/approval-routes-http.test.ts +2 -0
package/src/__tests__/assistant-feature-flag-guard.test.ts +0 -23
package/src/__tests__/canonical-guardian-store.test.ts +95 -0
package/src/__tests__/checker.test.ts +13 -0
package/src/__tests__/config-schema.test.ts +1 -68
package/src/__tests__/context-memory-e2e.test.ts +11 -100
package/src/__tests__/conversation-routes-guardian-reply.test.ts +8 -0
package/src/__tests__/conversation-routes-slash-commands.test.ts +1 -0
package/src/__tests__/credential-security-e2e.test.ts +1 -0
package/src/__tests__/credential-vault-unit.test.ts +4 -0
package/src/__tests__/credential-vault.test.ts +13 -1
package/src/__tests__/cu-unified-flow.test.ts +532 -0
package/src/__tests__/date-context.test.ts +93 -77
package/src/__tests__/deterministic-verification-control-plane.test.ts +64 -0
package/src/__tests__/guardian-routing-invariants.test.ts +93 -0
package/src/__tests__/history-repair.test.ts +245 -0
package/src/__tests__/host-cu-proxy.test.ts +165 -3
package/src/__tests__/http-user-message-parity.test.ts +1 -0
package/src/__tests__/invite-redemption-service.test.ts +65 -1
package/src/__tests__/keychain-broker-client.test.ts +4 -4
package/src/__tests__/memory-context-benchmark.benchmark.test.ts +56 -18
package/src/__tests__/memory-lifecycle-e2e.test.ts +244 -387
package/src/__tests__/memory-recall-quality.test.ts +244 -407
package/src/__tests__/memory-regressions.experimental.test.ts +126 -101
package/src/__tests__/memory-regressions.test.ts +477 -2841
package/src/__tests__/memory-retrieval.benchmark.test.ts +33 -150
package/src/__tests__/memory-upsert-concurrency.test.ts +5 -244
package/src/__tests__/mime-builder.test.ts +28 -0
package/src/__tests__/native-web-search.test.ts +1 -0
package/src/__tests__/oauth-cli.test.ts +572 -5
package/src/__tests__/oauth-store.test.ts +120 -6
package/src/__tests__/qdrant-collection-migration.test.ts +53 -8
package/src/__tests__/registry.test.ts +0 -1
package/src/__tests__/relay-server.test.ts +46 -1
package/src/__tests__/schedule-tools.test.ts +32 -0
package/src/__tests__/script-proxy-certs.test.ts +1 -1
package/src/__tests__/secret-onetime-send.test.ts +1 -0
package/src/__tests__/secure-keys.test.ts +7 -2
package/src/__tests__/send-endpoint-busy.test.ts +3 -0
package/src/__tests__/session-abort-tool-results.test.ts +1 -14
package/src/__tests__/session-agent-loop-overflow.test.ts +1583 -0
package/src/__tests__/session-agent-loop.test.ts +19 -15
package/src/__tests__/session-confirmation-signals.test.ts +1 -15
package/src/__tests__/session-error.test.ts +124 -2
package/src/__tests__/session-history-web-search.test.ts +918 -0
package/src/__tests__/session-pre-run-repair.test.ts +1 -14
package/src/__tests__/session-provider-retry-repair.test.ts +25 -28
package/src/__tests__/session-queue.test.ts +37 -27
package/src/__tests__/session-runtime-assembly.test.ts +54 -0
package/src/__tests__/session-slash-known.test.ts +1 -15
package/src/__tests__/session-slash-queue.test.ts +1 -15
package/src/__tests__/session-slash-unknown.test.ts +1 -15
package/src/__tests__/session-workspace-cache-state.test.ts +3 -33
package/src/__tests__/session-workspace-injection.test.ts +3 -37
package/src/__tests__/session-workspace-tool-tracking.test.ts +3 -37
package/src/__tests__/skills-install-extract.test.ts +93 -0
package/src/__tests__/skillssh-registry.test.ts +451 -0
package/src/__tests__/trust-store.test.ts +15 -0
package/src/__tests__/voice-invite-redemption.test.ts +32 -1
package/src/agent/ax-tree-compaction.test.ts +51 -0
package/src/agent/loop.ts +39 -12
package/src/approvals/AGENTS.md +1 -1
package/src/approvals/guardian-request-resolvers.ts +14 -2
package/src/bundler/compiler-tools.ts +66 -2
package/src/calls/call-domain.ts +132 -0
package/src/calls/call-store.ts +6 -0
package/src/calls/relay-server.ts +43 -5
package/src/calls/relay-setup-router.ts +17 -1
package/src/calls/twilio-config.ts +1 -1
package/src/calls/types.ts +3 -1
package/src/cli/commands/doctor.ts +4 -3
package/src/cli/commands/mcp.ts +46 -59
package/src/cli/commands/memory.ts +16 -165
package/src/cli/commands/oauth/apps.ts +31 -2
package/src/cli/commands/oauth/connections.ts +431 -97
package/src/cli/commands/oauth/providers.ts +15 -1
package/src/cli/commands/sessions.ts +5 -2
package/src/cli/commands/skills.ts +173 -1
package/src/cli/http-client.ts +0 -20
package/src/cli/main-screen.tsx +2 -2
package/src/cli/program.ts +5 -6
package/src/cli.ts +4 -10
package/src/config/bundled-skills/computer-use/TOOLS.json +1 -1
package/src/config/bundled-skills/computer-use/tools/computer-use-observe.ts +12 -0
package/src/config/bundled-tool-registry.ts +2 -5
package/src/config/schema.ts +1 -12
package/src/config/schemas/memory-lifecycle.ts +0 -9
package/src/config/schemas/memory-processing.ts +0 -180
package/src/config/schemas/memory-retrieval.ts +32 -104
package/src/config/schemas/memory.ts +0 -10
package/src/config/types.ts +0 -4
package/src/context/window-manager.ts +4 -1
package/src/daemon/config-watcher.ts +61 -3
package/src/daemon/daemon-control.ts +1 -1
package/src/daemon/date-context.ts +114 -31
package/src/daemon/handlers/sessions.ts +18 -13
package/src/daemon/handlers/skills.ts +20 -1
package/src/daemon/history-repair.ts +72 -8
package/src/daemon/host-cu-proxy.ts +55 -26
package/src/daemon/lifecycle.ts +31 -3
package/src/daemon/mcp-reload-service.ts +2 -2
package/src/daemon/message-types/computer-use.ts +1 -12
package/src/daemon/message-types/memory.ts +4 -16
package/src/daemon/message-types/messages.ts +1 -0
package/src/daemon/message-types/sessions.ts +4 -0
package/src/daemon/server.ts +12 -1
package/src/daemon/session-agent-loop-handlers.ts +38 -0
package/src/daemon/session-agent-loop.ts +334 -48
package/src/daemon/session-error.ts +89 -6
package/src/daemon/session-history.ts +17 -7
package/src/daemon/session-media-retry.ts +6 -2
package/src/daemon/session-memory.ts +69 -149
package/src/daemon/session-process.ts +10 -1
package/src/daemon/session-runtime-assembly.ts +49 -19
package/src/daemon/session-surfaces.ts +4 -1
package/src/daemon/session-tool-setup.ts +7 -1
package/src/daemon/session.ts +12 -2
package/src/instrument.ts +61 -1
package/src/memory/admin.ts +2 -191
package/src/memory/canonical-guardian-store.ts +38 -2
package/src/memory/conversation-crud.ts +0 -33
package/src/memory/conversation-queries.ts +22 -3
package/src/memory/db-init.ts +28 -0
package/src/memory/embedding-backend.ts +84 -8
package/src/memory/embedding-types.ts +9 -1
package/src/memory/indexer.ts +7 -46
package/src/memory/items-extractor.ts +274 -76
package/src/memory/job-handlers/backfill.ts +2 -127
package/src/memory/job-handlers/cleanup.ts +2 -16
package/src/memory/job-handlers/extraction.ts +2 -138
package/src/memory/job-handlers/index-maintenance.ts +1 -6
package/src/memory/job-handlers/summarization.ts +3 -148
package/src/memory/job-utils.ts +21 -59
package/src/memory/jobs-store.ts +1 -159
package/src/memory/jobs-worker.ts +9 -52
package/src/memory/migrations/104-core-indexes.ts +3 -3
package/src/memory/migrations/149-oauth-tables.ts +2 -0
package/src/memory/migrations/150-oauth-apps-client-secret-path.ts +98 -0
package/src/memory/migrations/151-oauth-providers-ping-url.ts +11 -0
package/src/memory/migrations/152-memory-item-supersession.ts +44 -0
package/src/memory/migrations/153-drop-entity-tables.ts +15 -0
package/src/memory/migrations/154-drop-fts.ts +20 -0
package/src/memory/migrations/155-drop-conflicts.ts +7 -0
package/src/memory/migrations/156-call-session-invite-metadata.ts +24 -0
package/src/memory/migrations/index.ts +7 -0
package/src/memory/qdrant-client.ts +148 -51
package/src/memory/raw-query.ts +1 -1
package/src/memory/retriever.test.ts +294 -273
package/src/memory/retriever.ts +421 -645
package/src/memory/schema/calls.ts +2 -0
package/src/memory/schema/memory-core.ts +3 -48
package/src/memory/schema/oauth.ts +2 -0
package/src/memory/search/formatting.ts +263 -176
package/src/memory/search/lexical.ts +1 -254
package/src/memory/search/ranking.ts +0 -455
package/src/memory/search/semantic.ts +100 -14
package/src/memory/search/staleness.ts +47 -0
package/src/memory/search/tier-classifier.ts +21 -0
package/src/memory/search/types.ts +15 -77
package/src/memory/task-memory-cleanup.ts +4 -6
package/src/messaging/providers/gmail/mime-builder.ts +17 -7
package/src/oauth/byo-connection.test.ts +8 -1
package/src/oauth/oauth-store.ts +113 -27
package/src/oauth/seed-providers.ts +6 -0
package/src/oauth/token-persistence.ts +11 -3
package/src/permissions/defaults.ts +1 -0
package/src/permissions/trust-store.ts +23 -1
package/src/playbooks/playbook-compiler.ts +1 -1
package/src/prompts/system-prompt.ts +18 -2
package/src/providers/anthropic/client.ts +56 -126
package/src/providers/types.ts +7 -1
package/src/runtime/AGENTS.md +9 -0
package/src/runtime/auth/route-policy.ts +6 -3
package/src/runtime/guardian-reply-router.ts +24 -22
package/src/runtime/http-server.ts +2 -2
package/src/runtime/invite-redemption-service.ts +19 -1
package/src/runtime/invite-service.ts +25 -0
package/src/runtime/pending-interactions.ts +2 -2
package/src/runtime/routes/brain-graph-routes.ts +10 -90
package/src/runtime/routes/conversation-routes.ts +9 -1
package/src/runtime/routes/inbound-stages/acl-enforcement.ts +21 -12
package/src/runtime/routes/memory-item-routes.test.ts +754 -0
package/src/runtime/routes/memory-item-routes.ts +503 -0
package/src/runtime/routes/session-management-routes.ts +3 -3
package/src/runtime/routes/settings-routes.ts +2 -2
package/src/runtime/routes/trust-rules-routes.ts +14 -0
package/src/runtime/routes/workspace-routes.ts +2 -1
package/src/security/keychain-broker-client.ts +17 -4
package/src/security/secure-keys.ts +25 -3
package/src/security/token-manager.ts +36 -36
package/src/skills/catalog-install.ts +74 -18
package/src/skills/skillssh-registry.ts +503 -0
package/src/tools/assets/search.ts +5 -1
package/src/tools/computer-use/definitions.ts +0 -10
package/src/tools/computer-use/registry.ts +1 -1
package/src/tools/credentials/vault.ts +1 -3
package/src/tools/memory/definitions.ts +4 -13
package/src/tools/memory/handlers.test.ts +83 -103
package/src/tools/memory/handlers.ts +50 -85
package/src/tools/schedule/create.ts +8 -1
package/src/tools/schedule/update.ts +8 -1
package/src/tools/skills/load.ts +25 -2
package/src/__tests__/clarification-resolver.test.ts +0 -193
package/src/__tests__/conflict-intent-tokenization.test.ts +0 -160
package/src/__tests__/conflict-policy.test.ts +0 -269
package/src/__tests__/conflict-store.test.ts +0 -372
package/src/__tests__/contradiction-checker.test.ts +0 -361
package/src/__tests__/entity-extractor.test.ts +0 -211
package/src/__tests__/entity-search.test.ts +0 -1117
package/src/__tests__/profile-compiler.test.ts +0 -392
package/src/__tests__/session-conflict-gate.test.ts +0 -1228
package/src/__tests__/session-profile-injection.test.ts +0 -557
package/src/config/bundled-skills/knowledge-graph/SKILL.md +0 -25
package/src/config/bundled-skills/knowledge-graph/TOOLS.json +0 -66
package/src/config/bundled-skills/knowledge-graph/tools/graph-query.ts +0 -211
package/src/daemon/session-conflict-gate.ts +0 -167
package/src/daemon/session-dynamic-profile.ts +0 -77
package/src/memory/clarification-resolver.ts +0 -417
package/src/memory/conflict-intent.ts +0 -205
package/src/memory/conflict-policy.ts +0 -127
package/src/memory/conflict-store.ts +0 -410
package/src/memory/contradiction-checker.ts +0 -508
package/src/memory/entity-extractor.ts +0 -535
package/src/memory/format-recall.ts +0 -47
package/src/memory/fts-reconciler.ts +0 -165
package/src/memory/job-handlers/conflict.ts +0 -200
package/src/memory/profile-compiler.ts +0 -195
package/src/memory/recall-cache.ts +0 -117
package/src/memory/search/entity.ts +0 -535
package/src/memory/search/query-expansion.test.ts +0 -70
package/src/memory/search/query-expansion.ts +0 -118
package/src/runtime/routes/mcp-routes.ts +0 -20

package/ARCHITECTURE.md CHANGED

@@ -692,15 +692,10 @@ graph LR
         MSG["messages<br/>───────────────<br/>id, conversation_id (FK)<br/>role: user | assistant<br/>content: JSON array<br/>created_at"]
         TOOL["tool_invocations<br/>───────────────<br/>tool_name, input, result<br/>decision, risk_level<br/>duration_ms"]
         SEG["memory_segments<br/>───────────────<br/>Text chunks for retrieval<br/>Linked to messages<br/>token_estimate per segment"]
-        FTS["memory_segment_fts<br/>───────────────<br/>FTS5 virtual table<br/>Auto-synced via triggers<br/>Powers lexical search"]
         ITEMS["memory_items<br/>───────────────<br/>Extracted facts/entities<br/>kind, subject, statement<br/>confidence, fingerprint (dedup)<br/>verification_state, scope_id<br/>first/last seen timestamps"]
-        CONFLICTS["memory_item_conflicts<br/>───────────────<br/>Pending/resolved contradiction pairs<br/>existing_item_id + candidate_item_id<br/>clarification question + resolution note<br/>partial unique pending pair index"]
-        ENTITIES["memory_entities<br/>───────────────<br/>Canonical entities + aliases<br/>mention_count, first/last seen<br/>Resolved across messages"]
-        RELS["memory_entity_relations<br/>───────────────<br/>Directional entity edges<br/>Unique by source/target/relation<br/>first/last seen + evidence"]
-        ITEM_ENTS["memory_item_entities<br/>───────────────<br/>Join table linking extracted<br/>memory_items to entities"]
         SUM["memory_summaries<br/>───────────────<br/>scope: conversation | weekly<br/>Compressed history for context<br/>window management"]
         EMB["memory_embeddings<br/>───────────────<br/>target: segment | item | summary<br/>provider + model metadata<br/>vector_json (float array)<br/>Powers semantic search"]
-        JOBS["memory_jobs<br/>───────────────<br/>Async task queue<br/>Types: embed, extract,<br/>summarize, backfill,<br/>conflict resolution, cleanup<br/>Status: pending → running →<br/>completed | failed"]
+        JOBS["memory_jobs<br/>───────────────<br/>Async task queue<br/>Types: embed, extract,<br/>summarize, backfill, cleanup<br/>Status: pending → running →<br/>completed | failed"]
         ATT["attachments<br/>───────────────<br/>base64-encoded file data<br/>mime_type, size_bytes<br/>Linked to messages via<br/>message_attachments join"]
         REM["reminders<br/>───────────────<br/>One-time scheduled reminders<br/>label, message, fireAt<br/>mode: notify | execute<br/>status: pending → fired | cancelled<br/>routing_intent: single_channel |<br/>multi_channel | all_channels<br/>routing_hints_json (free-form)"]
         SCHED_JOBS["cron_jobs (recurrence schedules)<br/>───────────────<br/>Recurring schedule definitions<br/>cron_expression: cron or RRULE string<br/>schedule_syntax: 'cron' | 'rrule'<br/>timezone, message, next_run_at<br/>enabled, retry_count<br/>Legacy alias: scheduleJobs"]
@@ -940,8 +935,7 @@ graph TB
     end
     subgraph "Text Q&A Session"
-        TEXT_TOOLS["Tools: sandbox file_* / bash,<br/>host_file_* / host_bash,<br/>ui_show, ...<br/>+ dynamically projected skill tools<br/>(browser_* via bundled browser skill)"]
-        ESCALATE["computer_use_request_control<br/>(proxy tool)"]
+        TEXT_TOOLS["Tools: sandbox file_* / bash,<br/>host_file_* / host_bash,<br/>ui_show, ...<br/>+ dynamically projected skill tools<br/>(browser_* via bundled browser skill,<br/>computer_use_* via bundled computer-use skill)"]
     end
     SUBMIT --> SLASH_CHECK
@@ -953,22 +947,21 @@ graph TB
     CLASSIFIER -->|"text_qa"| QA_ROUTE
     QA_ROUTE --> TEXT_TOOLS
-    TEXT_TOOLS -.->|"User explicitly requests<br/>computer control"| ESCALATE
-    ESCALATE -.->|"Creates CU session<br/>via surfaceProxyResolver"| CU_ROUTE
+    TEXT_TOOLS -.->|"computer_use_* actions<br/>forwarded via HostCuProxy"| CU_ROUTE
 ```
 ### Action Execution Hierarchy
 The text_qa system prompt includes an action execution hierarchy that guides tool selection toward the least invasive method:
-| Priority        | Method                         | Tool                                  | When to use                                                 |
-| --------------- | ------------------------------ | ------------------------------------- | ----------------------------------------------------------- |
-| **BEST**        | Sandboxed filesystem/shell     | `file_*`, `bash`                      | Work that can stay isolated in sandbox filesystem           |
-| **BETTER**      | Explicit host filesystem/shell | `host_file_*`, `host_bash`            | Host reads/writes/commands that must touch the real machine |
-| **GOOD**        | Headless browser               | `browser_*` (bundled `browser` skill) | Web automation, form filling, scraping (background)         |
-| **LAST RESORT** | Foreground computer use        | `computer_use_request_control`        | Only on explicit user request ("go ahead", "take over")     |
+| Priority        | Method                         | Tool                                            | When to use                                                 |
+| --------------- | ------------------------------ | ----------------------------------------------- | ----------------------------------------------------------- |
+| **BEST**        | Sandboxed filesystem/shell     | `file_*`, `bash`                                | Work that can stay isolated in sandbox filesystem           |
+| **BETTER**      | Explicit host filesystem/shell | `host_file_*`, `host_bash`                      | Host reads/writes/commands that must touch the real machine |
+| **GOOD**        | Headless browser               | `browser_*` (bundled `browser` skill)           | Web automation, form filling, scraping (background)         |
+| **LAST RESORT** | Foreground computer use        | `computer_use_*` (bundled `computer-use` skill) | Only on explicit user request ("go ahead", "take over")     |
-The `computer_use_request_control` tool is a core proxy tool available only to text_qa sessions. When invoked, the session's `surfaceProxyResolver` creates a CU session and sends a `task_routed` message to the client, effectively escalating from text_qa to foreground computer use. The CU session constructor sets `preactivatedSkillIds: ['computer-use']`, and its `getProjectedCuToolDefinitions()` calls `projectSkillTools()` to load the 12 `computer_use_*` action tools from the bundled `computer-use` skill (via TOOLS.json). These tools are not core-registered at daemon startup; they exist only within CU sessions through skill projection.
+Computer-use tools are proxy tools provided by the bundled `computer-use` skill, preactivated via `preactivatedSkillIds` in desktop sessions. Each tool forwards actions to the connected macOS client via `HostCuProxy`, which handles request/resolve proxying, step counting, loop detection, and observation formatting within the unified agent loop. These tools are not core-registered at daemon startup; they exist only through skill projection.
 ### Sandbox Filesystem and Host Access
@@ -988,7 +981,7 @@ graph TB
     SBPL --> SB_FS["Sandbox filesystem root<br/>~/.vellum/workspace"]
     BWRAP --> SB_FS
-    EXEC -->|"host_file_* / host_bash / computer_use_request_control"| HOST_TOOLS["Host-target tools<br/>(unchanged by backend choice)"]
+    EXEC -->|"host_file_* / host_bash"| HOST_TOOLS["Host-target tools<br/>(unchanged by backend choice)"]
     EXEC -->|"computer_use_* (skill-projected<br/>in CU sessions only)"| SKILL_CU_TOOLS["CU skill tools<br/>(bundled computer-use skill)"]
     HOST_TOOLS --> CHECK["Permission checker + trust-store"]
     SKILL_CU_TOOLS --> CHECK
@@ -1005,7 +998,7 @@ graph TB
 - **Host tools unchanged**: `host_bash`, `host_file_read`, `host_file_write`, and `host_file_edit` always execute directly on the host regardless of which sandbox backend is active.
 - Sandbox defaults: `file_*` and `bash` execute within `~/.vellum/workspace`.
 - Host access is explicit: `host_file_read`, `host_file_write`, `host_file_edit`, and `host_bash` are separate tools.
-- Prompt defaults: host tools, `computer_use_request_control`, and `computer_use_*` skill-projected actions default to `ask` unless a trust rule allowlists/denylists them.
+- Prompt defaults: host tools and `computer_use_*` skill-projected actions default to `ask` unless a trust rule allowlists/denylists them.
 - Browser tool defaults: all `browser_*` tools are auto-allowed by default via seeded allow rules at priority 100, preserving the frictionless UX from when browser was a core tool.
 - Confirmation payloads include `executionTarget` (`sandbox` or `host`) so clients can label where the action will run.
@@ -1187,16 +1180,16 @@ skills/<skill-id>/
 The following capabilities ship as bundled skills in `assistant/src/config/bundled-skills/`:
-| Skill ID        | Tools                                                                                                                                                                                                                                                                                              | Purpose                                                                                                                                                                                                                                                                                                         |
-| --------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `browser`       | `browser_navigate`, `browser_snapshot`, `browser_screenshot`, `browser_close`, `browser_click`, `browser_type`, `browser_press_key`, `browser_wait_for`, `browser_extract`, `browser_fill_credential`                                                                                              | Headless browser automation — web scraping, form filling, interaction (previously core-registered as `headless-browser`; now skill-provided with default allow rules)                                                                                                                                           |
-| `gmail`         | Gmail search, archive, send, etc.                                                                                                                                                                                                                                                                  | Email management via OAuth2 integration                                                                                                                                                                                                                                                                         |
-| `claude-code`   | Claude Code tool                                                                                                                                                                                                                                                                                   | Delegate coding tasks to Claude Code subprocess                                                                                                                                                                                                                                                                 |
-| `computer-use`  | `computer_use_click`, `computer_use_double_click`, `computer_use_right_click`, `computer_use_type_text`, `computer_use_key`, `computer_use_scroll`, `computer_use_drag`, `computer_use_open_app`, `computer_use_run_applescript`, `computer_use_wait`, `computer_use_done`, `computer_use_respond` | Computer-use action tools — internally preactivated by `ComputerUseSession` via `preactivatedSkillIds`; not user-invocable or model-discoverable in text sessions. Each wrapper script forwards to `forwardComputerUseProxyTool()` which uses the session's proxy resolver to send actions to the macOS client. |
-| `weather`       | `get-weather`                                                                                                                                                                                                                                                                                      | Fetch current weather data                                                                                                                                                                                                                                                                                      |
-| `app-builder`   | `app_create`, `app_list`, `app_query`, `app_update`, `app_delete`, `app_file_list`, `app_file_read`, `app_file_edit`, `app_file_write`                                                                                                                                                             | Dynamic app authoring — CRUD and file-level editing for persistent apps (activated via `skill_load app-builder`; `app_open` remains a core proxy tool)                                                                                                                                                          |
-| `self-upgrade`  | (instruction-only)                                                                                                                                                                                                                                                                                 | Self-improvement workflow                                                                                                                                                                                                                                                                                       |
-| `start-the-day` | (instruction-only)                                                                                                                                                                                                                                                                                 | Morning briefing routine                                                                                                                                                                                                                                                                                        |
+| Skill ID        | Tools                                                                                                                                                                                                                                                             | Purpose                                                                                                                                                                                                                                                                                              |
+| --------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `browser`       | `browser_navigate`, `browser_snapshot`, `browser_screenshot`, `browser_close`, `browser_click`, `browser_type`, `browser_press_key`, `browser_wait_for`, `browser_extract`, `browser_fill_credential`                                                             | Headless browser automation — web scraping, form filling, interaction (previously core-registered as `headless-browser`; now skill-provided with default allow rules)                                                                                                                                |
+| `gmail`         | Gmail search, archive, send, etc.                                                                                                                                                                                                                                 | Email management via OAuth2 integration                                                                                                                                                                                                                                                              |
+| `claude-code`   | Claude Code tool                                                                                                                                                                                                                                                  | Delegate coding tasks to Claude Code subprocess                                                                                                                                                                                                                                                      |
+| `computer-use`  | `computer_use_observe`, `computer_use_click`, `computer_use_type_text`, `computer_use_key`, `computer_use_scroll`, `computer_use_drag`, `computer_use_wait`, `computer_use_open_app`, `computer_use_run_applescript`, `computer_use_done`, `computer_use_respond` | Computer-use proxy tools — preactivated via `preactivatedSkillIds` in desktop sessions. Each tool forwards actions to the connected macOS client via `HostCuProxy`, which handles request/resolve proxying, step counting, loop detection, and observation formatting within the unified agent loop. |
+| `weather`       | `get-weather`                                                                                                                                                                                                                                                     | Fetch current weather data                                                                                                                                                                                                                                                                           |
+| `app-builder`   | `app_create`, `app_list`, `app_query`, `app_update`, `app_delete`, `app_file_list`, `app_file_read`, `app_file_edit`, `app_file_write`                                                                                                                            | Dynamic app authoring — CRUD and file-level editing for persistent apps (activated via `skill_load app-builder`; `app_open` remains a core proxy tool)                                                                                                                                               |
+| `self-upgrade`  | (instruction-only)                                                                                                                                                                                                                                                | Self-improvement workflow                                                                                                                                                                                                                                                                            |
+| `start-the-day` | (instruction-only)                                                                                                                                                                                                                                                | Morning briefing routine                                                                                                                                                                                                                                                                             |
 ### Activation and Projection Flow
@@ -1240,7 +1233,7 @@ graph TB
     RESOLVE --> PROVIDER
 ```
-**Internal preactivation**: Some bundled skills are preactivated programmatically rather than by user slash commands or model discovery. For example, `ComputerUseSession` sets `preactivatedSkillIds: ['computer-use']` in its constructor, causing `projectSkillTools()` to load the 12 `computer_use_*` tool definitions from the bundled skill's `TOOLS.json` on the first turn. These tools are never exposed in text sessions — they only appear in the CU session's agent loop.
+**Internal preactivation**: Some bundled skills are preactivated programmatically rather than by user slash commands or model discovery. For example, desktop sessions set `preactivatedSkillIds: ['computer-use']`, causing `projectSkillTools()` to load the 11 `computer_use_*` tool definitions from the bundled skill's `TOOLS.json` on the first turn. These proxy tools forward actions to the connected macOS client via `HostCuProxy`.
 ### Skill Tool Execution
@@ -1917,10 +1910,8 @@ Connected channels are resolved at signal emission time: vellum is always includ
 | User preferences                             | UserDefaults                                                      | plist                               | Foundation                         | Permanent                                                  |
 | Session logs                                 | `~/Library/.../logs/session-*.json`                               | JSON per session                    | Swift Codable                      | Unbounded                                                  |
 | Conversations & messages                     | `~/.vellum/workspace/data/db/assistant.db`                        | SQLite + WAL                        | Drizzle ORM (Bun)                  | Permanent                                                  |
-| Memory segments & FTS                        | `~/.vellum/workspace/data/db/assistant.db`                        | SQLite FTS5                         | Drizzle ORM                        | Permanent                                                  |
+| Memory segments                              | `~/.vellum/workspace/data/db/assistant.db`                        | SQLite                              | Drizzle ORM                        | Permanent                                                  |
 | Extracted facts                              | `~/.vellum/workspace/data/db/assistant.db`                        | SQLite                              | Drizzle ORM                        | Permanent, deduped                                         |
-| Conflict lifecycle rows                      | `~/.vellum/workspace/data/db/assistant.db`                        | SQLite                              | Drizzle ORM                        | Pending until clarified, then retained as resolved history |
-| Entity graph (entities/relations/item links) | `~/.vellum/workspace/data/db/assistant.db`                        | SQLite                              | Drizzle ORM                        | Permanent, deduped by unique relation edge                 |
 | Embeddings                                   | `~/.vellum/workspace/data/db/assistant.db`                        | JSON float arrays                   | Drizzle ORM                        | Permanent                                                  |
 | Async job queue                              | `~/.vellum/workspace/data/db/assistant.db`                        | SQLite                              | Drizzle ORM                        | Completed jobs persist                                     |
 | Attachments                                  | `~/.vellum/workspace/data/db/assistant.db`                        | Base64 in SQLite                    | Drizzle ORM                        | Permanent                                                  |

package/README.md CHANGED Viewed

@@ -16,7 +16,7 @@ CLI / macOS app / iOS app
         │       ├── Google Gemini (secondary)
         │       └── Ollama (local models)
         │
-        ├── Memory System (FTS5 + Qdrant + Entity Graph)
+        ├── Memory System (Qdrant Hybrid Search)
         ├── Skill Tool System (bundled + managed + workspace)
         ├── Swarm Orchestration (DAG scheduler + worker pool)
         ├── Script Proxy (credential injection + MITM)
@@ -99,7 +99,7 @@ assistant/
 │   ├── daemon/               # Daemon server, session management
 │   ├── agent/                # Agent loop and LLM interaction
 │   ├── providers/            # LLM provider integrations (Anthropic, OpenAI, Gemini, Ollama)
-│   ├── memory/               # Conversation store, memory indexer, recall (FTS5 + Qdrant)
+│   ├── memory/               # Conversation store, memory indexer, recall (Qdrant hybrid search)
 │   ├── skills/               # Skill catalog, loading, and tool factory
 │   ├── tools/                # Built-in tool definitions
 │   ├── swarm/                # Swarm orchestration (DAG scheduler, worker pool)
@@ -446,7 +446,7 @@ If no guardian binding exists, escalation fails closed — the message is denied
 ## Database
-SQLite via Drizzle ORM, stored at `~/.vellum/workspace/data/db/assistant.db`. Key tables include conversations, messages, tool invocations, attachments, memory segments (with FTS5), memory items, entities, reminders, and recurrence schedules (cron + RRULE).
+SQLite via Drizzle ORM, stored at `~/.vellum/workspace/data/db/assistant.db`. Key tables include conversations, messages, tool invocations, attachments, memory segments, memory items, reminders, and recurrence schedules (cron + RRULE).
 > **Note:** The recurrence schedule system supports both cron expressions and iCalendar RRULE syntax. Use the `expression` field with an explicit `syntax` discriminator. See [`docs/architecture/scheduling.md`](docs/architecture/scheduling.md) for details.

package/docs/architecture/memory.md CHANGED Viewed

@@ -12,52 +12,51 @@ graph TB
         INDEX["Memory Indexer"]
         SEGMENT["Split into segments<br/>→ memory_segments"]
         EXTRACT_JOB["Enqueue extract_items job<br/>→ memory_jobs"]
-        CONFLICT_RESOLVE_JOB["Enqueue resolve_pending_conflicts_for_message<br/>(dedupe by type+message+scope)<br/>→ memory_jobs"]
         SUMMARY_JOB["Enqueue build_conversation_summary<br/>→ memory_jobs"]
     end
     subgraph "Background Worker (polls every 1.5s)"
         WORKER["MemoryJobsWorker"]
-        EMBED_SEG["embed_segment<br/>→ memory_embeddings"]
-        EMBED_ITEM["embed_item<br/>→ memory_embeddings"]
-        EMBED_SUM["embed_summary<br/>→ memory_embeddings"]
-        EXTRACT["extract_items<br/>→ memory_items +<br/>memory_item_sources"]
-        CHECK_CONTRA["check_contradictions<br/>→ contradiction/update merge OR<br/>pending_clarification + memory_item_conflicts"]
-        RESOLVE_PENDING["resolve_pending_conflicts_for_message<br/>message-scoped clarification resolution<br/>→ resolved conflict + item status updates"]
-        CLEAN_CONFLICTS["cleanup_resolved_conflicts<br/>delete resolved conflict rows<br/>older than retention window"]
-        CLEAN_SUPERSEDED["cleanup_stale_superseded_items<br/>delete stale superseded items<br/>and item embedding rows"]
-        EXTRACT_ENTITIES["extract_entities<br/>→ memory_entities +<br/>memory_item_entities +<br/>memory_entity_relations"]
-        BACKFILL_REL["backfill_entity_relations<br/>checkpointed message scan<br/>→ enqueue extract_entities"]
+        EMBED_SEG["embed_segment<br/>→ Qdrant (dense + sparse)"]
+        EMBED_ITEM["embed_item<br/>→ Qdrant (dense + sparse)"]
+        EMBED_SUM["embed_summary<br/>→ Qdrant (dense + sparse)"]
+        EXTRACT["extract_items<br/>→ memory_items +<br/>memory_item_sources<br/>(LLM-directed supersession)"]
+        CLEAN_SUPERSEDED["cleanup_stale_superseded_items<br/>delete stale superseded items<br/>and Qdrant vectors"]
         BUILD_SUM["build_conversation_summary<br/>→ memory_summaries"]
-        WEEKLY["refresh_weekly_summary<br/>→ memory_summaries"]
     end
-    subgraph "Embedding Providers"
+    subgraph "Embedding Provider Selection (selectEmbeddingBackend)"
+        PROVIDER_SELECT["Provider Selection<br/>auto: local → OpenAI → Gemini → Ollama<br/>or explicit config override"]
         LOCAL_EMB["Local (ONNX)<br/>bge-small-en-v1.5"]
         OAI_EMB["OpenAI<br/>text-embedding-3-small"]
         GEM_EMB["Gemini<br/>gemini-embedding-001"]
         OLL_EMB["Ollama<br/>nomic-embed-text"]
     end
+    subgraph "Sparse Embedding (in-process)"
+        SPARSE_GEN["generateSparseEmbedding()<br/>TF-IDF, FNV-1a hashing<br/>(no external calls)"]
+    end
+    subgraph "Qdrant Vector Store"
+        DENSE["Named vector: dense<br/>(cosine similarity)"]
+        SPARSE["Named vector: sparse<br/>(TF-IDF based)"]
+        RRF["Query API:<br/>Reciprocal Rank Fusion"]
+    end
     subgraph "Read Path (Memory Recall)"
+        NEEDS_MEM["needsMemory gate<br/>(skip short/empty/tool-result turns)"]
         QUERY["Recall Query Builder<br/>User request + compacted context summary"]
-        CONFLICT_GATE["Soft Conflict Gate<br/>dismiss non-actionable conflicts (kind + statement + provenance policy)<br/>attempt internal resolution from user turn<br/>relevance-based; never produces user-facing prompts"]
-        PROFILE_BUILD["Dynamic Profile Compiler<br/>active trusted profile memories<br/>user_confirmed > user_reported > assistant_inferred"]
-        PROFILE_INJECT["Inject profile context block<br/>into runtime user tail<br/>(strict token cap)"]
         BUDGET["Dynamic Recall Budget<br/>computeRecallBudget()<br/>from prompt headroom"]
-        LEX["Lexical Search<br/>FTS5 on memory_segment_fts"]
-        SEM["Semantic Search<br/>Qdrant cosine similarity"]
-        ENTITY_SEARCH["Entity Search<br/>Seed name/alias matching"]
-        REL_EXPAND["Relation Expansion<br/>1-hop via memory_entity_relations<br/>→ neighbor item links"]
-        DIRECT["Direct Item Search<br/>LIKE on subject/statement"]
+        EMBED_Q["Generate dense + sparse<br/>query embeddings"]
+        HYBRID["Hybrid Search<br/>dense + sparse RRF on Qdrant"]
+        RECENCY["Recency Search<br/>conversation-scoped, DB only"]
+        MERGE["Merge + Deduplicate<br/>weighted score combination"]
         SCOPE["Scope Filter<br/>scope_id filtering<br/>(strict | allow_global_fallback)<br/>Private threads: own scope + 'default'"]
-        MERGE["RRF Merge<br/>+ Trust Weighting<br/>+ Freshness Decay"]
-        CAPS["Source Caps<br/>bound per-source candidate count"]
-        RERANK["LLM Re-ranking<br/>(Haiku, optional)"]
-        TRIM["Token Trim<br/>maxInjectTokens override<br/>or static fallback"]
-        INJECT["Attention-ordered<br/>Injection into prompt"]
-        TELEMETRY["Emit memory_recalled<br/>hits + relation counters +<br/>ranking diagnostics"]
-        STRIP_PROFILE["Strip injected dynamic profile block<br/>before persisting conversation history"]
+        TIER["Tier Classification<br/>score > 0.8 → tier 1<br/>score > 0.6 → tier 2<br/>below → dropped"]
+        STALE["Staleness Computation<br/>kind-specific lifetimes<br/>+ reinforcement from<br/>source conversation count"]
+        DEMOTE["Stale Demotion<br/>very_stale tier 1 → tier 2"]
+        INJECT["Two-Layer XML Injection<br/>budget-aware rendering"]
+        TELEMETRY["Emit memory_recalled<br/>tier counts + hybrid search ms +<br/>staleness stats"]
     end
     subgraph "Context Window Management"
@@ -83,49 +82,47 @@ graph TB
     STORE --> INDEX
     INDEX --> SEGMENT
     INDEX --> EXTRACT_JOB
-    INDEX --> CONFLICT_RESOLVE_JOB
     INDEX --> SUMMARY_JOB
     WORKER --> EMBED_SEG
     WORKER --> EMBED_ITEM
     WORKER --> EMBED_SUM
     WORKER --> EXTRACT
-    WORKER --> CHECK_CONTRA
-    WORKER --> RESOLVE_PENDING
-    WORKER --> CLEAN_CONFLICTS
     WORKER --> CLEAN_SUPERSEDED
-    WORKER --> EXTRACT_ENTITIES
-    WORKER --> BACKFILL_REL
     WORKER --> BUILD_SUM
-    WORKER --> WEEKLY
-    EXTRACT --> CHECK_CONTRA
-    EXTRACT --> EXTRACT_ENTITIES
-    EMBED_SEG --> OAI_EMB
-    EMBED_SEG --> GEM_EMB
-    EMBED_SEG --> OLL_EMB
-    QUERY --> CONFLICT_GATE
-    CONFLICT_GATE --> PROFILE_BUILD
-    PROFILE_BUILD --> PROFILE_INJECT
-    CONFLICT_GATE --> LEX
-    CONFLICT_GATE --> SEM
-    CONFLICT_GATE --> ENTITY_SEARCH
-    CONFLICT_GATE --> DIRECT
-    LEX --> SCOPE
-    SEM --> SCOPE
-    ENTITY_SEARCH --> REL_EXPAND
-    REL_EXPAND --> SCOPE
-    DIRECT --> SCOPE
+    EMBED_SEG --> PROVIDER_SELECT
+    EMBED_ITEM --> PROVIDER_SELECT
+    EMBED_SUM --> PROVIDER_SELECT
+    PROVIDER_SELECT --> LOCAL_EMB
+    PROVIDER_SELECT --> OAI_EMB
+    PROVIDER_SELECT --> GEM_EMB
+    PROVIDER_SELECT --> OLL_EMB
+    LOCAL_EMB --> DENSE
+    OAI_EMB --> DENSE
+    GEM_EMB --> DENSE
+    OLL_EMB --> DENSE
+    EMBED_SEG --> SPARSE_GEN
+    EMBED_ITEM --> SPARSE_GEN
+    EMBED_SUM --> SPARSE_GEN
+    SPARSE_GEN --> SPARSE
+    NEEDS_MEM --> QUERY
+    QUERY --> EMBED_Q
+    EMBED_Q --> PROVIDER_SELECT
+    EMBED_Q --> SPARSE_GEN
+    EMBED_Q --> HYBRID
+    HYBRID --> RRF
+    QUERY --> RECENCY
+    HYBRID --> SCOPE
+    RECENCY --> SCOPE
     SCOPE --> MERGE
-    MERGE --> CAPS
-    CAPS --> RERANK
-    RERANK --> TRIM
-    BUDGET --> TRIM
-    TRIM --> INJECT
-    PROFILE_INJECT --> INJECT
+    MERGE --> TIER
+    TIER --> STALE
+    STALE --> DEMOTE
+    BUDGET --> INJECT
+    DEMOTE --> INJECT
     INJECT --> TELEMETRY
-    INJECT --> STRIP_PROFILE
     CTX --> COMPACT
     COMPACT --> GUARDS
@@ -158,92 +155,159 @@ The key distinction: normal compaction is a cost-optimized background process th
 ### Memory Retrieval Config Knobs (Defaults)
-| Config key                                                |                                                           Default | Purpose                                                                                                            |
-| --------------------------------------------------------- | ----------------------------------------------------------------: | ------------------------------------------------------------------------------------------------------------------ |
-| `memory.retrieval.dynamicBudget.enabled`                  |                                                            `true` | Toggle per-turn recall budget calculation from live prompt headroom.                                               |
-| `memory.retrieval.dynamicBudget.minInjectTokens`          |                                                            `1200` | Lower clamp for computed recall injection budget.                                                                  |
-| `memory.retrieval.dynamicBudget.maxInjectTokens`          |                                                           `10000` | Upper clamp for computed recall injection budget.                                                                  |
-| `memory.retrieval.dynamicBudget.targetHeadroomTokens`     |                                                           `10000` | Reserved headroom to keep free for response generation/tool traces.                                                |
-| `memory.entity.extractRelations.enabled`                  |                                                            `true` | Enable relation edge extraction and persistence in `memory_entity_relations`.                                      |
-| `memory.entity.extractRelations.backfillBatchSize`        |                                                             `200` | Batch size for checkpointed `backfill_entity_relations` jobs.                                                      |
-| `memory.entity.relationRetrieval.enabled`                 |                                                            `true` | Enable one-hop relation expansion from matched seed entities at recall time.                                       |
-| `memory.entity.relationRetrieval.maxSeedEntities`         |                                                               `8` | Maximum matched seed entities from the query.                                                                      |
-| `memory.entity.relationRetrieval.maxNeighborEntities`     |                                                              `20` | Maximum unique neighbor entities expanded from relation edges.                                                     |
-| `memory.entity.relationRetrieval.maxEdges`                |                                                              `40` | Maximum relation edges traversed during expansion.                                                                 |
-| `memory.entity.relationRetrieval.neighborScoreMultiplier` |                                                             `0.7` | Downweight multiplier for relation-expanded candidates vs direct entity hits.                                      |
-| `memory.conflicts.enabled`                                |                                                            `true` | Enable soft conflict gate for unresolved `memory_item_conflicts`.                                                  |
-| `memory.conflicts.resolverLlmTimeoutMs`                   |                                                           `12000` | Timeout bound for clarification resolver LLM fallback.                                                             |
-| `memory.conflicts.relevanceThreshold`                     |                                                             `0.3` | Similarity threshold for deciding whether a pending conflict is relevant to the current request.                   |
-| `memory.conflicts.gateMode`                               |                                                          `'soft'` | Conflict gate strategy. Currently only `'soft'` is supported (resolves conflicts internally without user prompts). |
-| `memory.conflicts.conflictableKinds`                      | `['preference', 'profile', 'constraint', 'instruction', 'style']` | Memory item kinds eligible for conflict detection. Items with kinds outside this list are auto-dismissed.          |
-| `memory.profile.enabled`                                  |                                                            `true` | Enable dynamic profile compilation from active trusted profile/preference/constraint/instruction memories.         |
-| `memory.profile.maxInjectTokens`                          |                                                             `800` | Hard token cap enforced by `ProfileCompiler` when generating the runtime profile block.                            |
+| Config key                                            |                   Default | Purpose                                                              |
+| ----------------------------------------------------- | ------------------------: | -------------------------------------------------------------------- |
+| `memory.retrieval.dynamicBudget.enabled`              |                    `true` | Toggle per-turn recall budget calculation from live prompt headroom. |
+| `memory.retrieval.dynamicBudget.minInjectTokens`      |                    `1200` | Lower clamp for computed recall injection budget.                    |
+| `memory.retrieval.dynamicBudget.maxInjectTokens`      |                   `10000` | Upper clamp for computed recall injection budget.                    |
+| `memory.retrieval.dynamicBudget.targetHeadroomTokens` |                   `10000` | Reserved headroom to keep free for response generation/tool traces.  |
+| `memory.retrieval.maxInjectTokens`                    |                   `10000` | Static fallback when dynamic budget is disabled.                     |
+| `memory.retrieval.scopePolicy`                        | `'allow_global_fallback'` | Scope filtering strategy: `'strict'` or `'allow_global_fallback'`.   |
 ### Memory Recall Debugging Playbook
 1. Run a recall-heavy turn and inspect `memory_recalled` events in the client trace stream.
 2. Validate baseline counters:
-   - `lexicalHits`, `semanticHits`, `recencyHits`, `entityHits`
-   - `relationSeedEntityCount`, `relationTraversedEdgeCount`, `relationNeighborEntityCount`, `relationExpandedItemCount`
+   - `semanticHits`, `recencyHits`
+   - `tier1Count`, `tier2Count`
+   - `hybridSearchLatencyMs`
    - `mergedCount`, `selectedCount`, `injectedTokens`, `latencyMs`
 3. Cross-check context pressure with `context_compacted` events:
    - `previousEstimatedInputTokens` vs `estimatedInputTokens`
    - `summaryCalls`, `compactedMessages`
 4. If dynamic budget is enabled, verify `injectedTokens` stays within the configured min/max clamps for `dynamicBudget`.
-5. Run `bun run src/index.ts memory status` and confirm cleanup pressure signals:
-   - `Pending conflicts`, `Resolved conflicts`, `Oldest pending conflict age`
-   - job queue counts for `cleanup_resolved_conflicts` / `cleanup_stale_superseded_items`
-6. Before tuning ranking or relation settings, run:
+5. Inspect staleness distribution in debug logs:
+   - `fresh`, `aging`, `stale`, `very_stale` counts
+   - Check for unexpected tier demotions (very_stale tier 1 items demoted to tier 2)
+6. Before tuning ranking settings, run:
    - `cd assistant && bun test src/__tests__/context-memory-e2e.test.ts`
    - `cd assistant && bun test src/__tests__/memory-context-benchmark.benchmark.test.ts`
    - `cd assistant && bun test src/__tests__/memory-recall-quality.test.ts`
-   - `cd assistant && bun test src/__tests__/memory-regressions.test.ts -t "relation"`
 7. After tuning, rerun the same suite and compare:
-   - relation counters (coverage)
+   - tier counts (coverage)
    - selected count / injected tokens (budget safety)
    - latency and ordering regressions via top candidate snapshots
-### Conflict Lifecycle and Profile Hygiene
+### Write Path — Extraction and Supersession
 ```mermaid
 stateDiagram-v2
-    [*] --> ActiveItems : extract_items/check_contradictions
-    ActiveItems --> PendingConflict : ambiguous_contradiction\n(candidate -> pending_clarification)
-    PendingConflict --> PendingConflict : internal evaluation\n(relevance check, no user prompt)
-    PendingConflict --> Dismissed : non-actionable\n(kind policy + transient statement filter)
-    PendingConflict --> ResolvedKeepExisting : clarification resolver\n+ applyConflictResolution
-    PendingConflict --> ResolvedKeepCandidate : clarification resolver\n+ applyConflictResolution
-    PendingConflict --> ResolvedMerge : clarification resolver\n+ applyConflictResolution
-    ResolvedKeepExisting --> CleanupConflicts : cleanup_resolved_conflicts
-    ResolvedKeepCandidate --> CleanupConflicts : cleanup_resolved_conflicts
-    ResolvedMerge --> CleanupConflicts : cleanup_resolved_conflicts
-    ResolvedKeepExisting --> SupersededItems : candidate superseded
-    ResolvedMerge --> SupersededItems : merged-from candidate superseded
-    SupersededItems --> CleanupItems : cleanup_stale_superseded_items
+    [*] --> ActiveItem : extract_items\n(LLM or pattern-based)
+    ActiveItem --> Superseded : explicit supersession\n(overrideConfidence = "explicit"\n+ supersedes = oldItemId)
+    ActiveItem --> ActiveItem : tentative/inferred override\n(both items coexist)
+    ActiveItem --> Superseded : subject-match fallback\n(same kind + subject,\nno LLM-directed supersession)
+    Superseded --> Cleanup : cleanup_stale_superseded_items\n(delete from DB + Qdrant)
 ```
-### Internal-Only Conflict Handling
+**Item extraction** uses LLM-powered extraction (with pattern-based fallback) to identify memorable information from conversation messages. Each extracted item belongs to one of six kinds:
+| Kind         | Description                                       | Base Lifetime |
+| ------------ | ------------------------------------------------- | ------------- |
+| `identity`   | Personal info, facts, relationships               | 6 months      |
+| `preference` | Likes, dislikes, preferred approaches/tools       | 3 months      |
+| `constraint` | Rules, requirements, directives                   | 1 month       |
+| `project`    | Project details, repos, tech stacks, action items | 2 weeks       |
+| `decision`   | Choices made, approaches selected                 | 2 weeks       |
+| `event`      | Deadlines, milestones, meetings, dates            | 3 days        |
+**Supersession chains** replace the old conflict resolution system. When the LLM extracts a new item that updates an existing one, it sets `supersedes` to the old item's ID and `overrideConfidence` to one of three levels:
+- `explicit` — Clear override signal (e.g. "I changed my mind about X"). The old item is marked `superseded` and removed from Qdrant.
+- `tentative` — Ambiguous; both items coexist as active.
+- `inferred` — Weak signal; both items coexist (logged for observability).
+A fallback subject-match supersession also runs for items without LLM-directed supersession: same kind + same subject = old item superseded.
+**Semantic density gating** skips extraction for messages that are too short, consist of low-value filler (e.g. "ok", "thanks", "got it"), or have fewer than 3 words.
+### Read Path — Hybrid Recall Pipeline
+The recall pipeline runs on every turn that passes the `needsMemory` gate (skips empty, very short, and tool-result-only turns). The pipeline is orchestrated by `buildMemoryRecall()` in `retriever.ts`:
+1. **Query construction** (`query-builder.ts`): Combines the user request text (up to 2000 chars) with any in-context session summary (up to 1200 chars).
+2. **Dense + sparse embedding generation**: The query is embedded using the configured embedding provider (auto-selection order: local → OpenAI → Gemini → Ollama). A TF-IDF sparse embedding is also generated in-process using FNV-1a hashing to a 30K vocabulary with sub-linear TF weighting and L2 normalization.
+3. **Hybrid search on Qdrant**: When both dense and sparse vectors are available, the pipeline uses Qdrant's query API with two prefetch stages (dense and sparse, each fetching up to 40 candidates) fused via Reciprocal Rank Fusion (RRF). Falls back to dense-only search when sparse vectors are unavailable.
+4. **Recency supplement**: A DB-only recency search fetches the 5 most recent segments from the current conversation, providing conversation-local context even when vector search misses.
+5. **Merge and deduplicate**: Hybrid and recency candidates are merged by key. Duplicate entries keep the highest scores from each source. A weighted final score is computed: `semantic * 0.7 + recency * 0.2 + confidence * 0.1`.
-Memory conflict resolution is entirely internal and non-interruptive. The conflict gate evaluates pending conflicts on each turn, dismisses non-actionable ones (based on kind policy, statement eligibility, coherence, and provenance), and attempts resolution when user input looks like a natural clarification. At no point does the conflict system produce user-facing clarification prompts, inject conflict instructions into the assistant's response, or block the user's request. The user is never aware that a conflict exists; the runtime response path always continues answering the user's actual request. This invariant is enforced across the conflict gate (`session-conflict-gate.ts`), session memory (`session-memory.ts`), session agent loop (`session-agent-loop.ts`), and runtime assembly (`session-runtime-assembly.ts`).
+6. **Tier classification** (`tier-classifier.ts`): Score-based, deterministic classification:
+   - `finalScore > 0.8` → **tier 1** (high relevance)
+   - `finalScore > 0.6` → **tier 2** (possibly relevant)
+   - `finalScore <= 0.6` → dropped
-Runtime profile flow (per turn):
+7. **Staleness computation** (`staleness.ts`): Each item candidate is annotated with a staleness level based on its age relative to a kind-specific base lifetime (see table above). The effective lifetime is extended by a reinforcement factor: `baseLifetime * (1 + 0.3 * (sourceConversationCount - 1))`, so items mentioned across multiple conversations age more slowly. Staleness levels, where `ratio` is the item's age divided by its effective lifetime:
+   - `ratio < 0.5` → `fresh`
+   - `ratio <= 1.0` → `aging`
+   - `ratio <= 2.0` → `stale`
+   - `ratio > 2.0` → `very_stale`
-1. `ProfileCompiler` builds a trusted profile block from active `profile` / `preference` / `constraint` / `instruction` items under strict token cap.
-2. Session injects that block only into runtime prompt state.
-3. Session strips the injected profile block before persisting conversation history, so dynamic profile context never pollutes durable message rows.
+8. **Stale demotion**: `very_stale` tier 1 candidates are demoted to tier 2, preventing old information from occupying prime injection space.
-### Provenance-Aware Memory Pipeline
+9. **Two-layer XML injection** (`formatting.ts`): Budget-aware rendering into four XML sections:
-Every persisted message carries provenance metadata (`provenanceTrustClass`, `provenanceSourceChannel`, etc.) derived from the `TrustContext` resolved by `trust-context-resolver.ts`. This metadata records the trust class of the actor who produced the message and through which channel, enabling downstream trust decisions without re-resolving identity at read time.
+   ```xml
+   <memory_context>
+   <user_identity>
+   <!-- identity-kind tier 1 items (plain statements) -->
+   </user_identity>
+   <relevant_context>
+   <!-- remaining tier 1 items, i.e. not identity, preference, or constraint (episode-wrapped with source attribution) -->
+   </relevant_context>
+   <applicable_preferences>
+   <!-- preference/constraint tier 1 items (plain statements) -->
+   </applicable_preferences>
+   <possibly_relevant>
+   <!-- tier 2 items (episode-wrapped with staleness annotations) -->
+   </possibly_relevant>
+   </memory_context>
+   ```
+   Empty sections are omitted. Each item is rendered within a per-item token budget (150 tokens for tier 1 items, 100 tokens for tier 2 items). Tier 1 sections consume budget first; tier 2 uses the remainder.
+10. **Injection strategy**: The rendered `<memory_context>` block is injected as a separate user + assistant acknowledgment message pair before the last user message (`injectMemoryRecallAsSeparateMessage`). This separates memory context from the user's actual query.
+### Internal-Only Trust Gating
+**Provenance-aware pipeline**: Every persisted message carries provenance metadata (`provenanceTrustClass`, `provenanceSourceChannel`, etc.) derived from the `TrustContext` resolved by `trust-context-resolver.ts`.
 Two trust gates enforce trust-class-based access control over the memory pipeline:
-- **Write gate** (`indexer.ts`): The `extract_items` and `resolve_conflicts` jobs only run for messages from trusted actors (guardian or undefined provenance). Messages from untrusted actors (`trusted_contact`, `unknown`) are still segmented and embedded — so they appear in conversation context — but no profile extraction or conflict resolution is triggered. This prevents untrusted channels from injecting or mutating long-term memory items.
+- **Write gate** (`indexer.ts`): The `extract_items` job only runs for messages from trusted actors (guardian or undefined provenance). Messages from untrusted actors (`trusted_contact`, `unknown`) are still segmented and embedded — so they appear in conversation context — but no item extraction is triggered. This prevents untrusted channels from injecting or mutating long-term memory items.
-- **Read gate** (`session-memory.ts`): When the current session's actor is untrusted, the memory recall pipeline returns a no-op context — no recall injection, no dynamic profile, no conflict resolution. This ensures untrusted actors cannot surface or exploit previously extracted memory.
+- **Read gate** (`session-memory.ts`): When the current session's actor is untrusted, the memory recall pipeline returns a no-op context — no recall injection. This ensures untrusted actors cannot surface or exploit previously extracted memory.
 Trust policy is **cross-channel and trust-class-based**: decisions use `trustContext.trustClass`, not the channel string. Desktop sessions default to `trustClass: 'guardian'`. External channels (Telegram, WhatsApp, phone) provide explicit trust context via the resolver. Messages without provenance metadata are treated as trusted (guardian); all new messages carry provenance.
+### Embedding Backend Selection
+The embedding backend is selected based on `memory.embeddings.provider` config:
+- `auto` (default): Tries local → OpenAI → Gemini → Ollama, using the first available.
+- `local`: ONNX-based local model (bge-small-en-v1.5). Lazy-loaded to avoid crashing in compiled binaries where onnxruntime-node is unavailable.
+- `openai`: OpenAI text-embedding-3-small. Requires `apiKeys.openai`.
+- `gemini`: Gemini gemini-embedding-001. Requires `apiKeys.gemini`. Only backend supporting multimodal embeddings (images, audio, video).
+- `ollama`: Ollama nomic-embed-text. Requires Ollama to be configured.
+An in-memory LRU vector cache (32 MB cap, keyed by `sha256(provider + model + content)`) avoids redundant embedding calls for identical content. Sparse embeddings are generated in-process (no external calls).
+### Graceful Degradation
+When the embedding backend or Qdrant is unavailable:
+- A **circuit breaker** on Qdrant (`qdrant-circuit-breaker.ts`) tracks consecutive failures and short-circuits search calls when the breaker is open.
+- If embedding generation fails and `memory.embeddings.required` is `true`, recall returns an empty result with a degradation status (`embedding_generation_failed` or `embedding_provider_down`).
+- If embeddings are optional (default), the pipeline falls back to recency-only search.
+- Degradation status is reported to clients via `memory_status` events.
 ---
 ## Private Threads — Isolated Memory and Strict Side-Effect Controls
@@ -289,8 +353,6 @@ graph TB
 **Read fallback**: When recalling memories for a private thread, the retriever queries both the thread's own scope and the `'default'` scope. This ensures the assistant still has access to general knowledge (user profile, preferences, facts) learned in standard threads, while private-thread-specific memories take precedence in ranking. The fallback is implemented via `ScopePolicyOverride` with `fallbackToDefault: true`, which overrides the global scope policy on a per-call basis.
-**Profile compilation**: The `ProfileCompiler` also respects this dual-scope behavior for private threads — it includes profile/preference/constraint items from both the private scope and the default scope when building the runtime profile block.
 ### SessionMemoryPolicy
 The daemon derives a `SessionMemoryPolicy` from the conversation's `thread_type` and `memory_scope_id` when creating or restoring a session:
@@ -333,8 +395,7 @@ This ensures that file writes, bash commands, host operations, and other mutatin
 | `assistant/src/tools/executor.ts`            | `forcePromptSideEffects` gate — promotes allow to prompt for side-effect tools             |
 | `assistant/src/memory/search/types.ts`       | `ScopePolicyOverride` interface for per-call scope control                                 |
 | `assistant/src/memory/retriever.ts`          | `buildScopeFilter()` — builds scope ID list from override or global config                 |
-| `assistant/src/memory/profile-compiler.ts`   | Dual-scope profile compilation with `includeDefaultFallback`                               |
-| `assistant/src/daemon/session-memory.ts`     | Wires `scopeId` and `includeDefaultFallback` into recall and profile compilation           |
+| `assistant/src/daemon/session-memory.ts`     | Wires `scopeId` and `includeDefaultFallback` into recall                                   |
 ---
@@ -387,7 +448,7 @@ graph TB
 ### Cache compatibility
-The Anthropic provider places `cache_control: { type: 'ephemeral' }` on the **last content block** of the last two user turns. Since workspace context is prepended (first block), the cache breakpoint correctly lands on the trailing user text or dynamic profile block. This is validated by dedicated cache-compatibility tests.
+The Anthropic provider places `cache_control: { type: 'ephemeral' }` on the **last content block** of the last two user turns. Since workspace context is prepended (first block), the cache breakpoint correctly lands on the trailing user text block. This is validated by dedicated cache-compatibility tests.
 ### Key files
@@ -425,7 +486,7 @@ graph TB
 - **Fresh each turn**: `buildTemporalContext()` is called at the start of every agent loop invocation, ensuring the model always sees the current date even in long-running conversations.
 - **Clock source invariant**: Absolute time (`now`) always comes from the assistant host clock (`Date.now()`), never from channel/client clocks.
-- **Timezone precedence**: If `ui.userTimezone` is configured, temporal context uses it for local-date interpretation. Otherwise it falls back to dynamic profile memory, then assistant host timezone.
+- **Timezone precedence**: If `ui.userTimezone` is configured, temporal context uses it for local-date interpretation. Otherwise it falls back to the memory-stored timezone, then to the assistant host timezone.
 - **Timezone-aware**: Uses `Intl.DateTimeFormat` APIs for DST-safe date arithmetic and timezone validation/canonicalization.
 - **Bounded output**: Hard-capped at 1500 characters and 14 horizon entries to prevent prompt bloat.
 - **Runtime-only**: The injected `<temporal_context>` block is stripped from `this.messages` after the agent loop completes via `stripTemporalContext`. It never persists in conversation history.