memory-crystal 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. package/.env.example +20 -0
  2. package/CHANGELOG.md +6 -0
  3. package/LETTERS.md +22 -0
  4. package/LICENSE +21 -0
  5. package/README-ENTERPRISE.md +162 -0
  6. package/README-old.md +275 -0
  7. package/README.md +91 -0
  8. package/RELAY.md +88 -0
  9. package/TECHNICAL.md +379 -0
  10. package/ai/dev-updates/2026-02-25--cc-air--phase2-architecture-pivot.md +70 -0
  11. package/ai/dev-updates/2026-02-25--cc-air--phase2-worker-build.md +72 -0
  12. package/ai/dev-updates/2026-02-26--10-25-16--cc-mini--phase2-implementation.md +49 -0
  13. package/ai/dev-updates/2026-02-27--20-30-00--cc-mini--readme-overhaul-and-public-deploy.md +69 -0
  14. package/ai/notes/2026-02-26--cc-air--notes.md +412 -0
  15. package/ai/notes/2026-02-27--cc-mini--grok-feedback.md +44 -0
  16. package/ai/notes/2026-02-27--cc-mini--lesa-feedback.md +45 -0
  17. package/ai/notes/RESEARCH.md +1185 -0
  18. package/ai/notes/salience-research/README.md +29 -0
  19. package/ai/notes/salience-research/eurosla-salience-review.md +64 -0
  20. package/ai/notes/salience-research/full-research-summary.md +269 -0
  21. package/ai/notes/salience-research/salience-levels-diagram.png +0 -0
  22. package/ai/plan/2026-02-27--cc-mini--qr-pairing-spec.md +203 -0
  23. package/ai/plan/_archive/PLAN.md +194 -0
  24. package/ai/plan/_archive/PRD.md +1014 -0
  25. package/ai/plan/cc-plans-duplicates-from-dot-claude/2026-02-26--cc-mini--phase2-implementation-plan.md +245 -0
  26. package/ai/plan/dev-conventions-note.md +70 -0
  27. package/ai/plan/ldm-os-install-and-boot-architecture.md +285 -0
  28. package/ai/plan/memory-crystal-phase2-plan.md +192 -0
  29. package/ai/plan/memory-system-lay-of-the-land.md +214 -0
  30. package/ai/plan/phase2-ephemeral-relay.md +238 -0
  31. package/ai/plan/readme-first.md +68 -0
  32. package/ai/plan/roadmap.md +159 -0
  33. package/ai/todos/PUNCHLIST.md +44 -0
  34. package/ai/todos/README.md +31 -0
  35. package/ai/todos/inboxes/cc-air/2026-02-26--cc-air--post-relay-todos.md +85 -0
  36. package/ai/todos/inboxes/cc-mini/2026-02-26--cc-mini--phase2-status.md +100 -0
  37. package/ai/todos/inboxes/cc-mini/_archive/TODO.md +25 -0
  38. package/ai/todos/inboxes/parker/2026-02-25--cc-air--setup-checklist.md +139 -0
  39. package/ai/todos/inboxes/parker/2026-02-26--cc-mini--phase2-your-moves.md +72 -0
  40. package/dist/cc-hook.d.ts +1 -0
  41. package/dist/cc-hook.js +349 -0
  42. package/dist/chunk-3VFIJYS4.js +818 -0
  43. package/dist/chunk-52QE3YI3.js +1169 -0
  44. package/dist/chunk-AA3OPP4Z.js +432 -0
  45. package/dist/chunk-D3I3ZSE2.js +411 -0
  46. package/dist/chunk-EKSACBTJ.js +1070 -0
  47. package/dist/chunk-F3Y7EL7K.js +83 -0
  48. package/dist/chunk-JWZXYVET.js +1068 -0
  49. package/dist/chunk-KYVWO6ZM.js +1069 -0
  50. package/dist/chunk-L3VHARQH.js +413 -0
  51. package/dist/chunk-LOVAHSQV.js +411 -0
  52. package/dist/chunk-LQOYCAGG.js +446 -0
  53. package/dist/chunk-MK42FMEG.js +147 -0
  54. package/dist/chunk-NIJCVN3O.js +147 -0
  55. package/dist/chunk-O2UITJGH.js +465 -0
  56. package/dist/chunk-PEK6JH65.js +432 -0
  57. package/dist/chunk-PJ6FFKEX.js +77 -0
  58. package/dist/chunk-PLUBBZYR.js +800 -0
  59. package/dist/chunk-SGL6ISBJ.js +1061 -0
  60. package/dist/chunk-UNHVZB5G.js +411 -0
  61. package/dist/chunk-VAFTWSTE.js +1061 -0
  62. package/dist/chunk-XZ3S56RQ.js +1061 -0
  63. package/dist/chunk-Y72C7F6O.js +148 -0
  64. package/dist/cli.d.ts +1 -0
  65. package/dist/cli.js +325 -0
  66. package/dist/core.d.ts +188 -0
  67. package/dist/core.js +12 -0
  68. package/dist/crypto.d.ts +16 -0
  69. package/dist/crypto.js +18 -0
  70. package/dist/dev-update-SZ2Z4WCQ.js +6 -0
  71. package/dist/ldm.d.ts +17 -0
  72. package/dist/ldm.js +12 -0
  73. package/dist/mcp-server.d.ts +1 -0
  74. package/dist/mcp-server.js +250 -0
  75. package/dist/migrate.d.ts +1 -0
  76. package/dist/migrate.js +89 -0
  77. package/dist/mirror-sync.d.ts +1 -0
  78. package/dist/mirror-sync.js +130 -0
  79. package/dist/openclaw.d.ts +5 -0
  80. package/dist/openclaw.js +349 -0
  81. package/dist/poller.d.ts +1 -0
  82. package/dist/poller.js +272 -0
  83. package/dist/summarize.d.ts +19 -0
  84. package/dist/summarize.js +10 -0
  85. package/dist/worker.js +137 -0
  86. package/openclaw.plugin.json +11 -0
  87. package/package.json +40 -0
  88. package/scripts/migrate-lance-to-sqlite.mjs +217 -0
  89. package/skills/memory/SKILL.md +61 -0
  90. package/src/cc-hook.ts +447 -0
  91. package/src/cli.ts +356 -0
  92. package/src/core.ts +1472 -0
  93. package/src/crypto.ts +113 -0
  94. package/src/dev-update.ts +178 -0
  95. package/src/ldm.ts +117 -0
  96. package/src/mcp-server.ts +274 -0
  97. package/src/migrate.ts +104 -0
  98. package/src/mirror-sync.ts +175 -0
  99. package/src/openclaw.ts +250 -0
  100. package/src/poller.ts +345 -0
  101. package/src/summarize.ts +210 -0
  102. package/src/worker.ts +208 -0
  103. package/tsconfig.json +18 -0
  104. package/wrangler.toml +20 -0
@@ -0,0 +1,1185 @@
1
+ # Memory Crystal: Technical Research Document
2
+
3
+ ## Sovereign, Self-Hosted Memory System for AI Agents
4
+ ### An OpenClaw Plugin Providing Supermemory-Level Functionality, Fully Local
5
+
6
+ **Date:** 2026-02-07
7
+ **Status:** Research Phase
8
+ **Target:** OpenClaw plugin (`@openclaw/memory-crystal`)
9
+
10
+ ---
11
+
12
+ ## Table of Contents
13
+
14
+ 1. [Vector Search Infrastructure (Local)](#1-vector-search-infrastructure-local)
15
+ 2. [Knowledge Graph for Memory](#2-knowledge-graph-for-memory)
16
+ 3. [Ingestion Pipeline](#3-ingestion-pipeline)
17
+ 4. [Retrieval Quality](#4-retrieval-quality)
18
+ 5. [Connectors / Ingestion Sources](#5-connectors--ingestion-sources)
19
+ 6. [Memory Evolution](#6-memory-evolution)
20
+ 7. [Existing Open Source Landscape](#7-existing-open-source-landscape)
21
+ 8. [MCP Integration](#8-mcp-integration)
22
+ 9. [Recommended Architecture](#9-recommended-architecture)
23
+
24
+ ---
25
+
26
+ ## 1. Vector Search Infrastructure (Local)
27
+
28
+ ### Comparison Matrix
29
+
30
+ | Feature | sqlite-vec | LanceDB | ChromaDB | Qdrant (self-hosted) | pgvector |
31
+ |---|---|---|---|---|---|
32
+ | **Embedding** | Yes (SQLite ext) | Yes (library) | Yes (library) | No (server) | No (server) |
33
+ | **Node.js/TS SDK** | Yes (`sqlite-vec` npm) | Yes (native TS SDK) | Yes (JS client) | Yes (JS client) | Yes (via pg) |
34
+ | **ANN Index** | No (brute-force only) | Yes (IVF-PQ, HNSW) | Yes (HNSW via hnswlib) | Yes (HNSW) | Yes (HNSW, IVF) |
35
+ | **Full-Text/BM25** | Via FTS5 (SQLite native) | Yes (built-in BM25) | No (vector only) | Yes (sparse vectors) | Yes (tsvector) |
36
+ | **Hybrid Search** | Manual combo | Built-in | No | Built-in | Manual combo |
37
+ | **Disk-based** | Yes | Yes (Apache Arrow) | Partially (sqlite backend) | Yes | Yes |
38
+ | **Zero-config** | Yes | Yes | Yes | No (Docker) | No (Docker/install) |
39
+ | **Binary quantization** | Yes | Yes | No | Yes | No |
40
+ | **Matryoshka support** | Yes (truncatable) | Yes | No | Yes | No |
41
+
42
+ ### Performance at Scale
43
+
44
+ **sqlite-vec (brute-force)**
45
+ - 10K chunks (768-dim float): ~2ms query latency
46
+ - 100K chunks (768-dim float): ~75ms query latency
47
+ - 1M chunks (128-dim float): ~17ms query (static), ~35ms (vec0 virtual table)
48
+ - 1M chunks (768-dim float): estimated ~100-200ms
49
+ - Bit vectors dramatically faster: 3072-dim bit vectors query in ~11ms at 100K scale
50
+ - **Limitation:** No ANN indexing; purely brute-force. Performance degrades linearly with dataset size.
51
+
52
+ **LanceDB (IVF-PQ + disk)**
53
+ - 10K chunks: <5ms query latency
54
+ - 100K chunks: ~25ms query latency
55
+ - 1M chunks: ~25ms with indexing (near in-memory from disk via memory-mapped Arrow)
56
+ - Achieves ~95% recall accuracy with advanced indexing
57
+ - **Strength:** Performance remains relatively flat due to IVF-PQ indexing. Best disk-to-query speed ratio.
58
+
59
+ **ChromaDB (HNSW)**
60
+ - 10K chunks: ~5ms query latency
61
+ - 100K chunks: ~10-20ms query latency
62
+ - 1M chunks: memory pressure issues; HNSW index held in-memory
63
+ - 2025 Rust-core rewrite delivers 4x faster writes and queries vs original Python implementation
64
+ - **Limitation:** Index must fit in memory for large collections.
65
+
66
+ **Qdrant (HNSW + quantization)**
67
+ - Best absolute performance at all scales
68
+ - 10K-1M chunks: consistently <10ms with proper configuration
69
+ - **Limitation:** Requires running a separate server (Docker). Overkill for single-user local.
70
+
71
+ **pgvector (HNSW)**
72
+ - Good up to ~10M vectors with <100ms latency
73
+ - Requires PostgreSQL installation
74
+ - **Limitation:** Heavy dependency for a local-first plugin.
75
+
76
+ ### Recommendation: LanceDB
77
+
78
+ **LanceDB is the clear winner for memory-crystal.** Rationale:
79
+
80
+ 1. **Embedded library** -- no server process, just `npm install @lancedb/lancedb`. Same deployment model as SQLite.
81
+ 2. **Native TypeScript SDK** -- first-class Node.js/TS support, used by Continue IDE for exactly this use case.
82
+ 3. **Built-in hybrid search** -- BM25 full-text search + vector similarity in one query, no manual fusion needed.
83
+ 4. **Disk-efficient** -- Apache Arrow columnar format with memory-mapped access. Near in-memory speed from disk.
84
+ 5. **Scales to 1M+** -- IVF-PQ indexing keeps query times flat. sqlite-vec degrades linearly.
85
+ 6. **SQL-like filtering** -- metadata filtering built-in, important for time-based and source-based queries.
86
+
87
+ **Fallback consideration:** sqlite-vec is excellent as a lightweight fallback for <100K chunks. Its zero-dependency nature (pure SQLite extension) and bit-vector quantization support make it viable for a "lite mode." Consider offering both backends.
88
+
89
+ ### Embedding Models
90
+
91
+ | Model | Dims | Context | Local? | Quality (MTEB) | Speed | Cost |
92
+ |---|---|---|---|---|---|---|
93
+ | **nomic-embed-text-v1.5** | 768 (truncatable to 256/384) | 8,192 tokens | Yes (Ollama) | Beats text-embedding-3-small | ~50ms/chunk (GPU) | Free |
94
+ | **all-MiniLM-L6-v2** | 384 | 256 tokens | Yes (ONNX/Ollama) | Lower (−5 to −8%) | ~15ms/chunk (CPU!) | Free |
95
+ | **text-embedding-3-small** | 1536 (truncatable) | 8,191 tokens | No (API) | Good baseline | ~20ms/chunk | $0.02/1M tokens |
96
+ | **nomic-embed-text-v2** | 768 | 8,192 tokens | Yes (Ollama) | SOTA open-source | ~60ms/chunk (GPU) | Free |
97
+ | **BGE-base-en-v1.5** | 768 | 512 tokens | Yes (ONNX) | Strong | ~30ms/chunk | Free |
98
+
99
+ ### Recommendation: nomic-embed-text-v1.5
100
+
101
+ - Outperforms OpenAI text-embedding-3-small on both MTEB and long-context (LoCo) benchmarks
102
+ - 8,192 token context window (critical for larger chunks)
103
+ - Matryoshka representation learning: truncate to 256 dims for fast search, use full 768 for reranking
104
+ - Runs locally via Ollama with no API dependency
105
+ - **Fallback:** all-MiniLM-L6-v2 for CPU-only environments (384 dims, fast but lower quality, only 256 token context)
106
+ - **Optional:** text-embedding-3-small as a cloud option for users who prefer API-based embeddings
107
+
108
+ ---
109
+
110
+ ## 2. Knowledge Graph for Memory
111
+
112
+ ### How Supermemory's Knowledge Graph Works
113
+
114
+ Based on their [blog post on the memory engine](https://supermemory.ai/blog/memory-engine/):
115
+
116
+ - **Hierarchical memory layers** inspired by human cognition: working memory, short-term memory, long-term storage
117
+ - Hot/recent data stays instantly accessible (uses Cloudflare KV for their hosted version)
118
+ - Deeper memories retrieved on-demand
119
+ - **Intelligent decay:** information gradually loses priority based on usage patterns
120
+ - **Continuous summary updates** across information clusters
121
+ - **Connection detection** between seemingly unrelated data
122
+ - **Non-literal query support** for semantic understanding
123
+ - Target: sub-400ms latency
124
+
125
+ Supermemory is primarily a cloud service. Their open-source components focus on the MCP server and browser extension, not the core memory engine.
126
+
127
+ ### Mem0's Graph Memory Architecture
128
+
129
+ Source: [Mem0 Graph Memory docs](https://docs.mem0.ai/open-source/features/graph-memory), [Mem0 paper (arxiv)](https://arxiv.org/html/2504.19413v1)
130
+
131
+ Mem0g (graph variant) represents memories as a **directed labeled graph**:
132
+
133
+ - **Nodes** = entities (people, places, concepts, events) with types, embeddings, and metadata
134
+ - **Edges** = relationship triplets: `(source_entity, relation, destination_entity)`
135
+ - Uses **Neo4j** as the graph database backend
136
+
137
+ **Three-phase pipeline:**
138
+
139
+ 1. **Extraction Phase:** An LLM-based extractor processes the most recent M messages, identifying entities and extracting candidate memory facts.
140
+
141
+ 2. **Update Phase:** For each candidate fact, retrieves S most similar existing memories from the database. An LLM decides one of four operations via tool calling:
142
+ - `ADD` -- new memory, no semantic equivalent exists
143
+ - `UPDATE` -- augment existing memory with complementary info
144
+ - `DELETE` -- remove memory contradicted by new info
145
+ - `NOOP` -- no change needed
146
+
147
+ 3. **Retrieval Phase:** Two strategies:
148
+ - **Entity-centric:** identify key entities in query, find their nodes and relationships
149
+ - **Semantic triplet:** encode entire query as dense embedding, match against relationship triplet embeddings
150
+
151
+ **Conflict detection:** When new info conflicts with existing relationships, an LLM-based resolver marks old relationships as obsolete (not deleted), preserving temporal reasoning.
152
+
153
+ **Performance:** 68.4% accuracy on DMR benchmark, 0.66s median search latency.
154
+
155
+ ### Microsoft GraphRAG
156
+
157
+ Source: [Microsoft GraphRAG](https://microsoft.github.io/graphrag/)
158
+
159
+ GraphRAG builds entity-centric knowledge graphs by:
160
+ 1. Extracting entities and relationships from text using LLMs
161
+ 2. Grouping entities into thematic clusters ("communities") using graph algorithms (Leiden community detection)
162
+ 3. Pre-computing LLM-generated summaries of each community
163
+ 4. At query time, using community summaries + graph structure to augment prompts
164
+
165
+ **Key insight:** 70-80% superiority over traditional RAG for questions requiring "connecting the dots" across disparate information, while using 97% fewer tokens.
166
+
167
+ **Limitation for memory-crystal:** GraphRAG is designed for batch processing of static document corpora, not real-time incremental updates. It requires full recomputation when new data arrives.
168
+
169
+ ### Zep/Graphiti Temporal Knowledge Graph
170
+
171
+ Source: [Graphiti GitHub](https://github.com/getzep/graphiti), [Zep paper (arxiv)](https://arxiv.org/abs/2501.13956)
172
+
173
+ Graphiti is the most relevant approach for agent memory:
174
+
175
+ - **Bi-temporal data model:**
176
+ - Event Time (T): when a fact actually occurred
177
+ - Ingestion Time (T'): when the system learned about it
178
+ - **Real-time incremental updates** -- no batch recomputation needed
179
+ - **Conflict resolution** with temporal awareness
180
+ - **Point-in-time queries** -- "What did I know on December 1st?"
181
+ - **P95 latency: 300ms**
182
+ - 18.5% accuracy improvement over baselines
183
+ - Supports Neo4j, FalkorDB, Kuzu as graph backends
184
+
185
+ **MCP server tools exposed:**
186
+ - `add_episode` -- add information to the knowledge graph
187
+ - `search_nodes` -- search for relevant entity summaries
188
+ - `search_facts` -- search for relevant facts/edges
189
+ - `delete_entity_edge` -- remove entities or edges
190
+ - `delete_episode` -- remove episodes
191
+
192
+ ### Recommendation: Simplified Graph for memory-crystal
193
+
194
+ **Do not use Neo4j.** It is too heavy for a local-first, single-user OpenClaw plugin. Instead, implement a lightweight graph structure using SQLite (via better-sqlite3) alongside LanceDB:
195
+
196
+ ```
197
+ Proposed Schema (SQLite):
198
+
199
+ entities:
200
+ - id: TEXT PRIMARY KEY
201
+ - name: TEXT
202
+ - entity_type: TEXT (person, place, concept, project, preference, fact)
203
+ - created_at: INTEGER (unix timestamp)
204
+ - updated_at: INTEGER
205
+ - access_count: INTEGER
206
+ - last_accessed: INTEGER
207
+ - decay_score: REAL (0.0-1.0)
208
+ - summary: TEXT (LLM-generated, updated on consolidation)
209
+
210
+ relations:
211
+ - id: TEXT PRIMARY KEY
212
+ - source_entity_id: TEXT FK
213
+ - target_entity_id: TEXT FK
214
+ - relation_type: TEXT (e.g., "works_on", "prefers", "knows", "is_located_in")
215
+ - weight: REAL (confidence/strength)
216
+ - created_at: INTEGER
217
+ - updated_at: INTEGER
218
+ - valid_from: INTEGER (temporal: when fact became true)
219
+ - valid_until: INTEGER (NULL = still valid)
220
+ - source_memory_id: TEXT (which memory established this)
221
+
222
+ observations:
223
+ - id: TEXT PRIMARY KEY
224
+ - entity_id: TEXT FK
225
+ - content: TEXT (atomic fact)
226
+ - created_at: INTEGER
227
+ - source_memory_id: TEXT
228
+ - confidence: REAL
229
+
230
+ memories:
231
+ - id: TEXT PRIMARY KEY
232
+ - content: TEXT (original text)
233
+ - contextualized_content: TEXT (with prepended context)
234
+ - source_type: TEXT (imessage, browser, file, manual, conversation)
235
+ - source_id: TEXT (file path, URL, chat ID, etc.)
236
+ - chunk_index: INTEGER (position within source document)
237
+ - created_at: INTEGER
238
+ - updated_at: INTEGER
239
+ - access_count: INTEGER
240
+ - last_accessed: INTEGER
241
+ - decay_score: REAL
242
+ - is_active: BOOLEAN
243
+ - superseded_by: TEXT (FK to newer memory, for contradiction handling)
244
+ ```
245
+
246
+ **Graph operations** implemented as SQLite queries with recursive CTEs for traversal. This avoids the Neo4j dependency while providing the essential graph capabilities:
247
+ - Entity-to-entity traversal (2-3 hops)
248
+ - Temporal queries (valid_from/valid_until)
249
+ - Decay-weighted retrieval
250
+ - Community detection via simple connected-components algorithm
251
+
252
+ ---
253
+
254
+ ## 3. Ingestion Pipeline
255
+
256
+ ### Chunking Strategy Comparison
257
+
258
+ | Strategy | Quality | Speed | Complexity | Best For |
259
+ |---|---|---|---|---|
260
+ | **Fixed-size** | Low | Fastest | Trivial | Prototyping only |
261
+ | **Recursive character splitting** | Good | Fast | Low | General text, markdown |
262
+ | **Semantic chunking** | Best (+2-9% recall) | Slow (needs embeddings) | Medium | Long documents, mixed content |
263
+ | **AST-aware (tree-sitter)** | Best for code | Medium | Medium | Source code |
264
+ | **Document-structure-aware** | Best for structured docs | Medium | Medium | PDFs, HTML |
265
+
266
+ ### Recommended Chunking Pipeline
267
+
268
+ **Phase 1: Content-type detection and routing**
269
+
270
+ ```
271
+ Input -> detect_content_type() -> route to chunker:
272
+ - .md, .txt -> MarkdownChunker (recursive, heading-aware)
273
+ - .ts, .js, .py, etc. -> CodeChunker (tree-sitter AST-aware)
274
+ - .pdf -> PDFChunker (page + paragraph-aware)
275
+ - URL -> HTMLChunker (semantic blocks)
276
+ - iMessage, email -> ConversationChunker (message-group-aware)
277
+ ```
278
+
279
+ **Phase 2: Chunking**
280
+
281
+ Start with **recursive character splitting at 400-512 tokens with 10-20% overlap**, which is the established best practice and LangChain default. Graduate to semantic chunking if retrieval metrics warrant the extra compute cost.
282
+
283
+ For code, use [supermemory/code-chunk](https://github.com/supermemoryai/code-chunk):
284
+ - TypeScript library, AST-aware via tree-sitter
285
+ - Splits at semantic boundaries (functions, classes, methods)
286
+ - Five-step process: Parse -> Extract -> Build Scope Tree -> Chunk -> Enrich
287
+ - Produces `contextualizedText` with scope chain, entity definitions, sibling info, and import dependencies
288
+ - Supports: TypeScript, JavaScript, Python, Rust, Go, Java
289
+ - Config: `maxChunkSize: 1500`, `contextMode: 'full'`, `overlapLines: 10`
290
+
291
+ **Phase 3: Contextual enrichment (Anthropic's approach)**
292
+
293
+ Source: [Anthropic Contextual Retrieval](https://www.anthropic.com/news/contextual-retrieval)
294
+
295
+ Before embedding each chunk, prepend a 50-100 token context summary that situates the chunk within the larger document:
296
+
297
+ ```
298
+ Prompt template:
299
+ "Here is the document: <document>{WHOLE_DOCUMENT}</document>
300
+ Here is the chunk we want to situate: <chunk>{CHUNK_CONTENT}</chunk>
301
+ Please give a short succinct context to situate this chunk within
302
+ the overall document for the purposes of improving search retrieval
303
+ of the chunk. Answer only with the succinct context and nothing else."
304
+ ```
305
+
306
+ **Performance impact:**
307
+ - Contextual Embeddings alone: 35% reduction in retrieval failures
308
+ - Contextual Embeddings + Contextual BM25: 49% reduction in failures
309
+ - Combined with reranking: **67% reduction** in failures
310
+
311
+ **Cost optimization:** Use Anthropic's prompt caching (or a smaller/local model) to generate context. Estimated cost: ~$1.02 per million document tokens with prompt caching. For fully local: use a small local LLM (e.g., Llama 3.2 3B via Ollama) for context generation.
312
+
313
+ **Phase 4: Embedding and indexing**
314
+
315
+ ```
316
+ chunk + contextual_prefix -> embed(nomic-embed-text-v1.5) -> store in LanceDB
317
+ chunk + contextual_prefix -> tokenize -> BM25 index (LanceDB FTS)
318
+ chunk -> extract_entities_and_relations() -> store in SQLite graph
319
+ ```
320
+
321
+ ### Content-Type Specific Strategies
322
+
323
+ **Markdown:** Split on headings (##, ###), then recursive split within sections. Preserve heading hierarchy as metadata.
324
+
325
+ **Code:** Use code-chunk library. Preserve function/class scope chains. Include import context.
326
+
327
+ **PDFs:** Extract text per page, then recursive split. Preserve page numbers as metadata. Consider table extraction separately.
328
+
329
+ **URLs/HTML:** Strip boilerplate (navigation, footers), extract main content, split on semantic blocks (paragraphs, sections).
330
+
331
+ **Conversations (iMessage, email):** Group by conversation thread. Chunk by message groups (not individual messages). Preserve sender and timestamp metadata.
332
+
333
+ ---
334
+
335
+ ## 4. Retrieval Quality
336
+
337
+ ### Hybrid Search Architecture
338
+
339
+ ```
340
+ Query
341
+ |
342
+ v
343
+ [Query Rewriter] -- optional: rephrase for better retrieval
344
+ |
345
+ v
346
+ +---+---+
347
+ | |
348
+ v v
349
+ [Vector Search] [BM25 Keyword Search]
350
+ (LanceDB ANN) (LanceDB FTS)
351
+ | |
352
+ v v
353
+ [Reciprocal Rank Fusion]
354
+ |
355
+ v
356
+ [Reranker] -- cross-encoder rescore top-K
357
+ |
358
+ v
359
+ [Top-N Results]
360
+ ```
361
+
362
+ ### Hybrid Search: Vector + BM25
363
+
364
+ LanceDB supports this natively. The fusion approach:
365
+
366
+ ```typescript
367
+ // LanceDB hybrid search (conceptual)
368
+ const results = await table
369
+ .search(queryEmbedding) // vector similarity
370
+ .fullTextSearch(queryText) // BM25
371
+ .rerank(RRF()) // Reciprocal Rank Fusion
372
+ .limit(20)
373
+ .execute();
374
+ ```
375
+
376
+ **Reciprocal Rank Fusion (RRF)** formula:
377
+ ```
378
+ RRF_score(d) = sum(1 / (k + rank_i(d))) for each ranking system i
379
+ ```
380
+ Where k=60 is standard. This elegantly combines rankings without needing normalized scores.
381
+
382
+ An alternative composite scoring approach:
383
+ ```
384
+ FinalScore = (VectorScore * 0.5) + (KeywordScore * 0.3) + (RecencyScore * 0.2)
385
+ ```
386
+
387
+ ### Query Rewriting / HyDE
388
+
389
+ Source: [HyDE paper](https://arxiv.org/abs/2212.10496)
390
+
391
+ **HyDE (Hypothetical Document Embeddings):**
392
+ 1. Given a user query, use an LLM to generate a hypothetical "ideal answer document"
393
+ 2. Embed this hypothetical document instead of the raw query
394
+ 3. Search the vector store with this embedding
395
+ 4. The hypothetical document captures relevance patterns even if details are inaccurate
396
+
397
+ **When to use HyDE:** Complex, abstract, or multi-faceted queries where the raw query embedding poorly matches stored chunks. For simple factual queries, direct embedding is sufficient.
398
+
399
+ **Implementation for memory-crystal:**
400
+ ```typescript
401
+ async function hydeSearch(query: string): Promise<SearchResult[]> {
402
+ const hypothetical = await llm.generate(
403
+ `Write a short paragraph that would be the ideal answer to: "${query}"`
404
+ );
405
+ const embedding = await embed(hypothetical);
406
+ return vectorStore.search(embedding);
407
+ }
408
+ ```
409
+
410
+ **Cost consideration:** HyDE requires an LLM call per query. Use it selectively -- when the initial retrieval returns low-confidence results, retry with HyDE.
411
+
412
+ ### Reranking
413
+
414
+ **Options (ranked by preference for local-first):**
415
+
416
+ 1. **Local cross-encoder (recommended):** `cross-encoder/ms-marco-MiniLM-L-6-v2`
417
+ - Runs locally via ONNX runtime or sentence-transformers
418
+ - 200-500ms latency for reranking 20 documents
419
+ - 20-35% accuracy improvement over vector-only retrieval
420
+ - No API dependency
421
+
422
+ 2. **Cohere Rerank 4:** Best quality, but requires API call ($1/1K searches)
423
+ - Self-learning capability (improves with usage)
424
+ - For users who want best-in-class reranking
425
+
426
+ 3. **LLM-as-reranker:** Use the agent's own LLM to score relevance
427
+ - Most flexible, works with any model
428
+ - Higher latency, higher cost
429
+
430
+ **Recommendation:** Ship with local cross-encoder reranking as default. Retrieve top-50, rerank to top-10, pass to LLM.
431
+
432
+ ### Contextual Retrieval vs Naive RAG
433
+
434
+ Performance comparison from Anthropic's research:
435
+
436
+ | Approach | Retrieval Failure Rate | Improvement |
437
+ |---|---|---|
438
+ | Naive RAG (vector only) | 5.7% | Baseline |
439
+ | + BM25 Hybrid | 4.1% | -28% |
440
+ | + Contextual Embeddings | 3.7% | -35% |
441
+ | + Contextual Embeddings + BM25 | 2.9% | -49% |
442
+ | + Contextual + BM25 + Reranking | **1.9%** | **-67%** |
443
+
444
+ **memory-crystal should implement the full stack:** Contextual embeddings + BM25 hybrid search + reranking. This is the state of the art for retrieval quality.
445
+
446
+ ---
447
+
448
+ ## 5. Connectors / Ingestion Sources
449
+
450
+ ### Priority 1: macOS Native Sources
451
+
452
+ #### iMessage History
453
+
454
+ **Location:** `~/Library/Messages/chat.db`
455
+
456
+ **Access requirements:**
457
+ - Full Disk Access permission required (System Settings > Privacy & Security > Full Disk Access)
458
+ - SQLite database with WAL mode (three files: chat.db, chat.db-shm, chat.db-wal)
459
+
460
+ **Schema (key tables):**
461
+ - `message` -- message text, timestamps, is_from_me flag
462
+ - `handle` -- contacts (phone numbers, email addresses)
463
+ - `chat` -- conversation threads
464
+ - `chat_message_join` -- links messages to chats
465
+ - `chat_handle_join` -- links handles to chats
466
+ - `attachment` -- file attachments with local paths
467
+
468
+ **Important caveat (macOS Ventura+):** Messages are no longer reliably stored as plain text. The `attributedBody` column contains a serialized binary blob (an archived `NSAttributedString` in Apple's typedstream format) that must be decoded. The `text` column may be NULL for newer messages.
469
+
470
+ **Implementation approach:**
471
+ ```typescript
472
+ import Database from 'better-sqlite3';
473
+
474
+ const db = new Database(
475
+ path.join(os.homedir(), 'Library/Messages/chat.db'),
476
+ { readonly: true, fileMustExist: true }
477
+ );
478
+
479
+ // Query messages with contact info
480
+ const messages = db.prepare(`
481
+ SELECT
482
+ m.ROWID, m.text, m.attributedBody,
483
+ m.date/1000000000 + 978307200 as unix_timestamp,
484
+ m.is_from_me,
485
+ h.id as contact_id,
486
+ c.display_name as chat_name
487
+ FROM message m
488
+ JOIN chat_message_join cmj ON m.ROWID = cmj.message_id
489
+ JOIN chat c ON cmj.chat_id = c.ROWID
490
+ LEFT JOIN handle h ON m.handle_id = h.ROWID
491
+ ORDER BY m.date DESC
492
+ `).all();
493
+ ```
494
+
495
+ **Note on WAL:** The main database file may lag several seconds or minutes behind real-time. For near-real-time monitoring, also read from chat.db-wal.
496
+
497
+ #### Apple Notes
498
+
499
+ **Location:** `/Users/{username}/Library/Group Containers/group.com.apple.notes/NoteStore.sqlite`
500
+
501
+ **Challenges:**
502
+ - Note content stored in `ZICNOTEDATA.ZDATA` as a gzip-compressed blob
503
+ - Decompressed data is in Apple's proprietary protobuf-like binary format
504
+ - Requires reverse-engineering or using existing parsers
505
+
506
+ **Best approach:** Use [dogsheep/apple-notes-to-sqlite](https://github.com/dogsheep/apple-notes-to-sqlite) approach or call AppleScript/JXA to extract notes:
507
+ ```typescript
508
+ // Via osascript (JXA)
509
+ const script = `
510
+ const Notes = Application("Notes");
511
+ const notes = Notes.notes();
512
+ return notes.map(n => ({
513
+ name: n.name(),
514
+ body: n.plaintext(),
515
+ created: n.creationDate().toISOString(),
516
+ modified: n.modificationDate().toISOString(),
517
+ folder: n.container().name()
518
+ }));
519
+ `;
520
+ ```
521
+
522
+ #### Browser History / Bookmarks
523
+
524
+ **Chrome:**
525
+ - History: `~/Library/Application Support/Google/Chrome/Default/History` (SQLite)
526
+ - Bookmarks: `~/Library/Application Support/Google/Chrome/Default/Bookmarks` (JSON)
527
+ - Key table: `urls` (url, title, visit_count, last_visit_time)
528
+
529
+ **Firefox:**
530
+ - History + Bookmarks: `~/Library/Application Support/Firefox/Profiles/<profile>/places.sqlite`
531
+ - Key tables: `moz_places` (url, title, visit_count), `moz_bookmarks`
532
+
533
+ **Safari:**
534
+ - History: `~/Library/Safari/History.db` (SQLite, requires Full Disk Access)
535
+ - Bookmarks: `~/Library/Safari/Bookmarks.plist`
536
+
537
+ **Implementation note:** Chrome and Firefox lock their database files while running. Copy the file first, or open with `SQLITE_OPEN_READONLY` and handle busy errors.
538
+
539
+ #### Clipboard History
540
+
541
+ **macOS native (macOS 26+):** macOS 26 introduces native Clipboard History via Spotlight, but programmatic access is limited.
542
+
543
+ **Privacy changes (macOS 15.4+):** Apps now require explicit user permission (with a system alert) to read the pasteboard outside of a direct paste action. This affects real-time clipboard monitoring.
544
+
545
+ **Recommended approach:** Rather than trying to access system clipboard history, integrate with existing clipboard managers:
546
+ - [Maccy](https://maccy.app/) stores history in a SQLite database
547
+ - Or implement a background clipboard watcher that explicitly monitors NSPasteboard changes (requires user consent)
548
+
549
+ ### Priority 2: File System Sources
550
+
551
+ #### Local Files (Markdown, PDF, Code)
552
+
553
+ **Implementation:** Use a file watcher (chokidar or fs.watch) on configured directories:
554
+ ```typescript
555
+ const watchPaths = [
556
+ '~/Documents',
557
+ '~/Projects',
558
+ '~/Notes'
559
+ ];
560
+
561
+ // On file change: re-ingest
562
+ // On new file: ingest
563
+ // On delete: mark memories as inactive
564
+ ```
565
+
566
+ **PDF extraction:** Use `pdf-parse` or `pdfjs-dist` npm packages for text extraction.
567
+
568
+ **Code files:** Use the code-chunk library (see Section 3) for AST-aware chunking.
569
+
570
+ ### Priority 3: Network Sources
571
+
572
+ #### Email (IMAP / Gmail API)
573
+
574
+ **IMAP approach (self-hosted, no Google dependency):**
575
+ ```typescript
576
+ import Imap from 'imap';
577
+ // Connect to any IMAP server
578
+ // Fetch messages, extract body text
579
+ // Index by sender, subject, date
580
+ ```
581
+
582
+ **Gmail API approach:** Requires OAuth2, Google Cloud project setup. More reliable but adds cloud dependency.
583
+
584
+ **Recommendation:** Start with IMAP for maximum self-hosted compatibility. Add Gmail API as an optional connector.
585
+
586
+ ### Supermemory's Connectors
587
+
588
+ Supermemory offers: S3, Google Drive, Notion, OneDrive, web pages, custom connectors, browser extension (ChatGPT/Claude/Twitter integration), Raycast extension, and their [apple-mcp](https://github.com/supermemoryai/apple-mcp) which provides MCP tools for Messages, Notes, Contacts, Mail, Reminders, Calendar, and Maps.
589
+
590
+ ### Connector Priority for memory-crystal
591
+
592
+ | Priority | Connector | Complexity | Value |
593
+ |---|---|---|---|
594
+ | P0 | Manual add (text/URL) | Low | Foundation |
595
+ | P0 | Local files (md, txt, code) | Low | High |
596
+ | P0 | Conversation history (agent chats) | Low | Critical |
597
+ | P1 | iMessage | Medium | High (personal context) |
598
+ | P1 | Browser history/bookmarks | Medium | High |
599
+ | P1 | Apple Notes | Medium | High |
600
+ | P2 | PDF ingestion | Medium | Medium |
601
+ | P2 | URL/webpage scraping | Medium | Medium |
602
+ | P2 | Email (IMAP) | High | Medium |
603
+ | P3 | Clipboard history | High | Low |
604
+ | P3 | Calendar events | Medium | Low |
605
+
606
+ ---
607
+
608
+ ## 6. Memory Evolution
609
+
610
+ ### Handling Contradictory Memories
611
+
612
+ **Adopt mem0's approach** with temporal graph edges:
613
+
614
+ 1. When new information conflicts with an existing memory:
615
+ - Do NOT delete the old memory
616
+ - Mark the old memory's relation as `valid_until = now`
617
+ - Create a new memory/relation with `valid_from = now`
618
+ - Set `old_memory.superseded_by = new_memory.id`
619
+ - Keep old memory searchable for temporal queries ("What did I think about X last month?")
620
+
621
+ 2. **LLM-based conflict detection:**
622
+ ```typescript
623
+ async function resolveConflict(
624
+ newFact: string,
625
+ existingMemories: Memory[]
626
+ ): Promise<'ADD' | 'UPDATE' | 'DELETE' | 'NOOP'> {
627
+ const prompt = `Given this new information: "${newFact}"
628
+ And these existing memories:
629
+ ${existingMemories.map(m => `- ${m.content} (from ${m.created_at})`).join('\n')}
630
+
631
+ Determine the action:
632
+ - ADD: if this is genuinely new information
633
+ - UPDATE: if this supplements/refines an existing memory
634
+ - DELETE: if this contradicts and replaces an existing memory
635
+ - NOOP: if this is already known
636
+
637
+ Respond with the action and which existing memory (if any) is affected.`;
638
+
639
+ return llm.generate(prompt);
640
+ }
641
+ ```
642
+
643
+ ### Memory Decay / Relevance Scoring
644
+
645
+ **Implement a composite decay score:**
646
+
647
+ ```typescript
648
+ function calculateDecayScore(memory: Memory): number {
649
+ const now = Date.now();
650
+ const ageHours = (now - memory.created_at) / (1000 * 60 * 60); // NOTE: currently unused below — remove, or fold into an explicit age penalty term
651
+ const timeSinceAccess = (now - memory.last_accessed) / (1000 * 60 * 60);
652
+
653
+ // Exponential decay based on time since last access
654
+ const temporalDecay = Math.pow(0.995, timeSinceAccess);
655
+
656
+ // Access frequency boost (log scale)
657
+ const accessBoost = Math.log2(memory.access_count + 1) / 10;
658
+
659
+ // Importance weight (set by user or inferred)
660
+ const importanceWeight = memory.importance || 0.5;
661
+
662
+ // Composite score
663
+ return Math.min(1.0, (temporalDecay * 0.5) + (accessBoost * 0.3) + (importanceWeight * 0.2));
664
+ }
665
+ ```
666
+
667
+ **Decay update schedule:** Run decay recalculation:
668
+ - On every access (update `last_accessed`, increment `access_count`)
669
+ - Hourly background job for batch decay updates
670
+ - On retrieval, boost accessed memories
671
+
672
+ ### Deduplication
673
+
674
+ **Multi-stage deduplication:**
675
+
676
+ 1. **Exact match:** Hash-based dedup on raw content (SHA-256)
677
+ 2. **Near-duplicate:** Cosine similarity threshold on embeddings (>0.95 = duplicate)
678
+ 3. **Semantic duplicate:** LLM-based judgment for memories that express the same fact differently
679
+ - "I'm allergic to shellfish" and "can't eat shrimp" should be detected as related
680
+ - Merge into a consolidated memory with both observations attached
681
+
682
+ ```typescript
683
+ async function deduplicateMemory(newMemory: Memory): Promise<DedupeResult> {
684
+ // Stage 1: exact match
685
+ const exactMatch = await findByHash(sha256(newMemory.content));
686
+ if (exactMatch) return { action: 'skip', existing: exactMatch };
687
+
688
+ // Stage 2: near-duplicate (vector similarity)
689
+ const similar = await vectorSearch(newMemory.embedding, { threshold: 0.85 });
690
+ if (similar.length > 0 && similar[0].score > 0.95) {
691
+ return { action: 'skip', existing: similar[0] };
692
+ }
693
+
694
+ // Stage 3: semantic dedup (only for high-similarity matches)
695
+ if (similar.length > 0 && similar[0].score > 0.85) {
696
+ const isDuplicate = await llm.judge(
697
+ `Are these two memories expressing the same fact?\n1: "${newMemory.content}"\n2: "${similar[0].content}"`
698
+ );
699
+ if (isDuplicate) return { action: 'merge', existing: similar[0] };
700
+ }
701
+
702
+ return { action: 'add' };
703
+ }
704
+ ```
705
+
706
+ ### Memory Consolidation
707
+
708
+ Periodically consolidate related memories into higher-level summaries:
709
+
710
+ 1. **Cluster detection:** Find groups of memories with high mutual similarity
711
+ 2. **Summary generation:** Use LLM to create a consolidated summary
712
+ 3. **Hierarchy:** Keep both individual memories (for detail retrieval) and consolidated summaries (for overview retrieval)
713
+
714
+ ```
715
+ Individual memories:
716
+ - "Prefers TypeScript over JavaScript" (2025-06)
717
+ - "Uses Neovim as primary editor" (2025-07)
718
+ - "Interested in Rust for performance-critical code" (2025-08)
719
+ - "Values type safety highly" (2025-09)
720
+
721
+ Consolidated memory (auto-generated):
722
+ - "Developer who strongly values type safety, primarily uses TypeScript and Neovim,
723
+ with growing interest in Rust for performance-critical work."
724
+ ```
725
+
726
+ **Schedule:** Run consolidation weekly or when a cluster exceeds N related memories.
727
+
728
+ ---
729
+
730
+ ## 7. Existing Open Source Landscape
731
+
732
+ ### mem0
733
+
734
+ **Repository:** [github.com/mem0ai/mem0](https://github.com/mem0ai/mem0)
735
+ **License:** Apache 2.0
736
+ **Language:** Python (primary) + TypeScript SDK
737
+
738
+ **How it works:**
739
+ - Hybrid data store: vector DB (Qdrant, ChromaDB, pgvector, etc.) + graph DB (Neo4j) + key-value store
740
+ - LLM-based extraction: identifies facts, preferences, contextual info from conversations
741
+ - Four-operation update cycle: ADD/UPDATE/DELETE/NOOP
742
+ - Graph memory (Mem0g): entities + relationship triplets with conflict detection
743
+ - Factory pattern for pluggable backends (LlmFactory, EmbedderFactory, VectorStoreFactory, etc.)
744
+
745
+ **Performance:** 26% accuracy improvement over OpenAI's memory, 91% faster responses, 90% lower token usage vs full-context approaches.
746
+
747
+ **Relevance to memory-crystal:** Mem0's extraction/update pipeline is the gold standard. The ADD/UPDATE/DELETE/NOOP pattern should be adopted. However, mem0 requires Neo4j for graph memory (too heavy for local-first) and its TypeScript SDK is thinner than the Python implementation.
748
+
749
+ ### Khoj
750
+
751
+ **Repository:** [github.com/khoj-ai/khoj](https://github.com/khoj-ai/khoj)
752
+ **License:** AGPL-3.0
753
+ **Language:** Python 51%, TypeScript 36%
754
+
755
+ **How it works:**
756
+ - Full self-hosted AI assistant with memory, search, and chat
757
+ - Supports multiple document formats: PDFs, Markdown, Notion, Word docs, org-mode
758
+ - Semantic search + RAG
759
+ - Docker deployment (docker-compose)
760
+ - Supports multiple LLMs: llama3, qwen, gemma, mistral, GPT, Claude, Gemini, Deepseek
761
+
762
+ **Relevance to memory-crystal:** Khoj is a full application, not a composable library/plugin. Its document ingestion patterns are useful reference, but it is too monolithic to integrate directly. AGPL license is also restrictive.
763
+
764
+ ### Supermemory
765
+
766
+ **Repository:** [github.com/supermemoryai/supermemory](https://github.com/supermemoryai/supermemory)
767
+ **License:** Mixed (some components open-source)
768
+
769
+ **Key open-source components:**
770
+ - [supermemory-mcp](https://github.com/supermemoryai/supermemory-mcp) -- Universal Memory MCP server
771
+ - [apple-mcp](https://github.com/supermemoryai/apple-mcp) -- Apple-native MCP tools (Messages, Notes, Contacts, Mail, Reminders, Calendar, Maps)
772
+ - [code-chunk](https://github.com/supermemoryai/code-chunk) -- AST-aware code chunking (TypeScript, tree-sitter)
773
+
774
+ **Relevance to memory-crystal:** The code-chunk library should be used directly as a dependency. The apple-mcp patterns are useful reference for macOS connector implementation. The core memory engine is not open-source.
775
+
776
+ ### Graphiti (Zep)
777
+
778
+ **Repository:** [github.com/getzep/graphiti](https://github.com/getzep/graphiti)
779
+ **License:** Apache 2.0
780
+ **Language:** Python
781
+
782
+ **How it works:**
783
+ - Temporal knowledge graph framework for AI agent memory
784
+ - Bi-temporal data model (event time + ingestion time)
785
+ - Real-time incremental updates (no batch recomputation)
786
+ - Supports Neo4j, FalkorDB, Kuzu
787
+ - MCP server with add_episode, search_nodes, search_facts tools
788
+
789
+ **Relevance to memory-crystal:** Best existing implementation of temporal memory graphs. The bi-temporal model should be adopted. However, Python-only and requires a graph database server.
790
+
791
+ ### OpenMemory (CaviraOSS)
792
+
793
+ **Repository:** [github.com/CaviraOSS/OpenMemory](https://github.com/CaviraOSS/OpenMemory)
794
+
795
+ **How it works:**
796
+ - Dockerized: FastAPI + Postgres + Qdrant
797
+ - Uses Mem0 under the hood
798
+ - MCP server via SSE
799
+ - Tools: add_memories, search_memory, list_memories, delete_all_memories
800
+ - Five cognitive sectors for automatic content classification
801
+ - Time as a first-class dimension
802
+ - Fine-grained access control between apps and memories
803
+
804
+ **Relevance to memory-crystal:** Good reference for MCP tool design and access control patterns. Too heavy (Docker, Postgres, Qdrant) for embedded local-first use.
805
+
806
+ ### MCP Memory Service (doobidoo)
807
+
808
+ **Repository:** [github.com/doobidoo/mcp-memory-service](https://github.com/doobidoo/mcp-memory-service)
809
+
810
+ **How it works:**
811
+ - ChromaDB + sentence transformers for semantic memory
812
+ - Designed for Claude Desktop, VS Code, Cursor, and 13+ AI tools
813
+ - Automatic 24-hour backup cycle
814
+ - Content + tag-based retrieval
815
+
816
+ **Relevance to memory-crystal:** Simplest existing MCP memory implementation. Good starting point for tool API design, but lacks graph memory, hybrid search, and memory evolution.
817
+
818
+ ### Official MCP Knowledge Graph Memory Server
819
+
820
+ **Repository:** [github.com/modelcontextprotocol/servers/tree/main/src/memory](https://github.com/modelcontextprotocol/servers/tree/main/src/memory)
821
+
822
+ **How it works:**
823
+ - Entities + Relations + Observations data model
824
+ - JSONL file storage
825
+ - Nine tools: create_entities, create_relations, add_observations, delete_entities, delete_observations, delete_relations, read_graph, search_nodes, open_nodes
826
+ - Simple string matching for search (no vector/semantic search)
827
+
828
+ **Relevance to memory-crystal:** The entity/relation/observation data model is well-designed and should be adopted. The tool API naming conventions should be followed. But it lacks vector search, embeddings, and any form of intelligence.
829
+
830
+ ---
831
+
832
+ ## 8. MCP Integration
833
+
834
+ ### MCP Tool Design for memory-crystal
835
+
836
+ Based on analysis of existing MCP memory servers and best practices, memory-crystal should expose these tools:
837
+
838
+ #### Core Memory Tools
839
+
840
+ ```typescript
841
+ // 1. Store a new memory
842
+ tool("memory_store", {
843
+ content: string, // the text content to remember
844
+ source?: string, // where it came from (file path, URL, "conversation", etc.)
845
+ tags?: string[], // user-defined tags
846
+ importance?: number, // 0.0-1.0, how important is this
847
+ }) -> { memory_id: string, entities_extracted: Entity[] }
848
+
849
+ // 2. Search memories (primary retrieval)
850
+ tool("memory_search", {
851
+ query: string, // natural language query
852
+ limit?: number, // max results (default: 10)
853
+ source_filter?: string, // filter by source type
854
+ time_range?: { after?: string, before?: string },
855
+ include_graph?: boolean, // also return related entities/facts
856
+ }) -> { memories: Memory[], entities?: Entity[], relations?: Relation[] }
857
+
858
+ // 3. Recall everything about an entity
859
+ tool("memory_recall", {
860
+ entity: string, // entity name (person, project, concept)
861
+ }) -> { entity: Entity, observations: Observation[], relations: Relation[], related_memories: Memory[] }
862
+
863
+ // 4. Store a fact (structured)
864
+ tool("memory_fact", {
865
+ subject: string, // entity name
866
+ relation: string, // relationship type
867
+ object: string, // target entity or value
868
+ source?: string,
869
+ }) -> { fact_id: string, action: 'added' | 'updated' | 'duplicate' }
870
+ ```
871
+
872
+ #### Memory Management Tools
873
+
874
+ ```typescript
875
+ // 5. List recent memories
876
+ tool("memory_list", {
877
+ limit?: number,
878
+ offset?: number,
879
+ source_filter?: string,
880
+ }) -> { memories: Memory[], total: number }
881
+
882
+ // 6. Delete a memory
883
+ tool("memory_delete", {
884
+ memory_id: string,
885
+ }) -> { success: boolean }
886
+
887
+ // 7. Ingest a file or URL
888
+ tool("memory_ingest", {
889
+ path: string, // file path or URL
890
+ recursive?: boolean, // for directories
891
+ }) -> { chunks_created: number, entities_extracted: number }
892
+
893
+ // 8. Get memory stats
894
+ tool("memory_stats") -> {
895
+ total_memories: number,
896
+ total_entities: number,
897
+ total_relations: number,
898
+ by_source: Record<string, number>,
899
+ storage_size_mb: number,
900
+ }
901
+ ```
902
+
903
+ #### Advanced Tools (Phase 2)
904
+
905
+ ```typescript
906
+ // 9. Consolidate related memories
907
+ tool("memory_consolidate", {
908
+ entity?: string, // consolidate around an entity
909
+ auto?: boolean, // auto-detect clusters to consolidate
910
+ }) -> { consolidated: number, summaries_created: number }
911
+
912
+ // 10. Memory timeline
913
+ tool("memory_timeline", {
914
+ entity?: string,
915
+ time_range?: { after?: string, before?: string },
916
+ }) -> { events: TimelineEvent[] }
917
+ ```
918
+
919
+ ### MCP Resource Exposure
920
+
921
+ In addition to tools, expose memories as MCP resources:
922
+
923
+ ```typescript
924
+ // Expose the full knowledge graph as a resource
925
+ resource("memory://graph", "The complete knowledge graph of entities and relations");
926
+
927
+ // Expose recent memories as a resource for context injection
928
+ resource("memory://recent", "Recent memories from the last 24 hours");
929
+
930
+ // Expose entity summaries as resources
931
+ resource("memory://entity/{name}", "Everything known about {name}");
932
+ ```
933
+
934
+ ### Best Practices for MCP Memory Servers
935
+
936
+ 1. **Automatic context injection:** At the start of each conversation, automatically provide relevant recent memories and entity summaries as context. This is how the official MCP Knowledge Graph Memory server works -- "retrieval of all relevant information from a knowledge graph at the start of chat."
937
+
938
+ 2. **Implicit memory capture:** After each agent conversation, automatically extract and store new facts/memories from the conversation. Do not require explicit "remember this" commands for basic facts.
939
+
940
+ 3. **Transparent operation:** Always tell the user when memories are being stored or retrieved. Include source attribution in retrieved memories.
941
+
942
+ 4. **Graceful degradation:** If the memory system is slow or unavailable, the agent should still function (just without memory context). Memory operations should not block the main conversation flow.
943
+
944
+ 5. **Privacy controls:** Users must be able to:
945
+ - See all stored memories
946
+ - Delete any memory
947
+ - Pause memory collection from specific sources
948
+ - Export all memory data (data sovereignty)
949
+
950
+ ---
951
+
952
+ ## 9. Recommended Architecture
953
+
954
+ ### High-Level Architecture
955
+
956
+ ```
957
+ +------------------------------------------------------------------+
958
+ | memory-crystal OpenClaw Plugin |
959
+ +------------------------------------------------------------------+
960
+ | |
961
+ | +------------------+ +------------------+ +---------------+ |
962
+ | | MCP Server | | Ingestion | | Background | |
963
+ | | (Tools + | | Pipeline | | Workers | |
964
+ | | Resources) | | | | | |
965
+ | | | | - Chunker | | - Decay calc | |
966
+ | | memory_store | | - Contextualizer| | - Consolidate | |
967
+ | | memory_search | | - Embedder | | - Dedup | |
968
+ | | memory_recall | | - Entity Extract | | - Re-embed | |
969
+ | | memory_fact | | - Graph Builder | | | |
970
+ | | memory_ingest | | | | | |
971
+ | | memory_list | | | | | |
972
+ | | memory_delete | | | | | |
973
+ | | memory_stats | | | | | |
974
+ | +--------+---------+ +--------+---------+ +-------+-------+ |
975
+ | | | | |
976
+ | +--------v-----------------------v-----------------------v------+ |
977
+ | | Memory Core | |
978
+ | | | |
979
+ | | +-------------------+ +-------------------+ | |
980
+ | | | LanceDB | | SQLite | | |
981
+ | | | (Vector Store) | | (Graph + Meta) | | |
982
+ | | | | | | | |
983
+ | | | - Embeddings | | - Entities | | |
984
+ | | | - BM25 Index | | - Relations | | |
985
+ | | | - Hybrid Search | | - Observations | | |
986
+ | | | | | - Memory metadata | | |
987
+ | | +-------------------+ +-------------------+ | |
988
+ | +---------------------------------------------------------------+ |
989
+ | |
990
+ | +---------------------------------------------------------------+ |
991
+ | | Connectors | |
992
+ | | [iMessage] [Notes] [Browser] [Files] [Email] [Clipboard] | |
993
+ | +---------------------------------------------------------------+ |
994
+ | |
995
+ | +---------------------------------------------------------------+ |
996
+ | | Embedding Provider | |
997
+ | | [Ollama/nomic-embed] [OpenAI API] [ONNX Local] | |
998
+ | +---------------------------------------------------------------+ |
999
+ +------------------------------------------------------------------+
1000
+ ```
1001
+
1002
+ ### Technology Stack
1003
+
1004
+ | Component | Technology | Rationale |
1005
+ |---|---|---|
1006
+ | **Runtime** | Node.js / TypeScript | OpenClaw plugin ecosystem |
1007
+ | **Build** | tsup | Matches existing OpenClaw plugin pattern |
1008
+ | **Vector Store** | LanceDB (`@lancedb/lancedb`) | Embedded, TS-native, hybrid search, disk-efficient |
1009
+ | **Graph/Metadata** | SQLite (`better-sqlite3`) | Embedded, zero-config, performant for graph operations |
1010
+ | **Embeddings (default)** | nomic-embed-text-v1.5 via Ollama | Local, free, SOTA quality, Matryoshka support |
1011
+ | **Embeddings (fallback)** | text-embedding-3-small via OpenAI API | For users without GPU/Ollama |
1012
+ | **Code Chunking** | `@supermemory/code-chunk` | AST-aware, tree-sitter, TypeScript native |
1013
+ | **Text Chunking** | Custom recursive splitter | 400-512 tokens, heading-aware for markdown |
1014
+ | **Reranking** | cross-encoder/ms-marco-MiniLM-L-6-v2 (ONNX) | Local, fast, significant quality improvement |
1015
+ | **BM25** | LanceDB FTS (built-in) | No extra dependency |
1016
+ | **MCP** | `@modelcontextprotocol/sdk` | Standard MCP server implementation |
1017
+ | **File watching** | chokidar | Mature, cross-platform file watcher |
1018
+
1019
+ ### Data Storage Layout
1020
+
1021
+ ```
1022
+ ~/.openclaw/memory-crystal/
1023
+ config.json # User configuration
1024
+ lance/ # LanceDB data directory
1025
+ memories.lance/ # Vector index + BM25 index
1026
+ crystal.db # SQLite: graph, metadata, memory records
1027
+ backups/ # Automatic daily backups
1028
+ crystal-2026-02-07.db
1029
+ lance-2026-02-07/
1030
+ ```
1031
+
1032
+ ### Configuration Schema (openclaw.plugin.json)
1033
+
1034
+ ```json
1035
+ {
1036
+ "id": "memory-crystal",
1037
+ "name": "Memory Crystal",
1038
+ "description": "Sovereign, self-hosted memory system for AI agents",
1039
+ "skills": ["./skills"],
1040
+ "configSchema": {
1041
+ "type": "object",
1042
+ "properties": {
1043
+ "dataDir": {
1044
+ "type": "string",
1045
+ "description": "Where to store memory data"
1046
+ },
1047
+ "embeddingProvider": {
1048
+ "type": "string",
1049
+ "enum": ["ollama", "openai", "onnx"],
1050
+ "default": "ollama"
1051
+ },
1052
+ "embeddingModel": {
1053
+ "type": "string",
1054
+ "default": "nomic-embed-text"
1055
+ },
1056
+ "ollamaBaseUrl": {
1057
+ "type": "string",
1058
+ "default": "http://localhost:11434"
1059
+ },
1060
+ "openaiApiKey": {
1061
+ "type": "string"
1062
+ },
1063
+ "autoIngestConversations": {
1064
+ "type": "boolean",
1065
+ "default": true
1066
+ },
1067
+ "connectors": {
1068
+ "type": "object",
1069
+ "properties": {
1070
+ "imessage": { "type": "boolean", "default": false },
1071
+ "appleNotes": { "type": "boolean", "default": false },
1072
+ "browserHistory": { "type": "boolean", "default": false },
1073
+ "localFiles": {
1074
+ "type": "object",
1075
+ "properties": {
1076
+ "enabled": { "type": "boolean", "default": false },
1077
+ "watchPaths": { "type": "array", "items": { "type": "string" } }
1078
+ }
1079
+ }
1080
+ }
1081
+ },
1082
+ "decay": {
1083
+ "type": "object",
1084
+ "properties": {
1085
+ "hourlyFactor": { "type": "number", "default": 0.995 },
1086
+ "consolidationThreshold": { "type": "number", "default": 10 }
1087
+ }
1088
+ }
1089
+ }
1090
+ }
1091
+ }
1092
+ ```
1093
+
1094
+ ### Implementation Phases
1095
+
1096
+ **Phase 1 (MVP):**
1097
+ - LanceDB vector store with hybrid search
1098
+ - SQLite graph (entities, relations, observations)
1099
+ - Basic chunking (recursive character splitting for text, code-chunk for code)
1100
+ - Memory CRUD via MCP tools (store, search, recall, delete, list)
1101
+ - Manual ingestion (text, files, URLs)
1102
+ - Ollama embedding integration
1103
+ - Simple decay scoring
1104
+
1105
+ **Phase 2 (Intelligence):**
1106
+ - Contextual retrieval (Anthropic's context-prepending approach)
1107
+ - Local cross-encoder reranking
1108
+ - LLM-based entity extraction and conflict resolution (ADD/UPDATE/DELETE/NOOP)
1109
+ - Memory consolidation
1110
+ - Semantic deduplication
1111
+ - HyDE query expansion for complex queries
1112
+
1113
+ **Phase 3 (Connectors):**
1114
+ - iMessage connector
1115
+ - Apple Notes connector
1116
+ - Browser history/bookmarks connector
1117
+ - File watcher for local directories
1118
+ - Automatic conversation ingestion
1119
+
1120
+ **Phase 4 (Advanced):**
1121
+ - Email (IMAP) connector
1122
+ - Clipboard history integration
1123
+ - Memory timeline visualization
1124
+ - Export/import (data sovereignty)
1125
+ - Multi-agent memory sharing (with access controls)
1126
+
1127
+ ---
1128
+
1129
+ ## Sources
1130
+
1131
+ ### Vector Databases
1132
+ - [sqlite-vec GitHub](https://github.com/asg017/sqlite-vec)
1133
+ - [LanceDB Documentation](https://docs.lancedb.com/)
1134
+ - [LanceDB Full-Text Search](https://docs.lancedb.com/search/full-text-search)
1135
+ - [Continue IDE + LanceDB Case Study](https://lancedb.com/blog/the-future-of-ai-native-development-is-local-inside-continues-lancedb-powered-evolution/)
1136
+ - [ChromaDB vs Qdrant Comparison](https://zenvanriel.nl/ai-engineer-blog/chroma-vs-qdrant-local-development/)
1137
+ - [SQLite vs Chroma Analysis](https://dev.to/stephenc222/sqlite-vs-chroma-a-comparative-analysis-for-managing-vector-embeddings-4i76)
1138
+
1139
+ ### Knowledge Graphs and Memory
1140
+ - [Supermemory Memory Engine Blog](https://supermemory.ai/blog/memory-engine/)
1141
+ - [Mem0 Graph Memory Documentation](https://docs.mem0.ai/open-source/features/graph-memory)
1142
+ - [Mem0 Architecture Paper (arXiv)](https://arxiv.org/html/2504.19413v1)
1143
+ - [Microsoft GraphRAG](https://microsoft.github.io/graphrag/)
1144
+ - [Zep/Graphiti Temporal Knowledge Graph Paper (arXiv)](https://arxiv.org/abs/2501.13956)
1145
+ - [Graphiti GitHub](https://github.com/getzep/graphiti)
1146
+ - [Graphiti + FalkorDB MCP](https://www.falkordb.com/blog/mcp-knowledge-graph-graphiti-falkordb/)
1147
+ - [Memory in the Age of AI Agents Survey (arXiv)](https://arxiv.org/abs/2512.13564)
1148
+
1149
+ ### Retrieval and RAG
1150
+ - [Anthropic Contextual Retrieval](https://www.anthropic.com/news/contextual-retrieval)
1151
+ - [HyDE Paper (arXiv)](https://arxiv.org/abs/2212.10496)
1152
+ - [Chunking Strategies for RAG 2025](https://www.firecrawl.dev/blog/best-chunking-strategies-rag-2025)
1153
+ - [Reranking Guide 2025](https://www.zeroentropy.dev/articles/ultimate-guide-to-choosing-the-best-reranking-model-in-2025)
1154
+ - [Hybrid Search for AI Memory (Node.js + pgvector)](https://dev.to/the_nortern_dev/under-the-hood-building-a-hybrid-search-engine-for-ai-memory-nodejs-pgvector-3c5k)
1155
+ - [BM25 Hybrid Search for AI Memory Server](https://dev.to/jakob_sandstrm_a11b3056c/vector-search-is-not-enough-why-i-added-bm25-hybrid-search-to-my-ai-memory-server-3h3l)
1156
+
1157
+ ### Embedding Models
1158
+ - [Supermemory: Best Open-Source Embedding Models Benchmarked](https://supermemory.ai/blog/best-open-source-embedding-models-benchmarked-and-ranked/)
1159
+ - [Comparing Local Embedding Models for RAG](https://medium.com/@jinmochong/comparing-local-embedding-models-for-rag-systems-all-minilm-nomic-and-openai-ee425b507263)
1160
+ - [Nomic Embed Matryoshka](https://www.nomic.ai/blog/posts/nomic-embed-matryoshka)
1161
+
1162
+ ### Ingestion and Chunking
1163
+ - [Supermemory code-chunk](https://github.com/supermemoryai/code-chunk)
1164
+ - [Weaviate Chunking Strategies](https://weaviate.io/blog/chunking-strategies-for-rag)
1165
+ - [Document Chunking: 70% Accuracy Boost](https://langcopilot.com/posts/2025-10-11-document-chunking-for-rag-practical-guide)
1166
+
1167
+ ### macOS Data Sources
1168
+ - [iMessage SQL Database Access](https://davidbieber.com/snippets/2020-05-20-imessage-sql-db/)
1169
+ - [Deep Dive into iMessage](https://fatbobman.com/en/posts/deep-dive-into-imessage)
1170
+ - [Apple Notes to SQLite](https://github.com/dogsheep/apple-notes-to-sqlite)
1171
+ - [Browser History with SQLite](https://ellen.dev/exploring-browser-history.html)
1172
+
1173
+ ### MCP and Memory Servers
1174
+ - [Official MCP Knowledge Graph Memory Server](https://github.com/modelcontextprotocol/servers/tree/main/src/memory)
1175
+ - [MCP Memory Service (doobidoo)](https://github.com/doobidoo/mcp-memory-service)
1176
+ - [OpenMemory (CaviraOSS)](https://github.com/CaviraOSS/OpenMemory)
1177
+ - [MCP Specification 2025-11-25](https://modelcontextprotocol.io/specification/2025-11-25)
1178
+ - [Mem0 OpenMemory MCP](https://mem0.ai/openmemory)
1179
+
1180
+ ### Open Source Projects
1181
+ - [mem0 GitHub](https://github.com/mem0ai/mem0)
1182
+ - [Khoj GitHub](https://github.com/khoj-ai/khoj)
1183
+ - [Supermemory GitHub](https://github.com/supermemoryai/supermemory)
1184
+ - [Supermemory Apple MCP](https://github.com/supermemoryai/apple-mcp)
1185
+ - [OkapiBM25 npm (Node.js BM25)](https://github.com/FurkanToprak/OkapiBM25)