cozo-memory 1.0.3 → 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +374 -40
- package/dist/download-model.js +3 -1
- package/dist/embedding-service.js +81 -12
- package/dist/export-import-service.js +472 -0
- package/dist/index.js +290 -15
- package/dist/inference-engine.js +9 -2
- package/dist/memory-service.js +88 -5
- package/dist/test-bugfixes.js +374 -0
- package/dist/test-delete-comprehensive.js +174 -0
- package/dist/test-export-import.js +152 -0
- package/dist/test-fixes-simple.js +50 -0
- package/dist/test-pdf-ingest.js +2 -0
- package/dist/test-qwen3-bilingual.js +2 -0
- package/package.json +5 -1
- package/dist/verify_transaction_tool.js +0 -46
package/README.md
CHANGED
|
@@ -1,8 +1,36 @@
|
|
|
1
1
|
# CozoDB Memory MCP Server
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
3
|
+
[npm package](https://www.npmjs.com/package/cozo-memory)
|
|
4
|
+
[Node.js](https://nodejs.org)
|
|
5
|
+
[License](LICENSE)
|
|
6
|
+
|
|
7
|
+
**Local-first memory for Claude & AI agents with hybrid search, Graph-RAG, and time-travel – all in a single binary, no cloud, no Docker.**
|
|
8
|
+
|
|
9
|
+
## Table of Contents
|
|
10
|
+
|
|
11
|
+
- [Quick Start](#quick-start)
|
|
12
|
+
- [Key Features](#key-features)
|
|
13
|
+
- [Positioning & Comparison](#positioning--comparison)
|
|
14
|
+
- [Performance & Benchmarks](#performance--benchmarks)
|
|
15
|
+
- [Architecture](#architecture)
|
|
16
|
+
- [Installation](#installation)
|
|
17
|
+
- [Start / Integration](#start--integration)
|
|
18
|
+
- [Configuration & Backends](#configuration--backends)
|
|
19
|
+
- [Data Model](#data-model)
|
|
20
|
+
- [MCP Tools](#mcp-tools)
|
|
21
|
+
- [mutate_memory (Write)](#mutate_memory-write)
|
|
22
|
+
- [query_memory (Read)](#query_memory-read)
|
|
23
|
+
- [analyze_graph (Analysis)](#analyze_graph-analysis)
|
|
24
|
+
- [manage_system (Maintenance)](#manage_system-maintenance)
|
|
25
|
+
- [Production Monitoring](#production-monitoring)
|
|
26
|
+
- [Technical Highlights](#technical-highlights)
|
|
27
|
+
- [Optional: HTTP API Bridge](#optional-http-api-bridge)
|
|
28
|
+
- [Development](#development)
|
|
29
|
+
- [User Preference Profiling](#user-preference-profiling-mem0-style)
|
|
30
|
+
- [Troubleshooting](#troubleshooting)
|
|
31
|
+
- [Roadmap](#roadmap)
|
|
32
|
+
- [Contributing](#contributing)
|
|
33
|
+
- [License](#license)
|
|
6
34
|
|
|
7
35
|
## Quick Start
|
|
8
36
|
|
|
@@ -27,13 +55,35 @@ npm run start
|
|
|
27
55
|
|
|
28
56
|
Now you can add the server to your MCP client (e.g. Claude Desktop).
|
|
29
57
|
|
|
30
|
-
##
|
|
58
|
+
## Key Features
|
|
59
|
+
|
|
60
|
+
🔍 **Hybrid Search (since v0.7)** - Combines semantic search (HNSW), full-text search (FTS), and graph signals via Reciprocal Rank Fusion (RRF)
|
|
61
|
+
|
|
62
|
+
🕸️ **Graph-RAG & Graph-Walking (since v1.7)** - Advanced retrieval combining vector seeds with recursive graph traversals using optimized Datalog algorithms
|
|
63
|
+
|
|
64
|
+
🎯 **Multi-Vector Support (since v1.7)** - Dual embeddings per entity: content-embedding for context, name-embedding for identification
|
|
65
|
+
|
|
66
|
+
⚡ **Semantic Caching (since v0.8.5)** - Two-level cache (L1 memory + L2 persistent) with semantic query matching
|
|
67
|
+
|
|
68
|
+
⏱️ **Time-Travel Queries** - Version all changes via CozoDB Validity; query any point in history
|
|
69
|
+
|
|
70
|
+
🔗 **Atomic Transactions (since v1.2)** - Multi-statement transactions ensuring data consistency
|
|
31
71
|
|
|
32
|
-
|
|
33
|
-
- An MCP server (stdio) for Claude/other MCP clients.
|
|
34
|
-
- An optional HTTP API bridge server for UI/tools.
|
|
72
|
+
📊 **Graph Algorithms (since v1.3/v1.6)** - PageRank, Betweenness Centrality, HITS, Community Detection, Shortest Path
|
|
35
73
|
|
|
36
|
-
|
|
74
|
+
🧹 **Janitor Service** - LLM-backed automatic cleanup with hierarchical summarization
|
|
75
|
+
|
|
76
|
+
👤 **User Preference Profiling** - Persistent user preferences with automatic 50% search boost
|
|
77
|
+
|
|
78
|
+
🔍 **Near-Duplicate Detection** - Automatic LSH-based deduplication to avoid redundancy
|
|
79
|
+
|
|
80
|
+
🧠 **Inference Engine** - Implicit knowledge discovery with multiple strategies
|
|
81
|
+
|
|
82
|
+
🏠 **100% Local** - Embeddings via ONNX/Transformers; no external services required
|
|
83
|
+
|
|
84
|
+
📦 **Export/Import (since v1.8)** - Export to JSON, Markdown, or Obsidian-ready ZIP; import from Mem0, MemGPT, Markdown, or native format
|
|
85
|
+
|
|
86
|
+
### Detailed Features
|
|
37
87
|
- **Hybrid Search (v0.7 Optimized)**: Combination of semantic search (HNSW), **Full-Text Search (FTS)**, and graph signals, merged via Reciprocal Rank Fusion (RRF).
|
|
38
88
|
- **Full-Text Search (FTS)**: Native CozoDB v0.7 FTS indices with stemming, stopword filtering, and robust query sanitizing (cleaning of `+ - * / \ ( ) ? .`) for maximum stability.
|
|
39
89
|
- **Near-Duplicate Detection (LSH)**: Automatically detects very similar observations via MinHash-LSH (CozoDB v0.7) to avoid redundancy.
|
|
@@ -119,39 +169,68 @@ This tool (`src/benchmark.ts`) performs the following tests:
|
|
|
119
169
|
3. **Search Performance**: Latency measurement for Hybrid Search vs. Raw Vector Search.
|
|
120
170
|
4. **RRF Overhead**: Determination of additional computation time for fusion logic.
|
|
121
171
|
|
|
122
|
-
## Architecture
|
|
123
|
-
|
|
124
|
-
```
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
172
|
+
## Architecture
|
|
173
|
+
|
|
174
|
+
```mermaid
|
|
175
|
+
graph TB
|
|
176
|
+
Client[MCP Client<br/>Claude Desktop, etc.]
|
|
177
|
+
Server[MCP Server<br/>FastMCP + Zod Schemas]
|
|
178
|
+
Services[Memory Services]
|
|
179
|
+
Embeddings[Embeddings<br/>ONNX Runtime]
|
|
180
|
+
Search[Hybrid Search<br/>RRF Fusion]
|
|
181
|
+
Cache[Semantic Cache<br/>L1 + L2]
|
|
182
|
+
Inference[Inference Engine<br/>Multi-Strategy]
|
|
183
|
+
DB[(CozoDB SQLite<br/>Relations + Validity<br/>HNSW Indices<br/>Datalog/Graph)]
|
|
184
|
+
|
|
185
|
+
Client -->|stdio| Server
|
|
186
|
+
Server --> Services
|
|
187
|
+
Services --> Embeddings
|
|
188
|
+
Services --> Search
|
|
189
|
+
Services --> Cache
|
|
190
|
+
Services --> Inference
|
|
191
|
+
Services --> DB
|
|
192
|
+
|
|
193
|
+
style Client fill:#e1f5ff
|
|
194
|
+
style Server fill:#fff4e1
|
|
195
|
+
style Services fill:#f0e1ff
|
|
196
|
+
style DB fill:#e1ffe1
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
### Graph-Walking Visualization
|
|
200
|
+
|
|
201
|
+
```mermaid
|
|
202
|
+
graph LR
|
|
203
|
+
Start([Query: What is Alice working on?])
|
|
204
|
+
V1[Vector Search<br/>Find: Alice]
|
|
205
|
+
E1[Alice<br/>Person]
|
|
206
|
+
E2[Project X<br/>Project]
|
|
207
|
+
E3[Feature Flags<br/>Technology]
|
|
208
|
+
E4[Bob<br/>Person]
|
|
209
|
+
|
|
210
|
+
Start --> V1
|
|
211
|
+
V1 -.semantic similarity.-> E1
|
|
212
|
+
E1 -->|works_on| E2
|
|
213
|
+
E2 -->|uses_tech| E3
|
|
214
|
+
E1 -->|colleague_of| E4
|
|
215
|
+
E4 -.semantic: also relevant.-> E2
|
|
216
|
+
|
|
217
|
+
style Start fill:#e1f5ff
|
|
218
|
+
style V1 fill:#fff4e1
|
|
219
|
+
style E1 fill:#ffe1e1
|
|
220
|
+
style E2 fill:#e1ffe1
|
|
221
|
+
style E3 fill:#f0e1ff
|
|
222
|
+
style E4 fill:#ffe1e1
|
|
148
223
|
```
|
|
149
224
|
|
|
150
225
|
## Installation
|
|
151
226
|
|
|
152
227
|
### Prerequisites
|
|
153
228
|
- Node.js 20+ (recommended)
|
|
154
|
-
-
|
|
229
|
+
- **RAM: 1.7 GB minimum** (for default bge-m3 model)
|
|
230
|
+
- Model download: ~600 MB
|
|
231
|
+
- Runtime memory: ~1.1 GB
|
|
232
|
+
- For lower-spec machines, see [Embedding Model Options](#embedding-model-options) below
|
|
233
|
+
- CozoDB native dependency is installed via `cozo-node`
|
|
155
234
|
|
|
156
235
|
### Via npm (Easiest)
|
|
157
236
|
|
|
@@ -184,6 +263,62 @@ Notes:
|
|
|
184
263
|
- On first start, `@xenova/transformers` downloads the embedding model (may take time).
|
|
185
264
|
- Embeddings are processed on the CPU.
|
|
186
265
|
|
|
266
|
+
### Embedding Model Options
|
|
267
|
+
|
|
268
|
+
CozoDB Memory supports multiple embedding models via the `EMBEDDING_MODEL` environment variable:
|
|
269
|
+
|
|
270
|
+
| Model | Size | RAM | Dimensions | Best For |
|
|
271
|
+
|-------|------|-----|------------|----------|
|
|
272
|
+
| `Xenova/bge-m3` (default) | ~600 MB | ~1.7 GB | 1024 | High accuracy, production use |
|
|
273
|
+
| `Xenova/all-MiniLM-L6-v2` | ~80 MB | ~400 MB | 384 | Low-spec machines, development |
|
|
274
|
+
| `Xenova/bge-small-en-v1.5` | ~130 MB | ~600 MB | 384 | Balanced performance |
|
|
275
|
+
|
|
276
|
+
**Configuration Options:**
|
|
277
|
+
|
|
278
|
+
**Option 1: Using `.env` file (Easiest for beginners)**
|
|
279
|
+
|
|
280
|
+
```bash
|
|
281
|
+
# Copy the example file
|
|
282
|
+
cp .env.example .env
|
|
283
|
+
|
|
284
|
+
# Edit .env and set your preferred model
|
|
285
|
+
EMBEDDING_MODEL=Xenova/all-MiniLM-L6-v2
|
|
286
|
+
```
|
|
287
|
+
|
|
288
|
+
**Option 2: MCP Server Config (For Claude Desktop / Kiro)**
|
|
289
|
+
|
|
290
|
+
```json
|
|
291
|
+
{
|
|
292
|
+
"mcpServers": {
|
|
293
|
+
"cozo-memory": {
|
|
294
|
+
"command": "npx",
|
|
295
|
+
"args": ["cozo-memory"],
|
|
296
|
+
"env": {
|
|
297
|
+
"EMBEDDING_MODEL": "Xenova/all-MiniLM-L6-v2"
|
|
298
|
+
}
|
|
299
|
+
}
|
|
300
|
+
}
|
|
301
|
+
}
|
|
302
|
+
```
|
|
303
|
+
|
|
304
|
+
**Option 3: Command Line**
|
|
305
|
+
|
|
306
|
+
```bash
|
|
307
|
+
# Use lightweight model for development
|
|
308
|
+
EMBEDDING_MODEL=Xenova/all-MiniLM-L6-v2 npm run start
|
|
309
|
+
```
|
|
310
|
+
|
|
311
|
+
**Download Model First (Recommended):**
|
|
312
|
+
|
|
313
|
+
```bash
|
|
314
|
+
# Set model in .env or via command line, then:
|
|
315
|
+
EMBEDDING_MODEL=Xenova/all-MiniLM-L6-v2 npm run download-model
|
|
316
|
+
```
|
|
317
|
+

|
|
318
|
+

|
|
319
|
+
|
|
320
|
+
**Note:** Changing models requires re-embedding existing data. The model is downloaded once on first use.
|
|
321
|
+
|
|
187
322
|
## Start / Integration
|
|
188
323
|
|
|
189
324
|
### MCP Server (stdio)
|
|
@@ -262,6 +397,14 @@ DB_ENGINE=rocksdb npm run dev
|
|
|
262
397
|
| **RocksDB** | Prepared & Tested | For high-performance or very large datasets. |
|
|
263
398
|
| **MDBX** | Not supported | Requires manual build of `cozo-node` from source. |
|
|
264
399
|
|
|
400
|
+
### Environment Variables
|
|
401
|
+
|
|
402
|
+
| Variable | Default | Description |
|
|
403
|
+
|----------|---------|-------------|
|
|
404
|
+
| `DB_ENGINE` | `sqlite` | Database backend: `sqlite` or `rocksdb` |
|
|
405
|
+
| `EMBEDDING_MODEL` | `Xenova/bge-m3` | Embedding model (see [Embedding Model Options](#embedding-model-options)) |
|
|
406
|
+
| `PORT` | `3001` | HTTP API bridge port (if using `npm run bridge`) |
|
|
407
|
+
|
|
265
408
|
---
|
|
266
409
|
|
|
267
410
|
## Data Model
|
|
@@ -277,6 +420,13 @@ CozoDB Relations (simplified) – all write operations create new `Validity` ent
|
|
|
277
420
|
|
|
278
421
|
The interface is reduced to **4 consolidated tools**. The concrete operation is always chosen via `action`.
|
|
279
422
|
|
|
423
|
+
| Tool | Purpose | Key Actions |
|
|
424
|
+
|------|---------|-------------|
|
|
425
|
+
| `mutate_memory` | Write operations | create_entity, update_entity, delete_entity, add_observation, create_relation, run_transaction, add_inference_rule, ingest_file |
|
|
426
|
+
| `query_memory` | Read operations | search, advancedSearch, context, entity_details, history, graph_rag, graph_walking |
|
|
427
|
+
| `analyze_graph` | Graph analysis | explore, communities, pagerank, betweenness, hits, shortest_path, bridge_discovery, semantic_walk, infer_relations |
|
|
428
|
+
| `manage_system` | Maintenance | health, metrics, export_memory, import_memory, snapshot_create, snapshot_list, snapshot_diff, cleanup, reflect, clear_memory |
|
|
429
|
+
|
|
280
430
|
### mutate_memory (Write)
|
|
281
431
|
|
|
282
432
|
Actions:
|
|
@@ -287,7 +437,11 @@ Actions:
|
|
|
287
437
|
- `create_relation`: `{ from_id, to_id, relation_type, strength?, metadata? }`
|
|
288
438
|
- `run_transaction`: `{ operations: Array<{ action, params }> }` **(New v1.2)**: Executes multiple operations atomically.
|
|
289
439
|
- `add_inference_rule`: `{ name, datalog }`
|
|
290
|
-
- `ingest_file`: `{ format, content
|
|
440
|
+
- `ingest_file`: `{ format, file_path?, content?, entity_id?, entity_name?, entity_type?, chunking?, metadata?, observation_metadata?, deduplicate?, max_observations? }`
|
|
441
|
+
- `format` options: `"markdown"`, `"json"`, `"pdf"` **(New v1.9)**
|
|
442
|
+
- `file_path`: Optional path to file on disk (alternative to `content` parameter)
|
|
443
|
+
- `content`: File content as string (required if `file_path` not provided)
|
|
444
|
+
- `chunking` options: `"none"`, `"paragraphs"` (future: `"semantic"`)
|
|
291
445
|
|
|
292
446
|
Important Details:
|
|
293
447
|
- `run_transaction` supports `create_entity`, `add_observation`, and `create_relation`. Parameters are automatically suffixed to avoid collisions.
|
|
@@ -338,7 +492,7 @@ Example (Transitive Manager ⇒ Upper Manager):
|
|
|
338
492
|
}
|
|
339
493
|
```
|
|
340
494
|
|
|
341
|
-
Bulk Ingestion (Markdown/JSON):
|
|
495
|
+
Bulk Ingestion (Markdown/JSON/PDF):
|
|
342
496
|
|
|
343
497
|
```json
|
|
344
498
|
{
|
|
@@ -352,6 +506,19 @@ Bulk Ingestion (Markdown/JSON):
|
|
|
352
506
|
}
|
|
353
507
|
```
|
|
354
508
|
|
|
509
|
+
PDF Ingestion via File Path:
|
|
510
|
+
|
|
511
|
+
```json
|
|
512
|
+
{
|
|
513
|
+
"action": "ingest_file",
|
|
514
|
+
"entity_name": "Research Paper",
|
|
515
|
+
"format": "pdf",
|
|
516
|
+
"file_path": "/path/to/document.pdf",
|
|
517
|
+
"chunking": "paragraphs",
|
|
518
|
+
"deduplicate": true
|
|
519
|
+
}
|
|
520
|
+
```
|
|
521
|
+
|
|
355
522
|
### query_memory (Read)
|
|
356
523
|
|
|
357
524
|
Actions:
|
|
@@ -447,7 +614,10 @@ Examples:
|
|
|
447
614
|
### manage_system (Maintenance)
|
|
448
615
|
|
|
449
616
|
Actions:
|
|
450
|
-
- `health`: `{}` returns DB counts + embedding cache stats.
|
|
617
|
+
- `health`: `{}` returns DB counts + embedding cache stats + performance metrics.
|
|
618
|
+
- `metrics`: `{}` returns detailed operation counts, error statistics, and performance data.
|
|
619
|
+
- `export_memory`: `{ format, includeMetadata?, includeRelationships?, includeObservations?, entityTypes?, since? }` exports memory to various formats.
|
|
620
|
+
- `import_memory`: `{ data, sourceFormat, mergeStrategy?, defaultEntityType? }` imports memory from external sources.
|
|
451
621
|
- `snapshot_create`: `{ metadata? }`
|
|
452
622
|
- `snapshot_list`: `{}`
|
|
453
623
|
- `snapshot_diff`: `{ snapshot_id_a, snapshot_id_b }`
|
|
@@ -460,6 +630,22 @@ Janitor Cleanup Details:
|
|
|
460
630
|
- With `confirm: true`, the Janitor becomes active:
|
|
461
631
|
- **Hierarchical Summarization**: Detects isolated or old observations, has them summarized by a local LLM (Ollama), and creates a new `ExecutiveSummary` node. Old fragments are deleted to reduce noise while preserving knowledge.
|
|
462
632
|
|
|
633
|
+
**Before Janitor:**
|
|
634
|
+
```
|
|
635
|
+
Entity: Project X
|
|
636
|
+
├─ Observation 1: "Started in Q1" (90 days old, isolated)
|
|
637
|
+
├─ Observation 2: "Uses React" (85 days old, isolated)
|
|
638
|
+
├─ Observation 3: "Team of 5" (80 days old, isolated)
|
|
639
|
+
└─ Observation 4: "Deployed to staging" (75 days old, isolated)
|
|
640
|
+
```
|
|
641
|
+
|
|
642
|
+
**After Janitor:**
|
|
643
|
+
```
|
|
644
|
+
Entity: Project X
|
|
645
|
+
└─ ExecutiveSummary: "Project X is a React-based application started in Q1
|
|
646
|
+
with a team of 5 developers, currently deployed to staging environment."
|
|
647
|
+
```
|
|
648
|
+
|
|
463
649
|
Reflection Service Details:
|
|
464
650
|
- `reflect` analyzes observations of an entity (or top 5 active entities) to find contradictions, patterns, or temporal developments.
|
|
465
651
|
- Results are persisted as new observations with metadata field `{ "kind": "reflection" }` and are retrievable via `context`.
|
|
@@ -467,6 +653,97 @@ Reflection Service Details:
|
|
|
467
653
|
|
|
468
654
|
Defaults: `older_than_days=30`, `max_observations=20`, `min_entity_degree=2`, `model="demyagent-4b-i1:Q6_K"`.
|
|
469
655
|
|
|
656
|
+
Export/Import Details:
|
|
657
|
+
- `export_memory` supports three formats:
|
|
658
|
+
- **JSON** (`format: "json"`): Native Cozo format, fully re-importable with all metadata and timestamps.
|
|
659
|
+
- **Markdown** (`format: "markdown"`): Human-readable document with entities, observations, and relationships.
|
|
660
|
+
- **Obsidian** (`format: "obsidian"`): ZIP archive with Wiki-Links `[[Entity]]`, YAML frontmatter, ready for Obsidian vault.
|
|
661
|
+
- `import_memory` supports four source formats:
|
|
662
|
+
- **Cozo** (`sourceFormat: "cozo"`): Import from native JSON export.
|
|
663
|
+
- **Mem0** (`sourceFormat: "mem0"`): Import from Mem0 format (user_id becomes entity).
|
|
664
|
+
- **MemGPT** (`sourceFormat: "memgpt"`): Import from MemGPT archival/recall memory.
|
|
665
|
+
- **Markdown** (`sourceFormat: "markdown"`): Parse markdown sections as entities with observations.
|
|
666
|
+
- Merge strategies: `skip` (default, skip duplicates), `overwrite` (replace existing), `merge` (combine metadata).
|
|
667
|
+
- Optional filters: `entityTypes` (array), `since` (Unix timestamp in ms), `includeMetadata`, `includeRelationships`, `includeObservations`.
|
|
668
|
+
|
|
669
|
+
Example Export:
|
|
670
|
+
```json
|
|
671
|
+
{
|
|
672
|
+
"action": "export_memory",
|
|
673
|
+
"format": "obsidian",
|
|
674
|
+
"includeMetadata": true,
|
|
675
|
+
"entityTypes": ["Person", "Project"]
|
|
676
|
+
}
|
|
677
|
+
```
|
|
678
|
+
|
|
679
|
+
Example Import:
|
|
680
|
+
```json
|
|
681
|
+
{
|
|
682
|
+
"action": "import_memory",
|
|
683
|
+
"sourceFormat": "mem0",
|
|
684
|
+
"data": "{\"user_id\": \"alice\", \"memories\": [...]}",
|
|
685
|
+
"mergeStrategy": "skip"
|
|
686
|
+
}
|
|
687
|
+
```
|
|
688
|
+
|
|
689
|
+
Production Monitoring Details:
|
|
690
|
+
- `health` provides comprehensive system status including entity/observation/relationship counts, embedding cache statistics, and performance metrics (last operation time, average operation time, total operations).
|
|
691
|
+
- `metrics` returns detailed operational metrics:
|
|
692
|
+
- **Operation Counts**: Tracks create_entity, update_entity, delete_entity, add_observation, create_relation, search, and graph_operations.
|
|
693
|
+
- **Error Statistics**: Total errors and breakdown by operation type.
|
|
694
|
+
- **Performance Metrics**: Last operation duration, average operation duration, and total operations executed.
|
|
695
|
+
- Delete operations now include detailed logging with verification steps and return statistics about deleted data (observations, outgoing/incoming relations).
|
|
696
|
+
|
|
697
|
+
## Production Monitoring
|
|
698
|
+
|
|
699
|
+
The system includes comprehensive monitoring capabilities for production deployments:
|
|
700
|
+
|
|
701
|
+
### Metrics Tracking
|
|
702
|
+
|
|
703
|
+
All operations are automatically tracked with detailed metrics:
|
|
704
|
+
- Operation counts by type (create, update, delete, search, etc.)
|
|
705
|
+
- Error tracking with breakdown by operation
|
|
706
|
+
- Performance metrics (latency, throughput)
|
|
707
|
+
|
|
708
|
+
### Health Endpoint
|
|
709
|
+
|
|
710
|
+
The `health` action provides real-time system status:
|
|
711
|
+
```json
|
|
712
|
+
{ "action": "health" }
|
|
713
|
+
```
|
|
714
|
+
|
|
715
|
+
Returns:
|
|
716
|
+
- Database counts (entities, observations, relationships)
|
|
717
|
+
- Embedding cache statistics (hit rate, size)
|
|
718
|
+
- Performance metrics (last operation time, average time, total operations)
|
|
719
|
+
|
|
720
|
+
### Metrics Endpoint
|
|
721
|
+
|
|
722
|
+
The `metrics` action provides detailed operational metrics:
|
|
723
|
+
```json
|
|
724
|
+
{ "action": "metrics" }
|
|
725
|
+
```
|
|
726
|
+
|
|
727
|
+
Returns:
|
|
728
|
+
- **operations**: Count of each operation type
|
|
729
|
+
- **errors**: Total errors and breakdown by operation
|
|
730
|
+
- **performance**: Last operation duration, average duration, total operations
|
|
731
|
+
|
|
732
|
+
### Enhanced Delete Operations
|
|
733
|
+
|
|
734
|
+
Delete operations include comprehensive logging and verification:
|
|
735
|
+
- Detailed step-by-step logging with `[Delete]` prefix
|
|
736
|
+
- Counts related data before deletion
|
|
737
|
+
- Verification after deletion
|
|
738
|
+
- Returns statistics: `{ deleted: { observations: N, outgoing_relations: N, incoming_relations: N } }`
|
|
739
|
+
|
|
740
|
+
Example:
|
|
741
|
+
```json
|
|
742
|
+
{ "action": "delete_entity", "entity_id": "ENTITY_ID" }
|
|
743
|
+
```
|
|
744
|
+
|
|
745
|
+
Returns deletion statistics showing exactly what was removed.
|
|
746
|
+
|
|
470
747
|
## Technical Highlights
|
|
471
748
|
|
|
472
749
|
### Local ONNX Embeddings (Transformers)
|
|
@@ -572,8 +849,65 @@ npx ts-node test-user-pref.ts
|
|
|
572
849
|
|
|
573
850
|
## Troubleshooting
|
|
574
851
|
|
|
575
|
-
|
|
576
|
-
|
|
852
|
+
### Common Issues
|
|
853
|
+
|
|
854
|
+
**First Start Takes Long**
|
|
855
|
+
- The embedding model download takes 30-90 seconds on first start (Transformers downloads ~600 MB of artifacts for the default `Xenova/bge-m3` model; smaller models download faster)
|
|
856
|
+
- This is normal and only happens once
|
|
857
|
+
- Subsequent starts are fast (< 2 seconds)
|
|
858
|
+
|
|
859
|
+
**Cleanup/Reflect Requires Ollama**
|
|
860
|
+
- If using `cleanup` or `reflect` actions, an Ollama service must be running locally
|
|
861
|
+
- Install Ollama from https://ollama.ai
|
|
862
|
+
- Pull the desired model: `ollama pull demyagent-4b-i1:Q6_K` (or your preferred model)
|
|
863
|
+
|
|
864
|
+
**Windows-Specific**
|
|
865
|
+
- Embeddings are processed on CPU for maximum compatibility
|
|
866
|
+
- RocksDB backend requires Visual C++ Redistributable if using that option
|
|
867
|
+
|
|
868
|
+
**Performance Issues**
|
|
869
|
+
- First query after restart is slower (cold cache)
|
|
870
|
+
- Use `health` action to check cache hit rates
|
|
871
|
+
- Consider RocksDB backend for datasets > 100k entities
|
|
872
|
+
|
|
873
|
+
## Roadmap
|
|
874
|
+
|
|
875
|
+
CozoDB Memory is actively developed. Here's what's planned:
|
|
876
|
+
|
|
877
|
+
### Near-Term (v1.x)
|
|
878
|
+
|
|
879
|
+
- **GPU Acceleration** - CUDA support for embedding generation (10-50x faster)
|
|
880
|
+
- **Streaming Ingestion** - Real-time data ingestion from logs, APIs, webhooks
|
|
881
|
+
- **Advanced Chunking** - Semantic chunking for `ingest_file` (paragraph-aware splitting)
|
|
882
|
+
- **Query Optimization** - Automatic query plan optimization for complex graph traversals
|
|
883
|
+
- **Additional Export Formats** - Notion, Roam Research, Logseq compatibility
|
|
884
|
+
|
|
885
|
+
### Mid-Term (v2.x)
|
|
886
|
+
|
|
887
|
+
- **Multi-Modal Embeddings** - Image and audio embedding support via CLIP/Whisper
|
|
888
|
+
- **Distributed Mode** - Multi-node deployment with CozoDB clustering
|
|
889
|
+
- **Real-Time Sync** - WebSocket-based live updates for collaborative use cases
|
|
890
|
+
- **Advanced Inference** - Causal reasoning, temporal pattern detection
|
|
891
|
+
- **Web UI** - Optional web interface for memory exploration and visualization
|
|
892
|
+
|
|
893
|
+
### Long-Term
|
|
894
|
+
|
|
895
|
+
- **Federated Learning** - Privacy-preserving model updates across instances
|
|
896
|
+
- **Custom Embedding Models** - Fine-tune embeddings on domain-specific data
|
|
897
|
+
- **Plugin System** - Extensible architecture for custom tools and integrations
|
|
898
|
+
|
|
899
|
+
### Community Requests
|
|
900
|
+
|
|
901
|
+
Have a feature idea? Open an issue with the `enhancement` label or check [Low-Hanging-Fruit.md](Low-Hanging-Fruit.md) for quick wins you can contribute.
|
|
902
|
+
|
|
903
|
+
## Contributing
|
|
904
|
+
|
|
905
|
+
Contributions are welcome! Please read [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines on:
|
|
906
|
+
|
|
907
|
+
- Setting up the development environment
|
|
908
|
+
- Coding standards and best practices
|
|
909
|
+
- Testing and documentation requirements
|
|
910
|
+
- Pull request process
|
|
577
911
|
|
|
578
912
|
## License
|
|
579
913
|
|
package/dist/download-model.js
CHANGED
|
@@ -33,12 +33,14 @@ var __importStar = (this && this.__importStar) || (function () {
|
|
|
33
33
|
};
|
|
34
34
|
})();
|
|
35
35
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
+
require("dotenv/config"); // Load .env file first
|
|
36
37
|
const transformers_1 = require("@xenova/transformers");
|
|
37
38
|
const path = __importStar(require("path"));
|
|
38
39
|
// Configure cache path
|
|
39
40
|
const CACHE_DIR = path.resolve('./.cache');
|
|
40
41
|
transformers_1.env.cacheDir = CACHE_DIR;
|
|
41
|
-
|
|
42
|
+
// Read model from environment variable or use default
|
|
43
|
+
const MODEL_ID = process.env.EMBEDDING_MODEL || "Xenova/bge-m3";
|
|
42
44
|
async function downloadModel() {
|
|
43
45
|
console.log(`Downloading FP32 model for ${MODEL_ID}...`);
|
|
44
46
|
// quantized: false forces FP32 model download
|
|
package/dist/embedding-service.js
CHANGED
|
@@ -34,6 +34,7 @@ var __importStar = (this && this.__importStar) || (function () {
|
|
|
34
34
|
})();
|
|
35
35
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
36
|
exports.EmbeddingService = void 0;
|
|
37
|
+
require("dotenv/config"); // Load .env file first
|
|
37
38
|
const transformers_1 = require("@xenova/transformers");
|
|
38
39
|
const ort = require('onnxruntime-node');
|
|
39
40
|
const path = __importStar(require("path"));
|
|
@@ -91,11 +92,27 @@ class EmbeddingService {
|
|
|
91
92
|
cache;
|
|
92
93
|
session = null;
|
|
93
94
|
tokenizer = null;
|
|
94
|
-
modelId
|
|
95
|
-
dimensions
|
|
95
|
+
modelId;
|
|
96
|
+
dimensions;
|
|
96
97
|
queue = Promise.resolve();
|
|
97
98
|
constructor() {
|
|
98
99
|
this.cache = new LRUCache(1000, 3600000); // 1000 entries, 1h TTL
|
|
100
|
+
// Support multiple embedding models via environment variable
|
|
101
|
+
this.modelId = process.env.EMBEDDING_MODEL || "Xenova/bge-m3";
|
|
102
|
+
// Set dimensions based on model
|
|
103
|
+
const dimensionMap = {
|
|
104
|
+
"Xenova/bge-m3": 1024,
|
|
105
|
+
"Xenova/all-MiniLM-L6-v2": 384,
|
|
106
|
+
"Xenova/bge-small-en-v1.5": 384,
|
|
107
|
+
"Xenova/nomic-embed-text-v1": 768,
|
|
108
|
+
"onnx-community/Qwen3-Embedding-0.6B-ONNX": 1024,
|
|
109
|
+
};
|
|
110
|
+
this.dimensions = dimensionMap[this.modelId] || 1024;
|
|
111
|
+
console.log(`[EmbeddingService] Using model: ${this.modelId} (${this.dimensions} dimensions)`);
|
|
112
|
+
}
|
|
113
|
+
// Public getter for dimensions
|
|
114
|
+
getDimensions() {
|
|
115
|
+
return this.dimensions;
|
|
99
116
|
}
|
|
100
117
|
// Serializes embedding execution to avoid event loop blocking
|
|
101
118
|
async runSerialized(task) {
|
|
@@ -109,21 +126,38 @@ class EmbeddingService {
|
|
|
109
126
|
if (this.session && this.tokenizer)
|
|
110
127
|
return;
|
|
111
128
|
try {
|
|
112
|
-
// 1.
|
|
129
|
+
// 1. Check if model needs to be downloaded
|
|
130
|
+
// Extract namespace and model name from modelId (e.g., "Xenova/bge-m3" or "onnx-community/Qwen3-Embedding-0.6B-ONNX")
|
|
131
|
+
const parts = this.modelId.split('/');
|
|
132
|
+
const namespace = parts[0];
|
|
133
|
+
const modelName = parts[1];
|
|
134
|
+
// Try both possible cache locations
|
|
135
|
+
let baseDir = path.join(transformers_1.env.cacheDir, namespace, modelName, 'onnx');
|
|
136
|
+
let fp32Path = path.join(baseDir, 'model.onnx');
|
|
137
|
+
let quantizedPath = path.join(baseDir, 'model_quantized.onnx');
|
|
138
|
+
// If ONNX model files don't exist, download them
|
|
139
|
+
if (!fs.existsSync(fp32Path) && !fs.existsSync(quantizedPath)) {
|
|
140
|
+
console.log(`[EmbeddingService] Model not found, downloading ${this.modelId}...`);
|
|
141
|
+
console.log(`[EmbeddingService] This may take a few minutes on first run.`);
|
|
142
|
+
// Import AutoModel dynamically to trigger download
|
|
143
|
+
const { AutoModel } = await import("@xenova/transformers");
|
|
144
|
+
await AutoModel.from_pretrained(this.modelId, { quantized: false });
|
|
145
|
+
console.log(`[EmbeddingService] Model download completed.`);
|
|
146
|
+
}
|
|
147
|
+
// 2. Load Tokenizer
|
|
113
148
|
if (!this.tokenizer) {
|
|
114
149
|
this.tokenizer = await transformers_1.AutoTokenizer.from_pretrained(this.modelId);
|
|
115
150
|
}
|
|
116
|
-
//
|
|
117
|
-
const baseDir = path.join(transformers_1.env.cacheDir, 'Xenova', 'bge-m3', 'onnx');
|
|
151
|
+
// 3. Determine model path
|
|
118
152
|
// Priority: FP32 (model.onnx) > Quantized (model_quantized.onnx)
|
|
119
|
-
let modelPath =
|
|
153
|
+
let modelPath = fp32Path;
|
|
120
154
|
if (!fs.existsSync(modelPath)) {
|
|
121
|
-
modelPath =
|
|
155
|
+
modelPath = quantizedPath;
|
|
122
156
|
}
|
|
123
157
|
if (!fs.existsSync(modelPath)) {
|
|
124
|
-
throw new Error(`Model file not found at: ${modelPath}
|
|
158
|
+
throw new Error(`Model file not found at: ${modelPath}. Download may have failed.`);
|
|
125
159
|
}
|
|
126
|
-
//
|
|
160
|
+
// 4. Create Session
|
|
127
161
|
if (!this.session) {
|
|
128
162
|
const options = {
|
|
129
163
|
executionProviders: ['cpu'], // Use CPU backend to avoid native conflicts
|
|
@@ -139,7 +173,15 @@ class EmbeddingService {
|
|
|
139
173
|
}
|
|
140
174
|
async embed(text) {
|
|
141
175
|
return this.runSerialized(async () => {
|
|
142
|
-
|
|
176
|
+
let textStr = String(text || "");
|
|
177
|
+
// For Qwen3-Embedding models, add instruction prefix for better results
|
|
178
|
+
// NOTE: the prefix is applied to every text passed to embed() - queries and indexed documents alike
|
|
179
|
+
if (this.modelId.includes('Qwen3-Embedding')) {
|
|
180
|
+
// Add instruction prefix if not already present
|
|
181
|
+
if (!textStr.startsWith('Instruct:')) {
|
|
182
|
+
textStr = `Instruct: Given a web search query, retrieve relevant passages that answer the query\nQuery: ${textStr}`;
|
|
183
|
+
}
|
|
184
|
+
}
|
|
143
185
|
// 1. Cache lookup
|
|
144
186
|
const cached = this.cache.get(textStr);
|
|
145
187
|
if (cached) {
|
|
@@ -171,14 +213,22 @@ class EmbeddingService {
|
|
|
171
213
|
const results = await this.session.run(feeds);
|
|
172
214
|
// 5. Pooling & Normalization
|
|
173
215
|
// Output name usually 'last_hidden_state' or 'logits'
|
|
174
|
-
// For BGE-M3, the first output is usually the hidden states [batch, seq_len, hidden_size]
|
|
175
216
|
const outputName = this.session.outputNames[0];
|
|
176
217
|
const outputTensor = results[outputName];
|
|
177
218
|
// Ensure we have data
|
|
178
219
|
if (!outputTensor || !attentionMaskData) {
|
|
179
220
|
throw new Error("No output data or attention mask available");
|
|
180
221
|
}
|
|
181
|
-
|
|
222
|
+
// Choose pooling strategy based on model
|
|
223
|
+
let embedding;
|
|
224
|
+
if (this.modelId.includes('Qwen3-Embedding')) {
|
|
225
|
+
// Qwen3-Embedding uses last token pooling
|
|
226
|
+
embedding = this.lastTokenPooling(outputTensor.data, attentionMaskData, outputTensor.dims);
|
|
227
|
+
}
|
|
228
|
+
else {
|
|
229
|
+
// BGE and other models use mean pooling
|
|
230
|
+
embedding = this.meanPooling(outputTensor.data, attentionMaskData, outputTensor.dims);
|
|
231
|
+
}
|
|
182
232
|
// Normalize
|
|
183
233
|
const normalized = this.normalize(embedding);
|
|
184
234
|
this.cache.set(textStr, normalized);
|
|
@@ -200,6 +250,25 @@ class EmbeddingService {
|
|
|
200
250
|
}
|
|
201
251
|
return results;
|
|
202
252
|
}
|
|
253
|
+
lastTokenPooling(data, attentionMask, dims) {
|
|
254
|
+
// dims: [batch_size, seq_len, hidden_size]
|
|
255
|
+
// Extract the last valid token's hidden state
|
|
256
|
+
const [batchSize, seqLen, hiddenSize] = dims;
|
|
257
|
+
// Find last valid token position
|
|
258
|
+
let lastValidIdx = seqLen - 1;
|
|
259
|
+
for (let i = seqLen - 1; i >= 0; i--) {
|
|
260
|
+
if (attentionMask[i] === 1n) {
|
|
261
|
+
lastValidIdx = i;
|
|
262
|
+
break;
|
|
263
|
+
}
|
|
264
|
+
}
|
|
265
|
+
// Extract embedding at last valid position
|
|
266
|
+
const embedding = new Float32Array(hiddenSize);
|
|
267
|
+
for (let j = 0; j < hiddenSize; j++) {
|
|
268
|
+
embedding[j] = data[lastValidIdx * hiddenSize + j];
|
|
269
|
+
}
|
|
270
|
+
return Array.from(embedding);
|
|
271
|
+
}
|
|
203
272
|
meanPooling(data, attentionMask, dims) {
|
|
204
273
|
// dims: [batch_size, seq_len, hidden_size]
|
|
205
274
|
// We assume batch_size = 1 for single embedding call
|