persyst-mcp 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +141 -0
- package/index.js +17 -0
- package/package.json +45 -0
- package/src/database.js +473 -0
- package/src/embeddings.js +55 -0
- package/src/git.js +97 -0
- package/src/search.js +109 -0
- package/src/server.js +52 -0
- package/src/setup-wasm.js +70 -0
- package/src/tools.js +290 -0
package/README.md
ADDED
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
# Persyst
|
|
2
|
+
|
|
3
|
+
**Local-first MCP memory server for coding agents.**
|
|
4
|
+
|
|
5
|
+
Persyst gives AI coding agents (Claude Code, Cursor, Aider, Windsurf) persistent memory across sessions. It stores memories in a local SQLite database with hybrid keyword + semantic search — no cloud, no API keys, works offline.
|
|
6
|
+
|
|
7
|
+
## How It Works
|
|
8
|
+
|
|
9
|
+
```
|
|
10
|
+
Your AI Agent ←→ MCP (stdio) ←→ Persyst ←→ SQLite (local)
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
1. **Agent stores a memory** → Persyst saves it + generates a search embedding
|
|
14
|
+
2. **Agent searches memories** → Persyst finds matches by both keywords AND meaning
|
|
15
|
+
3. **"dark mode" ↔ "night theme"** → Semantic search understands synonyms
|
|
16
|
+
|
|
17
|
+
## Quick Start
|
|
18
|
+
|
|
19
|
+
### 1. Install
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
npm install -g persyst-mcp
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
### 2. Add to Claude Code
|
|
26
|
+
|
|
27
|
+
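Edit your MCP config. `claude_desktop_config.json` is the Claude Desktop file; for Claude Code, put the same entry in a project-level `.mcp.json` or run `claude mcp add persyst -- persyst-mcp`: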
|
|
28
|
+
|
|
29
|
+
```json
|
|
30
|
+
{
|
|
31
|
+
"mcpServers": {
|
|
32
|
+
"persyst": {
|
|
33
|
+
"command": "persyst-mcp"
|
|
34
|
+
}
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
### 3. Use It
|
|
40
|
+
|
|
41
|
+
In Claude Code, the agent can now call tools like:
|
|
42
|
+
- `add_memory` — Store a fact
|
|
43
|
+
- `search_memories` — Find relevant memories
|
|
44
|
+
- `get_memory` — Get a specific memory
|
|
45
|
+
- `update_memory` — Update a memory
|
|
46
|
+
- `delete_memory` — Remove a memory
|
|
47
|
+
- `get_recent_memories` — Latest memories
|
|
48
|
+
- `get_important_memories` — Most important memories
|
|
49
|
+
|
|
50
|
+
## Setup for Other Agents
|
|
51
|
+
|
|
52
|
+
### Cursor
|
|
53
|
+
|
|
54
|
+
Add to your Cursor MCP settings:
|
|
55
|
+
|
|
56
|
+
```json
|
|
57
|
+
{
|
|
58
|
+
"persyst": {
|
|
59
|
+
"command": "persyst-mcp"
|
|
60
|
+
}
|
|
61
|
+
}
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
### Aider
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
# Start the MCP server alongside Aider
|
|
68
|
+
persyst-mcp &
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
## Available Tools
|
|
72
|
+
|
|
73
|
+
| Tool | Description | Parameters |
|
|
74
|
+
|------|-------------|------------|
|
|
75
|
+
| `add_memory` | Store a new memory | `content` (string), `importance` (0-1, optional) |
|
|
76
|
+
| `search_memories` | Hybrid keyword + semantic search | `query` (string), `limit` (number) |
|
|
77
|
+
| `get_memory` | Get memory by ID | `id` (number) |
|
|
78
|
+
| `update_memory` | Update memory content | `id` (number), `content` (string) |
|
|
79
|
+
| `delete_memory` | Delete a memory | `id` (number) |
|
|
80
|
+
| `get_recent_memories` | Get latest memories | `limit` (number) |
|
|
81
|
+
| `get_important_memories` | Get by importance score | `limit` (number) |
|
|
82
|
+
|
|
83
|
+
## How Search Works
|
|
84
|
+
|
|
85
|
+
Persyst uses **hybrid search** — combining two strategies:
|
|
86
|
+
|
|
87
|
+
1. **Keyword Search (FTS5)** — Exact word matches using BM25 ranking
|
|
88
|
+
2. **Semantic Search (sqlite-vec)** — Meaning-based using local embeddings
|
|
89
|
+
|
|
90
|
+
Results from both are merged. Keyword matches get a score boost so exact matches rank higher, but semantic matches still surface related memories.
|
|
91
|
+
|
|
92
|
+
## Architecture
|
|
93
|
+
|
|
94
|
+
```
|
|
95
|
+
persyst/
|
|
96
|
+
├── index.js ← Entry point (starts MCP server)
|
|
97
|
+
├── src/
|
|
98
|
+
│ ├── server.js ← MCP server (stdio transport)
|
|
99
|
+
│ ├── database.js ← SQLite + schema + CRUD
|
|
100
|
+
│ ├── search.js ← Hybrid search engine
|
|
101
|
+
│ ├── embeddings.js ← Local embedding generation
|
|
102
|
+
│ └── tools.js ← 7 MCP tool definitions
|
|
103
|
+
├── test/
|
|
104
|
+
│ └── smoke.js ← End-to-end test
|
|
105
|
+
└── db/ ← Database files (gitignored)
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
## Data Storage
|
|
109
|
+
|
|
110
|
+
- Database location: `~/.persyst/persyst.db`
|
|
111
|
+
- All data stays on your machine
|
|
112
|
+
- No telemetry, no cloud calls, no API keys
|
|
113
|
+
- Works offline once the embedding model (~50MB) has been downloaded and cached on first use (airplane mode ✓)
|
|
114
|
+
|
|
115
|
+
## Tech Stack
|
|
116
|
+
|
|
117
|
+
- **Runtime:** Node.js 18+
|
|
118
|
+
- **Database:** SQLite via better-sqlite3
|
|
119
|
+
- **Vector Search:** sqlite-vec (local, no cloud)
|
|
120
|
+
- **Full-Text Search:** SQLite FTS5
|
|
121
|
+
- **Embeddings:** @huggingface/transformers + all-MiniLM-L6-v2 (384-dim, ~50MB)
|
|
122
|
+
- **Protocol:** MCP over stdio
|
|
123
|
+
|
|
124
|
+
## Development
|
|
125
|
+
|
|
126
|
+
```bash
|
|
127
|
+
# Clone and install
|
|
128
|
+
git clone <repo-url>
|
|
129
|
+
cd persyst
|
|
130
|
+
npm install
|
|
131
|
+
|
|
132
|
+
# Run smoke test
|
|
133
|
+
npm test
|
|
134
|
+
|
|
135
|
+
# Start server directly
|
|
136
|
+
node index.js
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
## License
|
|
140
|
+
|
|
141
|
+
MIT
|
package/index.js
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Persyst MCP Server — Entry Point
|
|
5
|
+
*
|
|
6
|
+
* A local-first memory server for coding agents.
|
|
7
|
+
* Starts the MCP server on stdio transport.
|
|
8
|
+
*
|
|
9
|
+
* Usage:
|
|
10
|
+
* node index.js (direct)
|
|
11
|
+
* npx persyst-mcp (via npm)
|
|
12
|
+
* persyst-mcp (if installed globally)
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
import { startServer } from './src/server.js';
|
|
16
|
+
|
|
17
|
+
// Launch the MCP server imported from ./src/server.js.
// NOTE(review): the return value is not awaited or caught here — if startServer
// is async, confirm it handles its own rejections.
startServer();
|
package/package.json
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "persyst-mcp",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "Local-first MCP memory server with hybrid keyword + semantic search for coding agents",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"main": "index.js",
|
|
7
|
+
"bin": {
|
|
8
|
+
"persyst-mcp": "index.js"
|
|
9
|
+
},
|
|
10
|
+
"scripts": {
|
|
11
|
+
"start": "node index.js",
|
|
12
|
+
"test": "node test/smoke.js",
|
|
13
|
+
"test:heavy": "cross-env NODE_ENV=test node --test test/test_*.js"
|
|
14
|
+
},
|
|
15
|
+
"keywords": [
|
|
16
|
+
"mcp",
|
|
17
|
+
"memory",
|
|
18
|
+
"sqlite",
|
|
19
|
+
"local-first",
|
|
20
|
+
"ai",
|
|
21
|
+
"agents",
|
|
22
|
+
"semantic-search",
|
|
23
|
+
"knowledge-graph"
|
|
24
|
+
],
|
|
25
|
+
"author": "Zayn",
|
|
26
|
+
"license": "MIT",
|
|
27
|
+
"repository": {
|
|
28
|
+
"type": "git",
|
|
29
|
+
"url": "git+https://github.com/ZayniBaloch/Peryst.git"
|
|
30
|
+
},
|
|
31
|
+
"bugs": {
|
|
32
|
+
"url": "https://github.com/ZayniBaloch/Peryst/issues"
|
|
33
|
+
},
|
|
34
|
+
"homepage": "https://github.com/ZayniBaloch/Peryst#readme",
|
|
35
|
+
"dependencies": {
|
|
36
|
+
"@huggingface/transformers": "^4.2.0",
|
|
37
|
+
"@modelcontextprotocol/sdk": "^1.29.0",
|
|
38
|
+
"better-sqlite3": "^12.10.0",
|
|
39
|
+
"sqlite-vec": "^0.1.9",
|
|
40
|
+
"zod": "^3.23.0"
|
|
41
|
+
},
|
|
42
|
+
"devDependencies": {
|
|
43
|
+
"cross-env": "^10.1.0"
|
|
44
|
+
}
|
|
45
|
+
}
|
package/src/database.js
ADDED
|
@@ -0,0 +1,473 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* database.js — SQLite Database Setup & CRUD Operations
|
|
3
|
+
*
|
|
4
|
+
* This file handles everything database-related:
|
|
5
|
+
* - Opens SQLite connection at ~/.persyst/persyst.db
|
|
6
|
+
* - Loads the sqlite-vec extension for vector search
|
|
7
|
+
* - Creates all tables (memories, FTS5 index, vector index)
|
|
8
|
+
* - Exports simple CRUD functions for other modules to use
|
|
9
|
+
*
|
|
10
|
+
* IMPORTANT: better-sqlite3 is SYNCHRONOUS. No async/await here.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import Database from 'better-sqlite3';
|
|
14
|
+
import * as sqliteVec from 'sqlite-vec';
|
|
15
|
+
import { join } from 'path';
|
|
16
|
+
import { homedir } from 'os';
|
|
17
|
+
import { mkdirSync } from 'fs';
|
|
18
|
+
|
|
19
|
+
// ============================================================
|
|
20
|
+
// DATABASE LOCATION
|
|
21
|
+
// Store in ~/.persyst/ so data persists across sessions
|
|
22
|
+
// ============================================================
|
|
23
|
+
|
|
24
|
+
// Data directory under the user's home; created up front (recursively) so the
// database file can be opened below.
const DB_DIR = join(homedir(), '.persyst');
mkdirSync(DB_DIR, { recursive: true });

// NODE_ENV=test swaps the on-disk file for a throwaway in-memory database.
const DB_PATH = process.env.NODE_ENV !== 'test'
  ? join(DB_DIR, 'persyst.db')
  : ':memory:';

// ============================================================
// INITIALIZE CONNECTION
// ============================================================

const db = new Database(DB_PATH);

// WAL journaling first, then foreign-key enforcement.
for (const setting of ['journal_mode = WAL', 'foreign_keys = ON']) {
  db.pragma(setting);
}

// The sqlite-vec extension has to be registered before any vec0 table is created.
sqliteVec.load(db);

console.error(`[persyst] Database: ${DB_PATH}`);
|
|
40
|
+
|
|
41
|
+
// ============================================================
|
|
42
|
+
// CREATE TABLES
|
|
43
|
+
// ============================================================
|
|
44
|
+
|
|
45
|
+
// --- Core storage: the memories table and its FTS5 keyword index ---
// memories_fts is declared with content='memories' / content_rowid='id', i.e. it
// indexes the text held in memories.content and keys it by memories.id.
for (const ddl of [
  `
  CREATE TABLE IF NOT EXISTS memories (
    id INTEGER PRIMARY KEY,
    content TEXT NOT NULL,
    importance_score REAL DEFAULT 1.0,
    created_at INTEGER DEFAULT (unixepoch()),
    last_accessed INTEGER DEFAULT (unixepoch()),
    access_count INTEGER DEFAULT 0
  )
  `,
  `
  CREATE VIRTUAL TABLE IF NOT EXISTS memories_fts USING fts5(
    content,
    content='memories',
    content_rowid='id'
  )
  `,
]) {
  db.exec(ddl);
}
|
|
65
|
+
|
|
66
|
+
// --- FTS5 auto-sync triggers ---
// These keep the FTS index in sync when memories are added/updated/deleted.
// SQLite supports CREATE TRIGGER IF NOT EXISTS, so the statements are idempotent
// on their own; no try/catch is used, which means a genuine DDL failure (bad SQL,
// missing table, locked database) surfaces at startup instead of being swallowed.

// New row -> add its text to the index.
db.exec(`
  CREATE TRIGGER IF NOT EXISTS memories_fts_insert AFTER INSERT ON memories
  BEGIN
    INSERT INTO memories_fts(rowid, content) VALUES (new.id, new.content);
  END
`);

// Removed row -> issue the FTS5 'delete' command with the old text.
db.exec(`
  CREATE TRIGGER IF NOT EXISTS memories_fts_delete AFTER DELETE ON memories
  BEGIN
    INSERT INTO memories_fts(memories_fts, rowid, content)
    VALUES ('delete', old.id, old.content);
  END
`);

// Changed content -> drop the old entry, then index the new text.
db.exec(`
  CREATE TRIGGER IF NOT EXISTS memories_fts_update AFTER UPDATE OF content ON memories
  BEGIN
    INSERT INTO memories_fts(memories_fts, rowid, content)
    VALUES ('delete', old.id, old.content);
    INSERT INTO memories_fts(rowid, content)
    VALUES (new.id, new.content);
  END
`);
|
|
100
|
+
|
|
101
|
+
// --- Remaining schema: vector index and knowledge-graph tables ---
// memories_vec : vec0 virtual table holding one 384-float embedding per row
// entities     : the "nouns" (people, files, tech, concepts), unique by name
// edges        : typed links between two nodes; source_type / target_type record
//                what kind of node each id refers to
for (const ddl of [
  `
  CREATE VIRTUAL TABLE IF NOT EXISTS memories_vec USING vec0(
    embedding float[384]
  )
  `,
  `
  CREATE TABLE IF NOT EXISTS entities (
    id INTEGER PRIMARY KEY,
    name TEXT NOT NULL UNIQUE,
    type TEXT NOT NULL,
    created_at INTEGER DEFAULT (unixepoch())
  )
  `,
  `
  CREATE TABLE IF NOT EXISTS edges (
    id INTEGER PRIMARY KEY,
    source_id INTEGER NOT NULL,
    target_id INTEGER NOT NULL,
    relation TEXT NOT NULL,
    source_type TEXT NOT NULL,
    target_type TEXT NOT NULL,
    created_at INTEGER DEFAULT (unixepoch())
  )
  `,
]) {
  db.exec(ddl);
}

console.error('[persyst] Schema initialized ✓');
|
|
133
|
+
|
|
134
|
+
// ============================================================
|
|
135
|
+
// PREPARED STATEMENTS
|
|
136
|
+
// Pre-compile SQL for performance. better-sqlite3 is synchronous.
|
|
137
|
+
// ============================================================
|
|
138
|
+
|
|
139
|
+
// Prepared statements, compiled once at module load and reused by the functions below.
// Listing queries carry an `id DESC` tie-breaker: created_at has one-second
// resolution and importance_score defaults to 1.0, so without it rows that tie
// would come back in an unspecified order.
const stmts = {
  // -- Insert --
  insertMemory: db.prepare(
    'INSERT INTO memories (content, importance_score) VALUES (?, ?)'
  ),
  insertVec: db.prepare(
    'INSERT INTO memories_vec (rowid, embedding) VALUES (?, ?)'
  ),

  // -- Read --
  getById: db.prepare(
    'SELECT * FROM memories WHERE id = ?'
  ),
  getRecent: db.prepare(
    'SELECT * FROM memories ORDER BY created_at DESC, id DESC LIMIT ?'
  ),
  getImportant: db.prepare(
    'SELECT * FROM memories ORDER BY importance_score DESC, id DESC LIMIT ?'
  ),

  // -- Update --
  updateContent: db.prepare(
    'UPDATE memories SET content = ? WHERE id = ?'
  ),

  // -- Delete --
  deleteMemory: db.prepare(
    'DELETE FROM memories WHERE id = ?'
  ),
  deleteVec: db.prepare(
    'DELETE FROM memories_vec WHERE rowid = ?'
  ),

  // -- Memory Lifecycle --
  // boost: +1 access, +0.1 importance (capped at 2.0), refresh last_accessed.
  boost: db.prepare(`
    UPDATE memories
    SET access_count = access_count + 1,
        importance_score = MIN(importance_score + 0.1, 2.0),
        last_accessed = unixepoch()
    WHERE id = ?
  `),
  // decay: the bound parameter is "now" in unix seconds; 604800 s = 7 days.
  decay: db.prepare(`
    UPDATE memories
    SET importance_score = importance_score * 0.95
    WHERE (? - last_accessed) > 604800
  `),

  // -- Search --
  searchFts: db.prepare(`
    SELECT rowid AS id, rank
    FROM memories_fts
    WHERE memories_fts MATCH ?
    ORDER BY rank
    LIMIT ?
  `),
  searchVec: db.prepare(`
    SELECT rowid, distance
    FROM memories_vec
    WHERE embedding MATCH ?
      AND k = ?
  `),

  // -- Entity CRUD --
  insertEntity: db.prepare(
    'INSERT OR IGNORE INTO entities (name, type) VALUES (?, ?)'
  ),
  getEntityByName: db.prepare(
    'SELECT * FROM entities WHERE name = ?'
  ),
  getEntityById: db.prepare(
    'SELECT * FROM entities WHERE id = ?'
  ),
  getAllEntities: db.prepare(
    'SELECT * FROM entities ORDER BY created_at DESC, id DESC LIMIT ?'
  ),
  deleteEntity: db.prepare(
    'DELETE FROM entities WHERE id = ?'
  ),

  // -- Edges --
  insertEdge: db.prepare(
    'INSERT INTO edges (source_id, target_id, relation, source_type, target_type) VALUES (?, ?, ?, ?, ?)'
  ),
  getEdgesBySource: db.prepare(
    'SELECT * FROM edges WHERE source_id = ? AND source_type = ?'
  ),
  getEdgesByTarget: db.prepare(
    'SELECT * FROM edges WHERE target_id = ? AND target_type = ?'
  ),
  deleteEdgesByMemory: db.prepare(
    `DELETE FROM edges WHERE
      (source_id = ? AND source_type = 'memory') OR
      (target_id = ? AND target_type = 'memory')`
  ),

  // -- Dedup --
  findMemoryByContent: db.prepare(
    'SELECT id FROM memories WHERE content LIKE ? LIMIT 1'
  )
};
|
|
239
|
+
|
|
240
|
+
// ============================================================
|
|
241
|
+
// CRUD FUNCTIONS
|
|
242
|
+
// Simple, one-purpose functions. No magic.
|
|
243
|
+
// ============================================================
|
|
244
|
+
|
|
245
|
+
/**
 * Add a row to the memories table and report its ID.
 * No explicit indexing call is made here; the FTS5 index is maintained by the
 * AFTER INSERT trigger on memories.
 *
 * @param {string} content - Memory text to store
 * @param {number} [importance=1.0] - Initial importance_score
 * @returns {number} ID of the newly inserted memory
 */
export function insertMemory(content, importance = 1.0) {
  const { lastInsertRowid } = stmts.insertMemory.run(content, importance);
  return Number(lastInsertRowid);
}
|
|
254
|
+
|
|
255
|
+
/**
 * Store an embedding vector for a memory.
 *
 * @param {number} id - Memory ID (used as rowid in the vec table)
 * @param {Float32Array} embedding - 384-dim embedding vector
 */
export function insertVector(id, embedding) {
  // Wrap exactly the bytes this typed array covers. Passing only
  // `embedding.buffer` would ignore byteOffset/byteLength and store the whole
  // backing ArrayBuffer when `embedding` is a view (e.g. a subarray).
  const bytes = Buffer.from(
    embedding.buffer,
    embedding.byteOffset,
    embedding.byteLength
  );
  // The vector is bound as a Buffer and the rowid as a BigInt.
  stmts.insertVec.run(BigInt(id), bytes);
}
|
|
264
|
+
|
|
265
|
+
/**
 * Fetch one memory by ID and register the access via boostMemory().
 * The returned row is the one read BEFORE the boost, so its access_count,
 * importance_score and last_accessed do not include this access.
 *
 * @param {number} id - Memory ID
 * @returns {object|null} The memory row, or null when no row has that ID
 */
export function getMemory(id) {
  const row = stmts.getById.get(id);
  if (!row) return null;

  boostMemory(id);
  return row;
}
|
|
274
|
+
|
|
275
|
+
/**
 * Plain lookup of a memory by ID; unlike getMemory() it does not boost the row.
 *
 * @param {number} id - Memory ID
 * @returns {object|null} The memory row, or null when no row has that ID
 */
export function getMemoryById(id) {
  const row = stmts.getById.get(id);
  return row ? row : null;
}
|
|
282
|
+
|
|
283
|
+
/**
 * Replace the text of an existing memory.
 * The AFTER UPDATE OF content trigger re-indexes the row in FTS5; the vector
 * embedding is NOT touched here and must be refreshed by the caller.
 *
 * @param {number} id - Memory ID
 * @param {string} content - New memory text
 * @returns {boolean} true when a row with that ID was updated
 */
export function updateMemoryContent(id, content) {
  const { changes } = stmts.updateContent.run(content, id);
  return changes > 0;
}
|
|
292
|
+
|
|
293
|
+
/**
 * Delete a vector embedding by memory ID (best effort).
 * A failure never propagates to the caller, but it is logged to stderr rather
 * than silently discarded, so problems other than "no such row" stay visible.
 *
 * @param {number} id - Memory ID whose embedding row should be removed
 */
export function deleteVec(id) {
  try {
    // The rowid is bound as a BigInt, matching insertVector().
    stmts.deleteVec.run(BigInt(id));
  } catch (err) {
    const reason = err.message || String(err);
    console.error(`[persyst] Could not delete vector for memory ${id}: ${reason}`);
  }
}
|
|
299
|
+
|
|
300
|
+
/**
 * Remove a memory row together with its embedding.
 * The embedding is removed explicitly first; the FTS5 entry is cleaned up by
 * the AFTER DELETE trigger. Edges are not touched here.
 *
 * @param {number} id - Memory ID
 * @returns {boolean} true when a memory row with that ID was deleted
 */
export function deleteMemory(id) {
  deleteVec(id);

  const { changes } = stmts.deleteMemory.run(id);
  return changes > 0;
}
|
|
310
|
+
|
|
311
|
+
/**
 * List memories ordered by creation time, newest first.
 *
 * @param {number} [limit=10] - Maximum number of rows to return
 * @returns {object[]} Memory rows
 */
export function getRecentMemories(limit = 10) {
  const rows = stmts.getRecent.all(limit);
  return rows;
}
|
|
317
|
+
|
|
318
|
+
/**
 * List memories ordered by importance_score, highest first.
 *
 * @param {number} [limit=10] - Maximum number of rows to return
 * @returns {object[]} Memory rows
 */
export function getImportantMemories(limit = 10) {
  const rows = stmts.getImportant.all(limit);
  return rows;
}
|
|
324
|
+
|
|
325
|
+
// ============================================================
|
|
326
|
+
// MEMORY LIFECYCLE
|
|
327
|
+
// ============================================================
|
|
328
|
+
|
|
329
|
+
/**
 * Record an access to a memory.
 * Runs the `boost` statement, which bumps access_count by one, raises
 * importance_score by 0.1 (capped at 2.0) and sets last_accessed to now.
 *
 * @param {number} id - Memory ID
 */
export function boostMemory(id) {
  const statement = stmts.boost;
  statement.run(id);
}
|
|
337
|
+
|
|
338
|
+
/**
 * Shrink the importance of memories that have gone unused.
 * Every memory whose last_accessed is more than 604800 s (7 days) behind the
 * current time has its importance_score multiplied by 0.95. A line is logged
 * to stderr only when at least one row changed.
 */
export function applyTemporalDecay() {
  const nowSeconds = Math.floor(Date.now() / 1000);
  const { changes } = stmts.decay.run(nowSeconds);

  if (!(changes > 0)) return;
  console.error(`[persyst] Decay applied to ${changes} memories`);
}
|
|
350
|
+
|
|
351
|
+
// ============================================================
|
|
352
|
+
// SEARCH HELPERS (used by search.js)
|
|
353
|
+
// ============================================================
|
|
354
|
+
|
|
355
|
+
/**
 * Keyword search using FTS5 with BM25 ranking.
 *
 * The query is first passed to MATCH as-is, so FTS5 query syntax keeps working.
 * If that throws (FTS5 rejects some special characters, e.g. "c++"), the search
 * is retried with every whitespace-separated term wrapped in double quotes so it
 * is treated as literal text instead of returning nothing.
 *
 * @param {string} query - Search text / FTS5 query
 * @param {number} [limit=10] - Maximum number of hits
 * @returns {Array<{id: number, rank: number}>} Hits; empty when both attempts fail
 */
export function searchKeyword(query, limit = 10) {
  try {
    return stmts.searchFts.all(query, limit);
  } catch {
    // Fall through to the quoted retry below.
  }

  // Embedded double quotes are doubled, SQL-style, inside each quoted term.
  const quoted = String(query)
    .split(/\s+/)
    .filter(Boolean)
    .map((term) => `"${term.replace(/"/g, '""')}"`)
    .join(' ');
  if (!quoted) return [];

  try {
    return stmts.searchFts.all(quoted, limit);
  } catch (err) {
    // Still best-effort, but leave a trace instead of failing silently.
    console.error(`[persyst] Keyword search failed: ${err.message || String(err)}`);
    return [];
  }
}
|
|
367
|
+
|
|
368
|
+
/**
 * Vector similarity search using sqlite-vec KNN.
 *
 * @param {Float32Array} embedding - Query vector (384-dim)
 * @param {number} [limit=10] - Number of nearest neighbours to request (bound to `k`)
 * @returns {Array<{rowid: number, distance: number}>}
 */
export function searchVector(embedding, limit = 10) {
  // Bind exactly the bytes covered by this typed array; `embedding.buffer` alone
  // would ignore byteOffset/byteLength when `embedding` is a view into a larger
  // ArrayBuffer.
  const bytes = Buffer.from(
    embedding.buffer,
    embedding.byteOffset,
    embedding.byteLength
  );
  return stmts.searchVec.all(bytes, limit);
}
|
|
376
|
+
|
|
377
|
+
// ============================================================
|
|
378
|
+
// ENTITY FUNCTIONS (Knowledge Graph)
|
|
379
|
+
// ============================================================
|
|
380
|
+
|
|
381
|
+
/**
 * Create a named entity (person, tech, file, concept, etc.) if it is new.
 * The insert uses INSERT OR IGNORE, so an entity whose name already exists is
 * left untouched and its existing ID is looked up and returned instead.
 *
 * @param {string} name - Unique entity name
 * @param {string} type - Entity type label
 * @returns {number|null} ID of the new or already-existing entity; null only if
 *   the insert was ignored and the follow-up lookup by name finds nothing
 */
export function insertEntity(name, type) {
  const { changes, lastInsertRowid } = stmts.insertEntity.run(name, type);
  if (changes !== 0) {
    return Number(lastInsertRowid);
  }

  // Insert was ignored: the name is already taken, so hand back that row's ID.
  const row = stmts.getEntityByName.get(name);
  return row ? row.id : null;
}
|
|
395
|
+
|
|
396
|
+
/**
 * Look up an entity by its exact name.
 *
 * @param {string} name - Entity name
 * @returns {object|null} The entity row, or null when none matches
 */
export function getEntityByName(name) {
  const row = stmts.getEntityByName.get(name);
  return row ? row : null;
}
|
|
402
|
+
|
|
403
|
+
/**
 * Look up an entity by its ID.
 *
 * @param {number} id - Entity ID
 * @returns {object|null} The entity row, or null when none matches
 */
export function getEntityById(id) {
  const row = stmts.getEntityById.get(id);
  return row ? row : null;
}
|
|
409
|
+
|
|
410
|
+
/**
 * List entities ordered by creation time, newest first.
 *
 * @param {number} [limit=50] - Maximum number of rows to return
 * @returns {object[]} Entity rows
 */
export function getAllEntities(limit = 50) {
  const rows = stmts.getAllEntities.all(limit);
  return rows;
}
|
|
416
|
+
|
|
417
|
+
/**
 * Delete an entity and its edges.
 *
 * Edges that reference the entity on either side (source or target, with the
 * matching *_type of 'entity') are removed as well, so no edge is left pointing
 * at a deleted row. Both deletes run in one transaction: either everything is
 * removed or nothing is.
 *
 * @param {number} id - Entity ID
 */
export function deleteEntity(id) {
  const removeEntityAndEdges = db.transaction((entityId) => {
    db.prepare(
      `DELETE FROM edges WHERE
        (source_id = ? AND source_type = 'entity') OR
        (target_id = ? AND target_type = 'entity')`
    ).run(entityId, entityId);
    stmts.deleteEntity.run(entityId);
  });
  removeEntityAndEdges(id);
}
|
|
423
|
+
|
|
424
|
+
/**
 * Record a relation between two graph nodes (entity↔entity or entity↔memory).
 *
 * @param {number} sourceId - ID of the node the edge starts from
 * @param {number} targetId - ID of the node the edge points to
 * @param {string} relation - Relation label
 * @param {string} sourceType - Kind of node sourceId refers to (e.g. 'entity')
 * @param {string} targetType - Kind of node targetId refers to (e.g. 'memory')
 */
export function insertEdge(sourceId, targetId, relation, sourceType, targetType) {
  const params = [sourceId, targetId, relation, sourceType, targetType];
  stmts.insertEdge.run(...params);
}
|
|
430
|
+
|
|
431
|
+
/**
 * Collect the memories an entity points to.
 * Follows edges whose source is this entity and whose target_type is 'memory';
 * targets that no longer exist in the memories table are dropped.
 *
 * @param {number} entityId - Entity ID
 * @returns {object[]} Memory rows, in edge order
 */
export function getMemoriesByEntity(entityId) {
  const linked = [];
  for (const edge of stmts.getEdgesBySource.all(entityId, 'entity')) {
    if (edge.target_type !== 'memory') continue;

    const row = stmts.getById.get(edge.target_id);
    if (row) linked.push(row);
  }
  return linked;
}
|
|
440
|
+
|
|
441
|
+
/**
 * Report whether any memory's content matches a LIKE pattern.
 * Intended for deduplication (e.g. before re-ingesting git commits).
 *
 * @param {string} pattern - SQL LIKE pattern, passed through unchanged
 * @returns {boolean} true when at least one memory matches
 */
export function memoryExists(pattern) {
  const hit = stmts.findMemoryByContent.get(pattern);
  return hit !== undefined;
}
|
|
450
|
+
|
|
451
|
+
/**
 * Delete a memory and clean up its edges.
 *
 * Removes, in order: every edge that references the memory (as source or
 * target), its vector embedding, and the memory row itself. The steps run
 * inside one transaction, so a failure part-way through does not leave a
 * memory whose edges have already been removed.
 *
 * @param {number} id - Memory ID
 * @returns {boolean} true if the memory existed and was deleted
 */
export function deleteMemoryFull(id) {
  const purge = db.transaction((memoryId) => {
    stmts.deleteEdgesByMemory.run(memoryId, memoryId);
    deleteVec(memoryId);
    return stmts.deleteMemory.run(memoryId).changes > 0;
  });
  return purge(id);
}
|
|
460
|
+
|
|
461
|
+
// ============================================================
|
|
462
|
+
// CLEANUP
|
|
463
|
+
// ============================================================
|
|
464
|
+
|
|
465
|
+
/**
 * Shut down the shared SQLite connection and log that it happened.
 * Meant to be called once, when the process is shutting down.
 */
export function closeDatabase() {
  db.close();
  console.error('[persyst] Database closed');
}
|
|
472
|
+
|
|
473
|
+
export default db;
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* embeddings.js — Local Embedding Generation
|
|
3
|
+
*
|
|
4
|
+
* Uses @huggingface/transformers with the all-MiniLM-L6-v2 model
|
|
5
|
+
* to generate 384-dimensional embeddings entirely on your machine.
|
|
6
|
+
*
|
|
7
|
+
* - No API keys needed
|
|
8
|
+
* - No cloud calls
|
|
9
|
+
* - Model downloads once (~50MB), then cached locally
|
|
10
|
+
* - Returns Float32Array ready for sqlite-vec
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import './setup-wasm.js';
|
|
14
|
+
import { env, pipeline } from '@huggingface/transformers';
|
|
15
|
+
|
|
16
|
+
// Disable WASM caching to prevent blob: URL ESM dynamic import error in Node.js
|
|
17
|
+
// NOTE(review): assumes `useWasmCache` is a setting the installed
// @huggingface/transformers `env` object honors — confirm against its docs.
env.useWasmCache = false;
|
|
18
|
+
|
|
19
|
+
// The embedding pipeline (lazy-loaded on first use)
|
|
20
|
+
// Set by loadModel() to the feature-extraction pipeline; null until the model has loaded.
let extractor = null;
|
|
21
|
+
|
|
22
|
+
// In-flight model load shared by concurrent callers; null when no load is running.
let modelLoading = null;

/**
 * Load the embedding model. Called automatically on first use.
 * First run downloads the model (~50MB). Subsequent runs use cache.
 *
 * Concurrent callers share a single load: without this, every call that
 * arrived before the first load finished would start its own pipeline() load.
 * If the load fails, the shared promise is cleared so a later call can retry.
 *
 * @returns {Promise<void>} Resolves once `extractor` is ready
 */
async function loadModel() {
  if (extractor) return;

  if (!modelLoading) {
    console.error('[persyst] Loading embedding model (first run downloads ~50MB)...');
    modelLoading = pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2')
      .then((loaded) => {
        extractor = loaded;
        console.error('[persyst] Embedding model loaded ✓');
      })
      .finally(() => {
        // Success: `extractor` now short-circuits. Failure: allow a retry.
        modelLoading = null;
      });
  }

  await modelLoading;
}
|
|
33
|
+
|
|
34
|
+
/**
 * Turn a piece of text into an embedding vector.
 * Makes sure the model is loaded, runs the extractor with mean pooling and
 * unit-length normalization, and copies the result into a fresh Float32Array.
 *
 * @param {string} text - The text to embed
 * @returns {Promise<Float32Array>} Normalized embedding (384 values for all-MiniLM-L6-v2)
 *
 * @example
 *   const vec = await generateEmbedding("User prefers dark mode");
 *   // vec is a Float32Array that can be handed to insertVector()/searchVector()
 */
export async function generateEmbedding(text) {
  await loadModel();

  const options = {
    pooling: 'mean',  // average the per-token embeddings into one vector
    normalize: true   // scale to unit length
  };
  const { data } = await extractor(text, options);

  // Copy the tensor's flat data into a Float32Array owned by the caller.
  return new Float32Array(data);
}
|
package/src/git.js
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* git.js — Git Commit Ingestion
|
|
3
|
+
*
|
|
4
|
+
* Reads git log from a repository and converts commits into memories.
|
|
5
|
+
* Useful for giving coding agents context about a project's history.
|
|
6
|
+
*
|
|
7
|
+
* Each commit becomes a memory like:
|
|
8
|
+
* "[abc1234] Fix login bug — by John on 2024-01-15"
|
|
9
|
+
*
|
|
10
|
+
* Deduplicates by commit hash so you can ingest safely multiple times.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import { execFileSync, execSync } from 'child_process';
|
|
14
|
+
|
|
15
|
+
/**
 * Read the N most recent git commits from a repository.
 *
 * git is started directly with an argument array (no shell), so neither
 * `count` nor the format string is ever interpreted as shell syntax: nothing
 * can be injected through `count`, and the `%…` placeholders are not expanded
 * by cmd.exe on Windows.
 *
 * @param {string} repoPath - Absolute path to the git repo
 * @param {number} count - Number of commits to read (default: 20)
 * @returns {Array<{hash: string, message: string, author: string, date: string, fullText: string}>}
 * @throws {Error} "Not a git repository: …", "Git is not installed or not in PATH",
 *   or "Failed to read git log: …"; the underlying error is attached as `cause`
 */
export function getRecentCommits(repoPath, count = 20) {
  try {
    const limit = Number(count);
    if (!Number.isInteger(limit)) {
      throw new Error(`count must be an integer, got ${String(count)}`);
    }

    // Use a delimiter to split commits reliably
    const DELIM = '---PERSYST-COMMIT---';
    // One field per line: hash, author name, author date, subject, then body.
    const format = `${DELIM}%n%H%n%an%n%ai%n%s%n%b`;

    const output = execFileSync(
      'git',
      ['log', '-n', String(limit), `--pretty=format:${format}`],
      {
        cwd: repoPath,
        encoding: 'utf-8',
        timeout: 10000, // 10s timeout
        stdio: ['pipe', 'pipe', 'pipe'] // capture stderr instead of printing it
      }
    );

    // Parse the output into commit objects
    const commits = [];
    const blocks = output.split(DELIM).filter((b) => b.trim());

    for (const block of blocks) {
      const lines = block.trim().split('\n');
      if (lines.length < 4) continue; // incomplete record, skip it

      const hash = lines[0].trim();
      const author = lines[1].trim();
      const date = lines[2].trim().split(' ')[0]; // Just the date part
      const subject = lines[3].trim();
      const body = lines.slice(4).join(' ').trim();

      // Build a readable memory string
      const shortHash = hash.slice(0, 7);
      const fullText = body
        ? `[${shortHash}] ${subject} — by ${author} on ${date}. ${body}`
        : `[${shortHash}] ${subject} — by ${author} on ${date}`;

      commits.push({ hash, message: subject, author, date, fullText });
    }

    return commits;
  } catch (err) {
    const message = err.message || String(err);
    if (message.includes('not a git repository')) {
      throw new Error(`Not a git repository: ${repoPath}`, { cause: err });
    }
    // NOTE(review): ENOENT is also what a missing `repoPath` directory produces,
    // so this message can be shown for a bad path as well as a missing git binary.
    if (message.includes('ENOENT') || message.includes('not recognized')) {
      throw new Error('Git is not installed or not in PATH', { cause: err });
    }
    throw new Error(`Failed to read git log: ${message}`, { cause: err });
  }
}
|
|
73
|
+
|
|
74
|
+
/**
 * Get changed files from a specific commit.
 * Useful for linking commits to file entities.
 *
 * git is started directly with an argument array (no shell), so `hash` cannot
 * inject shell commands. Values that are empty, not strings, or start with "-"
 * are rejected up front so they cannot be read by git as an option.
 *
 * @param {string} repoPath - Absolute path to the git repo
 * @param {string} hash - Full commit hash
 * @returns {string[]} List of changed file paths; empty on any failure
 */
export function getCommitFiles(repoPath, hash) {
  if (typeof hash !== 'string' || hash === '' || hash.startsWith('-')) {
    return [];
  }

  try {
    const output = execFileSync(
      'git',
      ['diff-tree', '--no-commit-id', '--name-only', '-r', hash],
      {
        cwd: repoPath,
        encoding: 'utf-8',
        timeout: 5000,
        stdio: ['pipe', 'pipe', 'pipe']
      }
    );
    return output.trim().split('\n').filter(Boolean);
  } catch {
    // Best effort: unknown commit, not a repo, git missing, timeout → no files.
    return [];
  }
}
|
package/src/search.js
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* search.js — Hybrid Search Engine
|
|
3
|
+
*
|
|
4
|
+
* Combines two search strategies for best results:
|
|
5
|
+
*
|
|
6
|
+
* 1. KEYWORD SEARCH (FTS5 + BM25)
|
|
7
|
+
* → Finds exact word matches. Fast. "React" finds "React".
|
|
8
|
+
*
|
|
9
|
+
* 2. SEMANTIC SEARCH (sqlite-vec + embeddings)
|
|
10
|
+
* → Finds by meaning. "dark mode" matches "night theme".
|
|
11
|
+
*
|
|
12
|
+
* 3. HYBRID = keyword + semantic merged
|
|
13
|
+
* → Keyword matches get a +0.2 score boost on top of semantic score.
|
|
14
|
+
* → Best of both worlds.
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
import { generateEmbedding } from './embeddings.js';
|
|
18
|
+
import {
|
|
19
|
+
searchKeyword,
|
|
20
|
+
searchVector,
|
|
21
|
+
getMemoryById,
|
|
22
|
+
boostMemory
|
|
23
|
+
} from './database.js';
|
|
24
|
+
|
|
25
|
+
// ============================================================
|
|
26
|
+
// HYBRID SEARCH (the main export)
|
|
27
|
+
// ============================================================
|
|
28
|
+
|
|
29
|
+
/**
 * Hybrid memory search: keyword (FTS) hits merged with vector hits.
 *
 * Pipeline:
 *   1. Ask the keyword index for up to `limit * 2` matches and remember their IDs.
 *   2. Embed the query and ask the vector index for up to `limit * 2` neighbours.
 *   3. Score every vector hit by similarity, adding 0.2 when the same ID was
 *      also a keyword hit; keyword hits the vector search did not return are
 *      appended with a flat score of 0.2.
 *   4. Sort by score (descending), keep the first `limit`, load each memory
 *      and boost it.
 *
 * @param {string} queryText - What to search for
 * @param {number} limit - Max results to return (default: 5)
 * @returns {Promise<Array>} Ranked results. `similarity` and `hybrid_score`
 *   are strings formatted to four decimals.
 *
 * @example
 *   const results = await searchHybrid("night theme", 5);
 */
export async function searchHybrid(queryText, limit = 5) {
  const poolSize = limit * 2;

  // --- Keyword pass: only the set of matching IDs is needed ---
  const keywordIds = new Set();
  for (const hit of searchKeyword(queryText, poolSize)) {
    keywordIds.add(hit.id);
  }

  // --- Semantic pass ---
  const queryEmbedding = await generateEmbedding(queryText);
  const ranked = [];
  const semanticIds = new Set();
  for (const hit of searchVector(queryEmbedding, poolSize)) {
    const id = hit.rowid;
    // L2 distance -> similarity, clamped at 0.
    // For normalized vectors: cosine_sim = 1 - (L2_distance² / 2)
    const similarity = Math.max(0, 1 - (hit.distance * hit.distance) / 2);
    const keyword_match = keywordIds.has(id);

    semanticIds.add(id);
    ranked.push({
      id,
      similarity,
      hybrid_score: similarity + (keyword_match ? 0.2 : 0),
      keyword_match
    });
  }

  // --- Keyword-only hits get the flat keyword score ---
  for (const id of keywordIds) {
    if (semanticIds.has(id)) continue;
    ranked.push({ id, similarity: 0, hybrid_score: 0.2, keyword_match: true });
  }

  // --- Rank, truncate, hydrate ---
  ranked.sort((left, right) => right.hybrid_score - left.hybrid_score);

  const results = [];
  for (const entry of ranked.slice(0, limit)) {
    const memory = getMemoryById(entry.id);
    if (!memory) continue; // Row disappeared between search and fetch

    // A memory that surfaces in results counts as "used"
    boostMemory(entry.id);

    results.push({
      id: memory.id,
      content: memory.content,
      importance_score: memory.importance_score,
      created_at: memory.created_at,
      similarity: entry.similarity.toFixed(4),
      hybrid_score: entry.hybrid_score.toFixed(4),
      keyword_match: entry.keyword_match
    });
  }

  return results;
}
|
package/src/server.js
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* server.js — MCP Server Setup
|
|
3
|
+
*
|
|
4
|
+
* Creates the MCP server, registers all tools, and connects
|
|
5
|
+
* via stdio transport (the standard MCP communication method).
|
|
6
|
+
*
|
|
7
|
+
* IMPORTANT: Never write to stdout — it's reserved for MCP protocol.
|
|
8
|
+
* All logging goes to stderr via console.error().
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
|
|
12
|
+
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
|
|
13
|
+
import { registerTools } from './tools.js';
|
|
14
|
+
import { applyTemporalDecay, closeDatabase } from './database.js';
|
|
15
|
+
|
|
16
|
+
/**
 * Start the Persyst MCP server.
 * This is called from index.js (the entry point).
 *
 * Creates the server, registers the tools, schedules the hourly temporal
 * decay job, installs SIGINT/SIGTERM handlers that stop the timer and close
 * the database, and finally connects over stdio. All diagnostics go to
 * stderr via console.error().
 *
 * @returns {Promise<void>} Resolves once the stdio transport is connected
 */
export async function startServer() {
  // --- Create MCP server ---
  const server = new McpServer({
    name: 'persyst',
    version: '1.0.0'
  });

  // --- Register all 11 tools (7 core + 4 advanced) ---
  registerTools(server);
  console.error('[persyst] 11 tools registered ✓');

  // --- Start temporal decay timer ---
  // Runs every hour: reduces importance of memories not accessed in 7+ days.
  // The call is guarded because an exception thrown from a timer callback is
  // an uncaught exception and would take the whole server down.
  const decayTimer = setInterval(() => {
    try {
      applyTemporalDecay();
    } catch (err) {
      console.error('[persyst] Temporal decay failed:', err.message || String(err));
    }
  }, 3600000);

  // --- Graceful shutdown ---
  const shutdown = () => {
    console.error('[persyst] Shutting down...');
    clearInterval(decayTimer);
    closeDatabase();
    process.exit(0);
  };
  process.on('SIGINT', shutdown);
  process.on('SIGTERM', shutdown);

  // --- Connect via stdio ---
  // This is how Claude Code, Cursor, and Aider communicate with us
  const transport = new StdioServerTransport();
  await server.connect(transport);

  console.error('[persyst] MCP server running on stdio ✓');
  console.error('[persyst] Ready to receive tool calls');
}
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
import * as ONNX_NODE from 'onnxruntime-node';
|
|
2
|
+
import * as ONNX_WEB from 'onnxruntime-web/webgpu';
|
|
3
|
+
import fs from 'fs';
|
|
4
|
+
import path from 'path';
|
|
5
|
+
import { fileURLToPath, pathToFileURL } from 'url';
|
|
6
|
+
import { createRequire } from 'module';
|
|
7
|
+
|
|
8
|
+
// ES modules have no built-in require(); build one so the installed
// onnxruntime-web entry file can be located on disk.
const require = createRequire(import.meta.url);

// Directory containing the resolved onnxruntime-web entry file.
const wasmDir = path.dirname(require.resolve('onnxruntime-web'));

// Route onnxruntime-node session creation through the onnxruntime-web
// implementation.
ONNX_NODE.InferenceSession.create = ONNX_WEB.InferenceSession.create;
|
|
14
|
+
|
|
15
|
+
// Patch URL.createObjectURL: a blob whose MIME type mentions "javascript" or
// "mjs" is answered with the file:// URL of ort-wasm-simd-threaded.asyncify.mjs
// inside wasmDir; every other blob is passed to the original implementation.
const originalCreateObjectURL = URL.createObjectURL;
URL.createObjectURL = (blob) => {
  const mime = blob.type || '';
  const looksLikeScript = mime.includes('javascript') || mime.includes('mjs');

  if (!looksLikeScript) {
    return originalCreateObjectURL(blob);
  }

  const localModule = path.join(wasmDir, 'ort-wasm-simd-threaded.asyncify.mjs');
  return pathToFileURL(localModule).href;
};
|
|
26
|
+
|
|
27
|
+
// Override global fetch to load ONNX WASM binaries and model files from local disk.
//
// Requests are served from the filesystem when the target is
//   - a file:// URL,
//   - a value without an http://, https:// or data: prefix (used as a path), or
//   - any URL mentioning "onnxruntime-web" / "ort-wasm" (mapped by file name
//     into wasmDir).
// Everything else is delegated to the original fetch.
const originalFetch = globalThis.fetch;
globalThis.fetch = async (url, options) => {
  // fetch() accepts a string, a URL object or a Request. A URL object has no
  // `.url` property, so it needs its own branch; otherwise urlStr would be
  // undefined and the startsWith() calls below would throw.
  let urlStr;
  if (typeof url === 'string') {
    urlStr = url;
  } else if (url instanceof URL) {
    urlStr = url.href;
  } else {
    urlStr = url.url;
  }

  let isLocal = false;
  let filePath = '';

  if (urlStr.startsWith('file://')) {
    isLocal = true;
    filePath = fileURLToPath(urlStr);
  } else if (!urlStr.startsWith('http://') && !urlStr.startsWith('https://') && !urlStr.startsWith('data:')) {
    isLocal = true;
    filePath = urlStr;
  }

  // Intercept onnxruntime-web CDN URLs and route them to node_modules/onnxruntime-web/dist
  if (urlStr.includes('onnxruntime-web') || urlStr.includes('ort-wasm')) {
    isLocal = true;
    // Last path segment with any query string / fragment stripped
    const filename = urlStr.split('/').pop().split('?')[0].split('#')[0];
    filePath = path.join(wasmDir, filename);
  }

  if (isLocal) {
    filePath = path.normalize(filePath);
    try {
      const buffer = fs.readFileSync(filePath);
      let contentType = 'application/octet-stream';
      if (filePath.endsWith('.wasm')) contentType = 'application/wasm';
      else if (filePath.endsWith('.mjs') || filePath.endsWith('.js')) contentType = 'text/javascript';

      return new Response(buffer, {
        status: 200,
        statusText: 'OK',
        headers: { 'Content-Type': contentType }
      });
    } catch (err) {
      // Log to stderr, then let the caller see the original error.
      console.error('[persyst] Failed to read local file:', filePath, err.message);
      throw err;
    }
  }

  return originalFetch(url, options);
};
|
package/src/tools.js
ADDED
|
@@ -0,0 +1,290 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* tools.js — MCP Tool Definitions & Handlers
|
|
3
|
+
*
|
|
4
|
+
* Defines all 11 tools that AI agents can call via MCP:
|
|
5
|
+
*
|
|
6
|
+
* Core (MVP):
|
|
7
|
+
* 1. add_memory — Store a new memory
|
|
8
|
+
* 2. search_memories — Hybrid keyword + semantic search
|
|
9
|
+
* 3. get_memory — Get one memory by ID
|
|
10
|
+
* 4. update_memory — Update content (re-embeds automatically)
|
|
11
|
+
* 5. delete_memory — Remove a memory permanently
|
|
12
|
+
* 6. get_recent_memories — Latest N memories
|
|
13
|
+
* 7. get_important_memories — Top N by importance
|
|
14
|
+
*
|
|
15
|
+
* Advanced (Phase 3):
|
|
16
|
+
* 8. ingest_git_commits — Import git history as memories
|
|
17
|
+
* 9. add_entity — Create a named entity
|
|
18
|
+
* 10. link_entity_memory — Connect entity ↔ memory
|
|
19
|
+
* 11. search_by_entity — Find memories linked to an entity
|
|
20
|
+
*
|
|
21
|
+
* Uses Zod schemas for input validation (required by McpServer).
|
|
22
|
+
*/
|
|
23
|
+
|
|
24
|
+
import { z } from 'zod';
|
|
25
|
+
import { generateEmbedding } from './embeddings.js';
|
|
26
|
+
import {
|
|
27
|
+
insertMemory,
|
|
28
|
+
insertVector,
|
|
29
|
+
getMemory,
|
|
30
|
+
updateMemoryContent,
|
|
31
|
+
deleteMemory,
|
|
32
|
+
deleteVec,
|
|
33
|
+
getRecentMemories,
|
|
34
|
+
getImportantMemories,
|
|
35
|
+
insertEntity,
|
|
36
|
+
getEntityByName,
|
|
37
|
+
insertEdge,
|
|
38
|
+
getMemoriesByEntity,
|
|
39
|
+
getAllEntities,
|
|
40
|
+
memoryExists
|
|
41
|
+
} from './database.js';
|
|
42
|
+
import { searchHybrid } from './search.js';
|
|
43
|
+
import { getRecentCommits } from './git.js';
|
|
44
|
+
|
|
45
|
+
/**
 * Register all MCP tools on the server.
 *
 * Each tool is declared with a name, a description shown to the agent, a Zod
 * schema for its arguments, and an async handler that returns an MCP text
 * result built by `text()`.
 *
 * Write ordering: wherever a tool both stores content and stores its
 * embedding, the embedding is generated BEFORE anything is written, so a
 * failure while embedding cannot leave a memory row with a missing or stale
 * vector.
 *
 * "How many" arguments (`limit`, `count`) must be positive whole numbers.
 *
 * @param {McpServer} server - The MCP server instance
 */
export function registerTools(server) {

  // ========================================
  // 1. ADD MEMORY
  // ========================================
  server.tool(
    'add_memory',
    'Store a new memory. It will be searchable by both keywords and meaning.',
    {
      content: z.string().describe('The memory content to store'),
      importance: z.number().min(0).max(1).default(1.0)
        .describe('Importance score from 0 (low) to 1 (high)')
    },
    async ({ content, importance }) => {
      // Embed first: if this throws, nothing has been written yet.
      const embedding = await generateEmbedding(content);
      const id = insertMemory(content, importance);
      insertVector(id, embedding);

      return text({ success: true, id, message: `Memory #${id} stored` });
    }
  );

  // ========================================
  // 2. SEARCH MEMORIES
  // ========================================
  server.tool(
    'search_memories',
    'Search memories using hybrid keyword + semantic search. Finds exact matches AND similar meanings (e.g. "dark mode" finds "night theme").',
    {
      query: z.string().describe('What to search for'),
      limit: z.number().int().min(1).default(5).describe('Max results (default: 5)')
    },
    async ({ query, limit }) => {
      const results = await searchHybrid(query, limit);
      return text({ results, count: results.length });
    }
  );

  // ========================================
  // 3. GET MEMORY
  // ========================================
  server.tool(
    'get_memory',
    'Get a specific memory by its ID. Boosts its importance automatically.',
    {
      id: z.number().describe('Memory ID to retrieve')
    },
    async ({ id }) => {
      const memory = getMemory(id);
      if (!memory) return text({ error: `Memory #${id} not found` });
      return text(memory);
    }
  );

  // ========================================
  // 4. UPDATE MEMORY
  // ========================================
  server.tool(
    'update_memory',
    'Update the content of an existing memory. Automatically re-generates the search embedding.',
    {
      id: z.number().describe('Memory ID to update'),
      content: z.string().describe('New memory content')
    },
    async ({ id, content }) => {
      // Embed before touching the row so a failed embedding cannot leave the
      // new content paired with the old vector.
      const embedding = await generateEmbedding(content);

      const updated = updateMemoryContent(id, content);
      if (!updated) return text({ error: `Memory #${id} not found` });

      // Swap the stored vector for the one matching the new content
      deleteVec(id);
      insertVector(id, embedding);

      return text({ success: true, id, message: `Memory #${id} updated` });
    }
  );

  // ========================================
  // 5. DELETE MEMORY
  // ========================================
  server.tool(
    'delete_memory',
    'Permanently delete a memory by its ID.',
    {
      id: z.number().describe('Memory ID to delete')
    },
    async ({ id }) => {
      const deleted = deleteMemory(id);
      if (!deleted) return text({ error: `Memory #${id} not found` });
      return text({ success: true, id, message: `Memory #${id} deleted` });
    }
  );

  // ========================================
  // 6. GET RECENT MEMORIES
  // ========================================
  server.tool(
    'get_recent_memories',
    'Get the most recently created memories, newest first.',
    {
      limit: z.number().int().min(1).default(10).describe('How many to return (default: 10)')
    },
    async ({ limit }) => {
      const memories = getRecentMemories(limit);
      return text({ memories, count: memories.length });
    }
  );

  // ========================================
  // 7. GET IMPORTANT MEMORIES
  // ========================================
  server.tool(
    'get_important_memories',
    'Get memories ranked by importance score, highest first.',
    {
      limit: z.number().int().min(1).default(10).describe('How many to return (default: 10)')
    },
    async ({ limit }) => {
      const memories = getImportantMemories(limit);
      return text({ memories, count: memories.length });
    }
  );

  // ========================================
  // 8. INGEST GIT COMMITS
  // ========================================
  server.tool(
    'ingest_git_commits',
    'Import recent git commits from a repository as memories. Each commit becomes a searchable memory. Deduplicates automatically — safe to call multiple times.',
    {
      repo_path: z.string().describe('Absolute path to the git repository'),
      count: z.number().int().min(1).default(20).describe('Number of recent commits to import (default: 20)')
    },
    async ({ repo_path, count }) => {
      try {
        const commits = getRecentCommits(repo_path, count);
        let added = 0;
        let skipped = 0;

        for (const commit of commits) {
          // Dedup by commit hash prefix: stored commit memories start with
          // "[<7-char hash>]", so a prefix pattern finds earlier imports.
          const hashPrefix = commit.hash.slice(0, 7);
          if (memoryExists(`[${hashPrefix}]%`)) {
            skipped++;
            continue;
          }

          // Embed first so a failure does not leave a memory without a vector
          const embedding = await generateEmbedding(commit.fullText);

          // Store commit as memory
          const id = insertMemory(commit.fullText, 0.6);
          insertVector(id, embedding);

          // Auto-create author entity and link
          const authorId = insertEntity(commit.author, 'person');
          if (authorId) {
            insertEdge(authorId, id, 'authored', 'entity', 'memory');
          }

          added++;
        }

        return text({
          success: true,
          added,
          skipped,
          total_commits: commits.length,
          message: `Ingested ${added} commits (${skipped} already existed)`
        });
      } catch (err) {
        // Reported as a normal tool result rather than a thrown error
        return text({ error: err.message });
      }
    }
  );

  // ========================================
  // 9. ADD ENTITY
  // ========================================
  server.tool(
    'add_entity',
    'Create a named entity (person, tech, project, concept, file). Entities can be linked to memories for graph traversal.',
    {
      name: z.string().describe('Entity name (e.g. "React", "John", "auth-service")'),
      type: z.string().describe('Entity type: person, tech, project, concept, file')
    },
    async ({ name, type }) => {
      const id = insertEntity(name, type);
      return text({ success: true, id, name, type, message: `Entity "${name}" created` });
    }
  );

  // ========================================
  // 10. LINK ENTITY TO MEMORY
  // ========================================
  server.tool(
    'link_entity_memory',
    'Connect an entity to a memory with a relationship label (e.g. "mentions", "is_about", "decided_by").',
    {
      entity_name: z.string().describe('Name of the entity'),
      memory_id: z.number().describe('ID of the memory to link'),
      relation: z.string().default('mentions').describe('Relationship type (e.g. mentions, is_about, decided_by)')
    },
    async ({ entity_name, memory_id, relation }) => {
      const entity = getEntityByName(entity_name);
      if (!entity) return text({ error: `Entity "${entity_name}" not found. Create it first with add_entity.` });

      // NOTE(review): getMemory is used here only as an existence check. The
      // get_memory tool description says retrieval boosts importance; if that
      // boost happens inside getMemory, linking also boosts — confirm intended.
      const memory = getMemory(memory_id);
      if (!memory) return text({ error: `Memory #${memory_id} not found` });

      insertEdge(entity.id, memory_id, relation, 'entity', 'memory');
      return text({ success: true, entity: entity_name, memory_id, relation, message: `Linked "${entity_name}" → memory #${memory_id}` });
    }
  );

  // ========================================
  // 11. SEARCH BY ENTITY
  // ========================================
  server.tool(
    'search_by_entity',
    'Find all memories linked to a specific entity. Returns memories connected via edges in the knowledge graph.',
    {
      entity_name: z.string().describe('Name of the entity to search for')
    },
    async ({ entity_name }) => {
      const entity = getEntityByName(entity_name);
      if (!entity) return text({ error: `Entity "${entity_name}" not found` });

      const memories = getMemoriesByEntity(entity.id);
      return text({ entity, memories, count: memories.length });
    }
  );
}
|
|
280
|
+
|
|
281
|
+
// ============================================================
|
|
282
|
+
// HELPER
|
|
283
|
+
// ============================================================
|
|
284
|
+
|
|
285
|
+
/**
 * Wrap a value in the MCP tool-result shape.
 *
 * The value is serialized as pretty-printed JSON (2-space indent) and placed
 * in a single `text` content item.
 *
 * @param {*} data - Any JSON-serializable value
 * @returns {{content: Array<{type: string, text: string}>}} MCP text result
 */
function text(data) {
  const serialized = JSON.stringify(data, null, 2);
  const item = { type: 'text', text: serialized };
  return { content: [item] };
}
|