@aeriondyseti/vector-memory-mcp 2.2.0 → 2.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -4
- package/scripts/lancedb-extract.ts +106 -0
- package/src/db/connection.ts +2 -6
- package/src/db/conversation.repository.ts +3 -8
- package/src/db/memory.repository.ts +4 -9
- package/src/db/migrations.ts +50 -12
- package/src/db/sqlite-utils.ts +45 -2
- package/src/migration.ts +90 -146
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@aeriondyseti/vector-memory-mcp",
|
|
3
|
-
"version": "2.2.0",
|
|
3
|
+
"version": "2.2.1",
|
|
4
4
|
"description": "A zero-configuration RAG memory server for MCP clients",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "src/index.ts",
|
|
@@ -49,10 +49,8 @@
|
|
|
49
49
|
"@huggingface/transformers": "^3.8.0",
|
|
50
50
|
"@lancedb/lancedb": "^0.26.2",
|
|
51
51
|
"@modelcontextprotocol/sdk": "^1.0.0",
|
|
52
|
-
"apache-arrow": "^21.1.0",
|
|
53
52
|
"arg": "^5.0.2",
|
|
54
|
-
"hono": "^4.11.3",
|
|
55
|
-
"sqlite-vec": "^0.1.6"
|
|
53
|
+
"hono": "^4.11.3"
|
|
56
54
|
},
|
|
57
55
|
"devDependencies": {
|
|
58
56
|
"@types/bun": "latest",
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
#!/usr/bin/env bun
|
|
2
|
+
/**
|
|
3
|
+
* Standalone LanceDB data extractor — runs in a child process so that
|
|
4
|
+
* @lancedb/lancedb native bindings never coexist with bun:sqlite's
|
|
5
|
+
* extension loading in the same process.
|
|
6
|
+
*
|
|
7
|
+
* Usage: bun scripts/lancedb-extract.ts <lance-db-path>
|
|
8
|
+
* Output: JSON on stdout — { memories: Row[], conversations: Row[] }
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
const source = process.argv[2];
|
|
12
|
+
if (!source) {
|
|
13
|
+
console.error("Usage: bun scripts/lancedb-extract.ts <lance-db-path>");
|
|
14
|
+
process.exit(1);
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
/**
 * Normalize a timestamp cell read from LanceDB to epoch milliseconds.
 *
 * Accepted inputs:
 *  - finite `number`  → returned unchanged (treated as already being epoch ms)
 *  - `bigint`         → converted with `Number()`
 *  - valid `Date`     → `getTime()`
 *  - `string`         → `Date.parse()` when it yields a finite value
 *
 * Anything else (including NaN/Infinity and invalid Dates) falls back to the
 * current time. Non-finite values must not be returned: this script emits its
 * result through `JSON.stringify`, which would turn NaN into `null`.
 *
 * The fallback is reported on stderr so it is never silent; stdout is left
 * untouched because it carries the JSON payload.
 *
 * @param value raw timestamp value of unknown runtime type
 * @returns a finite epoch-millisecond timestamp
 */
function toEpochMs(value: unknown): number {
  if (typeof value === "number" && Number.isFinite(value)) return value;
  if (typeof value === "bigint") return Number(value);

  if (value instanceof Date || typeof value === "string") {
    const ms = value instanceof Date ? value.getTime() : Date.parse(value);
    if (Number.isFinite(ms)) return ms;
  }

  console.error(
    `⚠️ Unexpected timestamp type: ${typeof value} (value: ${String(value)}), using current time`,
  );
  return Date.now();
}
|
|
23
|
+
|
|
24
|
+
/**
 * Convert a vector cell of unknown runtime shape into a plain `number[]`.
 *
 * Handled shapes, in order:
 *  1. plain array                  → returned as-is
 *  2. Float32Array / Float64Array  → copied element by element
 *  3. object with `toArray()`      → `Array.from(obj.toArray())`
 *  4. any other ArrayBuffer view   → its bytes are read as packed float32
 *  5. everything else              → `[]`
 *
 * In case 4 the bytes are copied into a fresh buffer before being viewed as
 * float32. A Float32Array view cannot be created at a byte offset that is not
 * a multiple of 4, so viewing the original buffer in place would throw a
 * RangeError for unaligned inputs. Trailing bytes that do not fill a whole
 * float are ignored.
 *
 * @param vec raw vector value
 * @returns the vector as a number array, or `[]` when the shape is unknown
 */
function toFloatArray(vec: unknown): number[] {
  if (Array.isArray(vec)) return vec;

  // Float typed arrays already hold numeric elements; never reinterpret their bytes.
  if (vec instanceof Float32Array || vec instanceof Float64Array) {
    return Array.from(vec);
  }

  const withToArray = vec as { toArray?: unknown } | null | undefined;
  if (withToArray && typeof withToArray.toArray === "function") {
    return Array.from(withToArray.toArray() as Iterable<number>);
  }

  if (ArrayBuffer.isView(vec)) {
    const { buffer, byteOffset, byteLength } = vec;
    const bytesPerFloat = Float32Array.BYTES_PER_ELEMENT;
    const usableBytes = byteLength - (byteLength % bytesPerFloat);
    // slice() copies, so the resulting buffer always starts at offset 0 (aligned).
    const aligned = buffer.slice(byteOffset, byteOffset + usableBytes);
    return Array.from(new Float32Array(aligned));
  }

  return [];
}
|
|
36
|
+
|
|
37
|
+
const lancedb = await import("@lancedb/lancedb");
|
|
38
|
+
const db = await lancedb.connect(source);
|
|
39
|
+
const tableNames = await db.tableNames();
|
|
40
|
+
console.error(`Found tables: ${tableNames.join(", ")}`);
|
|
41
|
+
|
|
42
|
+
const result: { memories: any[]; conversations: any[] } = {
|
|
43
|
+
memories: [],
|
|
44
|
+
conversations: [],
|
|
45
|
+
};
|
|
46
|
+
|
|
47
|
+
const BATCH_SIZE = 500;
|
|
48
|
+
|
|
49
|
+
if (tableNames.includes("memories")) {
|
|
50
|
+
const table = await db.openTable("memories");
|
|
51
|
+
const total = await table.countRows();
|
|
52
|
+
console.error(`Reading ${total} memories...`);
|
|
53
|
+
|
|
54
|
+
let offset = 0;
|
|
55
|
+
while (true) {
|
|
56
|
+
const rows = await table.query().limit(BATCH_SIZE).offset(offset).toArray();
|
|
57
|
+
if (rows.length === 0) break;
|
|
58
|
+
for (const row of rows) {
|
|
59
|
+
result.memories.push({
|
|
60
|
+
id: row.id,
|
|
61
|
+
content: row.content,
|
|
62
|
+
metadata: row.metadata ?? "{}",
|
|
63
|
+
vector: toFloatArray(row.vector),
|
|
64
|
+
created_at: toEpochMs(row.created_at),
|
|
65
|
+
updated_at: toEpochMs(row.updated_at),
|
|
66
|
+
last_accessed: row.last_accessed != null ? toEpochMs(row.last_accessed) : null,
|
|
67
|
+
superseded_by: row.superseded_by ?? null,
|
|
68
|
+
usefulness: row.usefulness ?? 0,
|
|
69
|
+
access_count: row.access_count ?? 0,
|
|
70
|
+
});
|
|
71
|
+
}
|
|
72
|
+
offset += BATCH_SIZE;
|
|
73
|
+
}
|
|
74
|
+
console.error(` ${result.memories.length} memories read`);
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
if (tableNames.includes("conversation_history")) {
|
|
78
|
+
const table = await db.openTable("conversation_history");
|
|
79
|
+
const total = await table.countRows();
|
|
80
|
+
console.error(`Reading ${total} conversation chunks...`);
|
|
81
|
+
|
|
82
|
+
let offset = 0;
|
|
83
|
+
while (true) {
|
|
84
|
+
const rows = await table.query().limit(BATCH_SIZE).offset(offset).toArray();
|
|
85
|
+
if (rows.length === 0) break;
|
|
86
|
+
for (const row of rows) {
|
|
87
|
+
result.conversations.push({
|
|
88
|
+
id: row.id,
|
|
89
|
+
content: row.content,
|
|
90
|
+
metadata: row.metadata ?? "{}",
|
|
91
|
+
vector: toFloatArray(row.vector),
|
|
92
|
+
created_at: toEpochMs(row.created_at),
|
|
93
|
+
session_id: row.session_id,
|
|
94
|
+
role: row.role,
|
|
95
|
+
message_index_start: row.message_index_start ?? 0,
|
|
96
|
+
message_index_end: row.message_index_end ?? 0,
|
|
97
|
+
project: row.project ?? "",
|
|
98
|
+
});
|
|
99
|
+
}
|
|
100
|
+
offset += BATCH_SIZE;
|
|
101
|
+
}
|
|
102
|
+
console.error(` ${result.conversations.length} conversation chunks read`);
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
await db.close?.();
|
|
106
|
+
process.stdout.write(JSON.stringify(result));
|
package/src/db/connection.ts
CHANGED
|
@@ -1,12 +1,11 @@
|
|
|
1
1
|
import { Database } from "bun:sqlite";
|
|
2
|
-
import * as sqliteVec from "sqlite-vec";
|
|
3
2
|
import { mkdirSync } from "fs";
|
|
4
3
|
import { dirname } from "path";
|
|
5
4
|
import { runMigrations } from "./migrations.js";
|
|
6
5
|
|
|
7
6
|
/**
|
|
8
|
-
* Open (or create) a SQLite database at the given path
|
|
9
|
-
*
|
|
7
|
+
* Open (or create) a SQLite database at the given path
|
|
8
|
+
* and run schema migrations.
|
|
10
9
|
*/
|
|
11
10
|
export function connectToDatabase(dbPath: string): Database {
|
|
12
11
|
mkdirSync(dirname(dbPath), { recursive: true });
|
|
@@ -15,9 +14,6 @@ export function connectToDatabase(dbPath: string): Database {
|
|
|
15
14
|
// WAL mode for concurrent read performance
|
|
16
15
|
db.exec("PRAGMA journal_mode=WAL");
|
|
17
16
|
|
|
18
|
-
// Load sqlite-vec extension
|
|
19
|
-
sqliteVec.load(db);
|
|
20
|
-
|
|
21
17
|
// Ensure schema is up to date
|
|
22
18
|
runMigrations(db);
|
|
23
19
|
|
|
@@ -9,6 +9,7 @@ import {
|
|
|
9
9
|
sanitizeFtsQuery,
|
|
10
10
|
hybridRRF,
|
|
11
11
|
topByRRF,
|
|
12
|
+
knnSearch,
|
|
12
13
|
} from "./sqlite-utils.js";
|
|
13
14
|
|
|
14
15
|
export class ConversationRepository {
|
|
@@ -112,14 +113,8 @@ export class ConversationRepository {
|
|
|
112
113
|
): Promise<ConversationHybridRow[]> {
|
|
113
114
|
const candidateCount = limit * 3;
|
|
114
115
|
|
|
115
|
-
// Vector KNN search
|
|
116
|
-
const vecResults = this.db
|
|
117
|
-
.prepare(
|
|
118
|
-
`SELECT id FROM conversation_history_vec
|
|
119
|
-
WHERE vector MATCH ? AND k = ?
|
|
120
|
-
ORDER BY distance`
|
|
121
|
-
)
|
|
122
|
-
.all(serializeVector(embedding), candidateCount) as Array<{ id: string }>;
|
|
116
|
+
// Vector KNN search (brute-force cosine similarity in JS)
|
|
117
|
+
const vecResults = knnSearch(this.db, "conversation_history_vec", embedding, candidateCount);
|
|
123
118
|
|
|
124
119
|
// FTS5 search
|
|
125
120
|
const ftsQuery = sanitizeFtsQuery(query);
|
|
@@ -6,6 +6,7 @@ import {
|
|
|
6
6
|
sanitizeFtsQuery,
|
|
7
7
|
hybridRRF,
|
|
8
8
|
topByRRF,
|
|
9
|
+
knnSearch,
|
|
9
10
|
} from "./sqlite-utils.js";
|
|
10
11
|
import {
|
|
11
12
|
type Memory,
|
|
@@ -46,7 +47,7 @@ export class MemoryRepository {
|
|
|
46
47
|
}
|
|
47
48
|
|
|
48
49
|
/**
|
|
49
|
-
* Fetch the embedding vector for a memory id
|
|
50
|
+
* Fetch the embedding vector for a memory id.
|
|
50
51
|
*/
|
|
51
52
|
private getEmbedding(id: string): number[] {
|
|
52
53
|
const row = this.db
|
|
@@ -110,7 +111,6 @@ export class MemoryRepository {
|
|
|
110
111
|
memory.lastAccessed?.getTime() ?? null,
|
|
111
112
|
);
|
|
112
113
|
|
|
113
|
-
// vec0 virtual tables don't support REPLACE — delete then insert
|
|
114
114
|
this.db.prepare("DELETE FROM memories_vec WHERE id = ?").run(memory.id);
|
|
115
115
|
this.db
|
|
116
116
|
.prepare("INSERT INTO memories_vec (id, vector) VALUES (?, ?)")
|
|
@@ -170,14 +170,9 @@ export class MemoryRepository {
|
|
|
170
170
|
limit: number,
|
|
171
171
|
): Promise<HybridRow[]> {
|
|
172
172
|
const candidateLimit = limit * 3;
|
|
173
|
-
const vecBuf = serializeVector(embedding);
|
|
174
173
|
|
|
175
|
-
// Vector KNN search
|
|
176
|
-
const vectorResults = this.db
|
|
177
|
-
.prepare(
|
|
178
|
-
"SELECT id, distance FROM memories_vec WHERE vector MATCH ? AND k = ? ORDER BY distance",
|
|
179
|
-
)
|
|
180
|
-
.all(vecBuf, candidateLimit) as Array<{ id: string; distance: number }>;
|
|
174
|
+
// Vector KNN search (brute-force cosine similarity in JS)
|
|
175
|
+
const vectorResults = knnSearch(this.db, "memories_vec", embedding, candidateLimit);
|
|
181
176
|
|
|
182
177
|
// Full-text search
|
|
183
178
|
const ftsQuery = sanitizeFtsQuery(query);
|
package/src/db/migrations.ts
CHANGED
|
@@ -1,5 +1,33 @@
|
|
|
1
1
|
import type { Database } from "bun:sqlite";
|
|
2
2
|
|
|
3
|
+
/**
|
|
4
|
+
* Check if a table exists and is a vec0 virtual table (from the old sqlite-vec schema).
|
|
5
|
+
*/
|
|
6
|
+
function isVec0Table(db: Database, tableName: string): boolean {
|
|
7
|
+
const row = db
|
|
8
|
+
.prepare(
|
|
9
|
+
`SELECT sql FROM sqlite_master WHERE type = 'table' AND name = ?`,
|
|
10
|
+
)
|
|
11
|
+
.get(tableName) as { sql: string } | null;
|
|
12
|
+
return row?.sql?.toLowerCase().includes("vec0") ?? false;
|
|
13
|
+
}
|
|
14
|
+
|
|
15
|
+
/**
 * Migrate a vec0 virtual table to a plain
 * `(id TEXT PRIMARY KEY, vector BLOB NOT NULL)` table of the same name.
 *
 * Steps: copy `id` + `vector` into `<tableName>_migration_tmp`, drop the
 * original table, recreate it as a plain table, copy the rows back, and drop
 * the temporary table.
 *
 * All steps run inside a single transaction. Without it, a failure after the
 * `DROP TABLE` would leave the vectors stranded in the temporary table while
 * the next startup creates an empty vector table in their place.
 *
 * NOTE(review): reading or dropping a vec0 virtual table normally requires the
 * sqlite-vec module to be registered on the connection — confirm that this
 * path still works on connections that do not load the extension.
 *
 * @param db        open database handle
 * @param tableName table to convert; interpolated into SQL unquoted, so it
 *                  must be a trusted identifier, never user input
 */
function migrateVec0ToBlob(db: Database, tableName: string): void {
  const tmpTable = `${tableName}_migration_tmp`;

  const runAtomically = db.transaction(() => {
    db.exec(`CREATE TABLE IF NOT EXISTS ${tmpTable} (id TEXT PRIMARY KEY, vector BLOB NOT NULL)`);
    // OR IGNORE: tolerate rows already present in a leftover tmp table.
    db.exec(`INSERT OR IGNORE INTO ${tmpTable} (id, vector) SELECT id, vector FROM ${tableName}`);
    db.exec(`DROP TABLE ${tableName}`);
    db.exec(`CREATE TABLE ${tableName} (id TEXT PRIMARY KEY, vector BLOB NOT NULL)`);
    db.exec(`INSERT INTO ${tableName} (id, vector) SELECT id, vector FROM ${tmpTable}`);
    db.exec(`DROP TABLE ${tmpTable}`);
  });

  runAtomically();
}
|
|
30
|
+
|
|
3
31
|
/**
|
|
4
32
|
* Run all schema migrations. Safe to call on every startup (uses IF NOT EXISTS).
|
|
5
33
|
*/
|
|
@@ -19,12 +47,17 @@ export function runMigrations(db: Database): void {
|
|
|
19
47
|
)
|
|
20
48
|
`);
|
|
21
49
|
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
50
|
+
// Migrate vec0 -> plain blob table if upgrading from sqlite-vec schema
|
|
51
|
+
if (isVec0Table(db, "memories_vec")) {
|
|
52
|
+
migrateVec0ToBlob(db, "memories_vec");
|
|
53
|
+
} else {
|
|
54
|
+
db.exec(`
|
|
55
|
+
CREATE TABLE IF NOT EXISTS memories_vec (
|
|
56
|
+
id TEXT PRIMARY KEY,
|
|
57
|
+
vector BLOB NOT NULL
|
|
58
|
+
)
|
|
59
|
+
`);
|
|
60
|
+
}
|
|
28
61
|
|
|
29
62
|
db.exec(`
|
|
30
63
|
CREATE VIRTUAL TABLE IF NOT EXISTS memories_fts USING fts5(
|
|
@@ -48,12 +81,17 @@ export function runMigrations(db: Database): void {
|
|
|
48
81
|
)
|
|
49
82
|
`);
|
|
50
83
|
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
84
|
+
// Migrate vec0 -> plain blob table if upgrading from sqlite-vec schema
|
|
85
|
+
if (isVec0Table(db, "conversation_history_vec")) {
|
|
86
|
+
migrateVec0ToBlob(db, "conversation_history_vec");
|
|
87
|
+
} else {
|
|
88
|
+
db.exec(`
|
|
89
|
+
CREATE TABLE IF NOT EXISTS conversation_history_vec (
|
|
90
|
+
id TEXT PRIMARY KEY,
|
|
91
|
+
vector BLOB NOT NULL
|
|
92
|
+
)
|
|
93
|
+
`);
|
|
94
|
+
}
|
|
57
95
|
|
|
58
96
|
db.exec(`
|
|
59
97
|
CREATE VIRTUAL TABLE IF NOT EXISTS conversation_history_fts USING fts5(
|
package/src/db/sqlite-utils.ts
CHANGED
|
@@ -4,19 +4,62 @@ import type { Database } from "bun:sqlite";
|
|
|
4
4
|
export const RRF_K = 60;
|
|
5
5
|
|
|
6
6
|
/**
|
|
7
|
-
* Serialize a number[] embedding to
|
|
7
|
+
* Serialize a number[] embedding to raw float32 bytes for BLOB storage.
|
|
8
8
|
*/
|
|
9
9
|
export function serializeVector(vec: number[]): Buffer {
|
|
10
10
|
return Buffer.from(new Float32Array(vec).buffer);
|
|
11
11
|
}
|
|
12
12
|
|
|
13
13
|
/**
 * Deserialize raw float32 bytes back to number[].
 *
 * The bytes are read in the platform's native byte order. Trailing bytes that
 * do not make up a whole float are ignored.
 *
 * A Float32Array view can only start at a byte offset that is a multiple of 4.
 * When `buf` sits at an unaligned offset inside its backing ArrayBuffer (for
 * example a sub-range of a larger Buffer), the bytes are copied to a fresh,
 * aligned buffer first instead of throwing a RangeError.
 *
 * @param buf raw float32 bytes
 * @returns the decoded values as a plain array
 */
export function deserializeVector(buf: Buffer): number[] {
  const { buffer, byteOffset, byteLength } = buf;
  const bytesPerFloat = Float32Array.BYTES_PER_ELEMENT;
  const count = Math.floor(byteLength / bytesPerFloat);

  const floats =
    byteOffset % bytesPerFloat === 0
      ? new Float32Array(buffer, byteOffset, count)
      : new Float32Array(buffer.slice(byteOffset, byteOffset + count * bytesPerFloat));

  return Array.from(floats);
}
|
|
19
19
|
|
|
20
|
+
/**
 * Cosine similarity between two pre-normalized Float32Arrays.
 *
 * Returns the dot product, which equals cosine similarity only when both
 * vectors are unit-length. The loop runs over `a.length`, so both inputs must
 * have the same dimension: a shorter `b` yields NaN and a longer `b` is
 * silently truncated.
 *
 * @param a first vector
 * @param b second vector, same length as `a`
 * @returns dot product of `a` and `b`
 */
export function cosineSimilarity(a: Float32Array, b: Float32Array): number {
  let dot = 0;
  for (let i = 0; i < a.length; i++) dot += a[i] * b[i];
  return dot;
}

/** Tables that hold one raw float32 BLOB embedding per id. */
type VecTable = "memories_vec" | "conversation_history_vec";

/**
 * Brute-force KNN search over a vector blob table.
 *
 * Loads every `(id, vector)` row of `table`, scores it against `queryVec`
 * with {@link cosineSimilarity}, and returns the `k` closest rows sorted by
 * ascending distance, where `distance = 1 - similarity`.
 *
 * Rows whose stored vector has a different dimension than the query are
 * skipped. Scoring them would produce NaN or a truncated dot product, and a
 * NaN distance breaks the sort comparator.
 *
 * @param db       open database handle
 * @param table    vector table to scan; restricted to known table names
 *                 because it is interpolated into the SQL text
 * @param queryVec query embedding; assumed to be unit-length like the stored
 *                 vectors — TODO confirm at the call sites
 * @param k        maximum number of results; values <= 0 (or NaN) yield `[]`
 * @returns up to `k` `{ id, distance }` pairs, nearest first
 */
export function knnSearch(
  db: Database,
  table: VecTable,
  queryVec: number[],
  k: number,
): Array<{ id: string; distance: number }> {
  // slice(0, k) with a negative k would return "all but the last |k|" rows.
  if (!(k > 0)) return [];

  const rows = db
    .prepare(`SELECT id, vector FROM ${table}`)
    .all() as Array<{ id: string; vector: Buffer }>;

  const qv = new Float32Array(queryVec);
  const bytesPerFloat = Float32Array.BYTES_PER_ELEMENT;
  const scored: Array<{ id: string; distance: number }> = [];

  for (const r of rows) {
    const { buffer, byteOffset, byteLength } = r.vector;

    // Dimension mismatch: not comparable with the query.
    if (byteLength !== qv.byteLength) continue;

    // A Float32Array view needs a 4-byte-aligned offset; copy when it is not.
    const vec =
      byteOffset % bytesPerFloat === 0
        ? new Float32Array(buffer, byteOffset, qv.length)
        : new Float32Array(buffer.slice(byteOffset, byteOffset + byteLength));

    // Convert similarity to distance (1 - sim) for consistency with previous API
    scored.push({ id: r.id, distance: 1 - cosineSimilarity(qv, vec) });
  }

  scored.sort((a, b) => a.distance - b.distance);
  return scored.slice(0, k);
}
|
|
62
|
+
|
|
20
63
|
/**
|
|
21
64
|
* Sanitize a user query for FTS5 by quoting each token as a literal.
|
|
22
65
|
* Prevents FTS5 syntax errors from special characters like AND, OR, *, etc.
|
package/src/migration.ts
CHANGED
|
@@ -1,57 +1,32 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* LanceDB
|
|
2
|
+
* LanceDB -> SQLite migration logic.
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
4
|
+
* Reads LanceDB data in a child process (scripts/lancedb-extract.ts) to avoid
|
|
5
|
+
* a native symbol collision between @lancedb/lancedb and bun:sqlite.
|
|
6
|
+
* The extracted JSON is then written to SQLite in-process.
|
|
6
7
|
*
|
|
7
8
|
* @deprecated Will be removed in the next major version once LanceDB
|
|
8
9
|
* support is dropped.
|
|
9
10
|
*/
|
|
10
11
|
|
|
11
12
|
import { existsSync, statSync } from "fs";
|
|
13
|
+
import { resolve, dirname } from "path";
|
|
14
|
+
import { fileURLToPath } from "url";
|
|
12
15
|
import { connectToDatabase } from "./db/connection.js";
|
|
13
16
|
import { serializeVector } from "./db/sqlite-utils.js";
|
|
14
17
|
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
function toEpochMs(value: unknown): number {
|
|
18
|
-
if (typeof value === "number") return value;
|
|
19
|
-
if (value instanceof Date) return value.getTime();
|
|
20
|
-
if (typeof value === "bigint") return Number(value);
|
|
21
|
-
console.warn(`⚠️ Unexpected timestamp type: ${typeof value} (value: ${value}), using current time`);
|
|
22
|
-
return Date.now();
|
|
23
|
-
}
|
|
24
|
-
|
|
25
|
-
function toFloatArray(vec: unknown): number[] {
|
|
26
|
-
if (Array.isArray(vec)) return vec;
|
|
27
|
-
if (vec instanceof Float32Array) return Array.from(vec);
|
|
28
|
-
// Arrow Vector objects have a .toArray() method that returns Float32Array
|
|
29
|
-
if (vec && typeof (vec as any).toArray === "function") {
|
|
30
|
-
return Array.from((vec as any).toArray());
|
|
31
|
-
}
|
|
32
|
-
if (ArrayBuffer.isView(vec)) {
|
|
33
|
-
const view = vec as DataView;
|
|
34
|
-
return Array.from(new Float32Array(view.buffer, view.byteOffset, view.byteLength / 4));
|
|
35
|
-
}
|
|
36
|
-
return [];
|
|
37
|
-
}
|
|
18
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
38
19
|
|
|
39
20
|
// ── Detection ───────────────────────────────────────────────────────
|
|
40
21
|
|
|
41
|
-
/**
|
|
42
|
-
* Check if a path is a LanceDB directory (i.e. needs migration).
|
|
43
|
-
* Returns true if the path exists and is a directory.
|
|
44
|
-
*/
|
|
45
22
|
export function isLanceDbDirectory(dbPath: string): boolean {
|
|
46
23
|
return existsSync(dbPath) && statSync(dbPath).isDirectory();
|
|
47
24
|
}
|
|
48
25
|
|
|
49
|
-
// ──
|
|
26
|
+
// ── Types ───────────────────────────────────────────────────────────
|
|
50
27
|
|
|
51
28
|
export interface MigrateOptions {
|
|
52
|
-
/** Path to the LanceDB directory (source). */
|
|
53
29
|
source: string;
|
|
54
|
-
/** Path to the new SQLite file (target). */
|
|
55
30
|
target: string;
|
|
56
31
|
}
|
|
57
32
|
|
|
@@ -61,24 +36,44 @@ export interface MigrateResult {
|
|
|
61
36
|
outputSizeMB: string;
|
|
62
37
|
}
|
|
63
38
|
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
39
|
+
interface ExtractedData {
|
|
40
|
+
memories: Array<{
|
|
41
|
+
id: string;
|
|
42
|
+
content: string;
|
|
43
|
+
metadata: string;
|
|
44
|
+
vector: number[];
|
|
45
|
+
created_at: number;
|
|
46
|
+
updated_at: number;
|
|
47
|
+
last_accessed: number | null;
|
|
48
|
+
superseded_by: string | null;
|
|
49
|
+
usefulness: number;
|
|
50
|
+
access_count: number;
|
|
51
|
+
}>;
|
|
52
|
+
conversations: Array<{
|
|
53
|
+
id: string;
|
|
54
|
+
content: string;
|
|
55
|
+
metadata: string;
|
|
56
|
+
vector: number[];
|
|
57
|
+
created_at: number;
|
|
58
|
+
session_id: string;
|
|
59
|
+
role: string;
|
|
60
|
+
message_index_start: number;
|
|
61
|
+
message_index_end: number;
|
|
62
|
+
project: string;
|
|
63
|
+
}>;
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
// ── Migration ───────────────────────────────────────────────────────
|
|
67
|
+
|
|
70
68
|
export async function migrate(opts: MigrateOptions): Promise<MigrateResult> {
|
|
71
69
|
const { source, target } = opts;
|
|
72
70
|
|
|
73
|
-
// Validate source
|
|
74
71
|
if (!existsSync(source)) {
|
|
75
72
|
throw new Error(`Source not found: ${source}`);
|
|
76
73
|
}
|
|
77
74
|
if (!statSync(source).isDirectory()) {
|
|
78
75
|
throw new Error(`Source is not a directory (expected LanceDB): ${source}`);
|
|
79
76
|
}
|
|
80
|
-
|
|
81
|
-
// Prevent overwriting
|
|
82
77
|
if (existsSync(target)) {
|
|
83
78
|
throw new Error(
|
|
84
79
|
`Target already exists: ${target}\n Delete it first or choose a different target path.`
|
|
@@ -89,148 +84,100 @@ export async function migrate(opts: MigrateOptions): Promise<MigrateResult> {
|
|
|
89
84
|
console.error(`📄 Target (SQLite): ${target}`);
|
|
90
85
|
console.error();
|
|
91
86
|
|
|
92
|
-
//
|
|
93
|
-
|
|
87
|
+
// Phase 1: Extract data from LanceDB in a subprocess.
|
|
88
|
+
// This avoids a native symbol collision between @lancedb/lancedb and bun:sqlite.
|
|
89
|
+
const extractScript = resolve(__dirname, "..", "scripts", "lancedb-extract.ts");
|
|
90
|
+
const proc = Bun.spawn(["bun", extractScript, source], {
|
|
91
|
+
stdout: "pipe",
|
|
92
|
+
stderr: "inherit",
|
|
93
|
+
});
|
|
94
|
+
|
|
95
|
+
const output = await new Response(proc.stdout).text();
|
|
96
|
+
const exitCode = await proc.exited;
|
|
97
|
+
|
|
98
|
+
if (exitCode !== 0) {
|
|
99
|
+
throw new Error(`LanceDB extraction failed (exit code ${exitCode})`);
|
|
100
|
+
}
|
|
94
101
|
|
|
95
|
-
|
|
96
|
-
const lanceDb = await lancedb.connect(source);
|
|
97
|
-
const tableNames = await lanceDb.tableNames();
|
|
98
|
-
console.error(`Found tables: ${tableNames.join(", ")}`);
|
|
102
|
+
const data: ExtractedData = JSON.parse(output);
|
|
99
103
|
|
|
100
|
-
//
|
|
104
|
+
// Phase 2: Write to SQLite (no LanceDB in this process).
|
|
101
105
|
const sqliteDb = connectToDatabase(target);
|
|
102
106
|
|
|
103
107
|
let memoriesMigrated = 0;
|
|
104
108
|
let conversationChunksMigrated = 0;
|
|
105
109
|
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
const memoriesTable = await lanceDb.openTable("memories");
|
|
109
|
-
const totalMemories = await memoriesTable.countRows();
|
|
110
|
-
console.error(`\n🧠 Migrating ${totalMemories} memories...`);
|
|
110
|
+
if (data.memories.length > 0) {
|
|
111
|
+
console.error(`\n🧠 Writing ${data.memories.length} memories to SQLite...`);
|
|
111
112
|
|
|
112
113
|
const insertMain = sqliteDb.prepare(
|
|
113
114
|
`INSERT OR REPLACE INTO memories
|
|
114
115
|
(id, content, metadata, created_at, updated_at, superseded_by, usefulness, access_count, last_accessed)
|
|
115
116
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`
|
|
116
117
|
);
|
|
118
|
+
const deleteVec = sqliteDb.prepare(`DELETE FROM memories_vec WHERE id = ?`);
|
|
117
119
|
const insertVec = sqliteDb.prepare(
|
|
118
120
|
`INSERT INTO memories_vec (id, vector) VALUES (?, ?)`
|
|
119
121
|
);
|
|
120
122
|
const insertFts = sqliteDb.prepare(
|
|
121
|
-
`INSERT INTO memories_fts (id, content) VALUES (?, ?)`
|
|
123
|
+
`INSERT OR REPLACE INTO memories_fts (id, content) VALUES (?, ?)`
|
|
122
124
|
);
|
|
123
125
|
|
|
124
|
-
const
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
const updatedAt = toEpochMs(row.updated_at);
|
|
136
|
-
const lastAccessed = row.last_accessed != null ? toEpochMs(row.last_accessed) : null;
|
|
137
|
-
|
|
138
|
-
insertMain.run(
|
|
139
|
-
row.id,
|
|
140
|
-
row.content,
|
|
141
|
-
row.metadata ?? "{}",
|
|
142
|
-
createdAt,
|
|
143
|
-
updatedAt,
|
|
144
|
-
row.superseded_by ?? null,
|
|
145
|
-
row.usefulness ?? 0,
|
|
146
|
-
row.access_count ?? 0,
|
|
147
|
-
lastAccessed,
|
|
148
|
-
);
|
|
149
|
-
|
|
150
|
-
if (vec.length > 0) {
|
|
151
|
-
insertVec.run(row.id, serializeVector(vec));
|
|
152
|
-
}
|
|
153
|
-
|
|
154
|
-
insertFts.run(row.id, row.content);
|
|
126
|
+
const tx = sqliteDb.transaction(() => {
|
|
127
|
+
for (const row of data.memories) {
|
|
128
|
+
insertMain.run(
|
|
129
|
+
row.id, row.content, row.metadata,
|
|
130
|
+
row.created_at, row.updated_at,
|
|
131
|
+
row.superseded_by, row.usefulness,
|
|
132
|
+
row.access_count, row.last_accessed,
|
|
133
|
+
);
|
|
134
|
+
if (row.vector.length > 0) {
|
|
135
|
+
deleteVec.run(row.id);
|
|
136
|
+
insertVec.run(row.id, serializeVector(row.vector));
|
|
155
137
|
}
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
tx();
|
|
159
|
-
memoriesMigrated += rows.length;
|
|
160
|
-
offset += BATCH_SIZE;
|
|
161
|
-
|
|
162
|
-
if (totalMemories > BATCH_SIZE) {
|
|
163
|
-
process.stderr.write(` ${memoriesMigrated}/${totalMemories}\r`);
|
|
138
|
+
insertFts.run(row.id, row.content);
|
|
164
139
|
}
|
|
165
|
-
}
|
|
166
|
-
|
|
140
|
+
});
|
|
141
|
+
tx();
|
|
142
|
+
memoriesMigrated = data.memories.length;
|
|
167
143
|
console.error(` ✅ ${memoriesMigrated} memories migrated`);
|
|
168
144
|
}
|
|
169
145
|
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
const convTable = await lanceDb.openTable("conversation_history");
|
|
173
|
-
const totalConv = await convTable.countRows();
|
|
174
|
-
console.error(`\n💬 Migrating ${totalConv} conversation chunks...`);
|
|
146
|
+
if (data.conversations.length > 0) {
|
|
147
|
+
console.error(`\n💬 Writing ${data.conversations.length} conversation chunks to SQLite...`);
|
|
175
148
|
|
|
176
149
|
const insertMain = sqliteDb.prepare(
|
|
177
150
|
`INSERT OR REPLACE INTO conversation_history
|
|
178
151
|
(id, content, metadata, created_at, session_id, role, message_index_start, message_index_end, project)
|
|
179
152
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`
|
|
180
153
|
);
|
|
154
|
+
const deleteVec = sqliteDb.prepare(`DELETE FROM conversation_history_vec WHERE id = ?`);
|
|
181
155
|
const insertVec = sqliteDb.prepare(
|
|
182
156
|
`INSERT INTO conversation_history_vec (id, vector) VALUES (?, ?)`
|
|
183
157
|
);
|
|
184
158
|
const insertFts = sqliteDb.prepare(
|
|
185
|
-
`INSERT INTO conversation_history_fts (id, content) VALUES (?, ?)`
|
|
159
|
+
`INSERT OR REPLACE INTO conversation_history_fts (id, content) VALUES (?, ?)`
|
|
186
160
|
);
|
|
187
161
|
|
|
188
|
-
const
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
const createdAt = toEpochMs(row.created_at);
|
|
199
|
-
|
|
200
|
-
insertMain.run(
|
|
201
|
-
row.id,
|
|
202
|
-
row.content,
|
|
203
|
-
row.metadata ?? "{}",
|
|
204
|
-
createdAt,
|
|
205
|
-
row.session_id,
|
|
206
|
-
row.role,
|
|
207
|
-
row.message_index_start ?? 0,
|
|
208
|
-
row.message_index_end ?? 0,
|
|
209
|
-
row.project ?? "",
|
|
210
|
-
);
|
|
211
|
-
|
|
212
|
-
if (vec.length > 0) {
|
|
213
|
-
insertVec.run(row.id, serializeVector(vec));
|
|
214
|
-
}
|
|
215
|
-
|
|
216
|
-
insertFts.run(row.id, row.content);
|
|
162
|
+
const tx = sqliteDb.transaction(() => {
|
|
163
|
+
for (const row of data.conversations) {
|
|
164
|
+
insertMain.run(
|
|
165
|
+
row.id, row.content, row.metadata,
|
|
166
|
+
row.created_at, row.session_id, row.role,
|
|
167
|
+
row.message_index_start, row.message_index_end, row.project,
|
|
168
|
+
);
|
|
169
|
+
if (row.vector.length > 0) {
|
|
170
|
+
deleteVec.run(row.id);
|
|
171
|
+
insertVec.run(row.id, serializeVector(row.vector));
|
|
217
172
|
}
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
tx();
|
|
221
|
-
conversationChunksMigrated += rows.length;
|
|
222
|
-
offset += BATCH_SIZE;
|
|
223
|
-
|
|
224
|
-
if (totalConv > BATCH_SIZE) {
|
|
225
|
-
process.stderr.write(` ${conversationChunksMigrated}/${totalConv}\r`);
|
|
173
|
+
insertFts.run(row.id, row.content);
|
|
226
174
|
}
|
|
227
|
-
}
|
|
228
|
-
|
|
175
|
+
});
|
|
176
|
+
tx();
|
|
177
|
+
conversationChunksMigrated = data.conversations.length;
|
|
229
178
|
console.error(` ✅ ${conversationChunksMigrated} conversation chunks migrated`);
|
|
230
179
|
}
|
|
231
180
|
|
|
232
|
-
// ── Finalize ────────────────────────────────────────────────────
|
|
233
|
-
await lanceDb.close?.();
|
|
234
181
|
sqliteDb.close();
|
|
235
182
|
|
|
236
183
|
const { size } = statSync(target);
|
|
@@ -239,9 +186,6 @@ export async function migrate(opts: MigrateOptions): Promise<MigrateResult> {
|
|
|
239
186
|
return { memoriesMigrated, conversationChunksMigrated, outputSizeMB };
|
|
240
187
|
}
|
|
241
188
|
|
|
242
|
-
/**
|
|
243
|
-
* Format a human-readable summary after migration completes.
|
|
244
|
-
*/
|
|
245
189
|
export function formatMigrationSummary(
|
|
246
190
|
source: string,
|
|
247
191
|
target: string,
|