@aeriondyseti/vector-memory-mcp 2.1.1 → 2.2.0-dev.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@aeriondyseti/vector-memory-mcp",
3
- "version": "2.1.1",
3
+ "version": "2.2.0-dev.1",
4
4
  "description": "A zero-configuration RAG memory server for MCP clients",
5
5
  "type": "module",
6
6
  "main": "src/index.ts",
@@ -51,8 +51,7 @@
51
51
  "@modelcontextprotocol/sdk": "^1.0.0",
52
52
  "apache-arrow": "^21.1.0",
53
53
  "arg": "^5.0.2",
54
- "hono": "^4.11.3",
55
- "sqlite-vec": "^0.1.6"
54
+ "hono": "^4.11.3"
56
55
  },
57
56
  "devDependencies": {
58
57
  "@types/bun": "latest",
@@ -0,0 +1,106 @@
1
+ #!/usr/bin/env bun
2
+ /**
3
+ * Standalone LanceDB data extractor — runs in a child process so that
4
+ * @lancedb/lancedb native bindings never coexist with bun:sqlite's
5
+ * extension loading in the same process.
6
+ *
7
+ * Usage: bun scripts/lancedb-extract.ts <lance-db-path>
8
+ * Output: JSON on stdout — { memories: Row[], conversations: Row[] }
9
+ */
10
+
11
+ const source = process.argv[2];
12
+ if (!source) {
13
+ console.error("Usage: bun scripts/lancedb-extract.ts <lance-db-path>");
14
+ process.exit(1);
15
+ }
16
+
17
+ function toEpochMs(value: unknown): number {
18
+ if (typeof value === "number") return value;
19
+ if (value instanceof Date) return value.getTime();
20
+ if (typeof value === "bigint") return Number(value);
21
+ return Date.now();
22
+ }
23
+
24
+ function toFloatArray(vec: unknown): number[] {
25
+ if (Array.isArray(vec)) return vec;
26
+ if (vec instanceof Float32Array) return Array.from(vec);
27
+ if (vec && typeof (vec as any).toArray === "function") {
28
+ return Array.from((vec as any).toArray());
29
+ }
30
+ if (ArrayBuffer.isView(vec)) {
31
+ const view = vec as DataView;
32
+ return Array.from(new Float32Array(view.buffer, view.byteOffset, view.byteLength / 4));
33
+ }
34
+ return [];
35
+ }
36
+
37
+ const lancedb = await import("@lancedb/lancedb");
38
+ const db = await lancedb.connect(source);
39
+ const tableNames = await db.tableNames();
40
+ console.error(`Found tables: ${tableNames.join(", ")}`);
41
+
42
+ const result: { memories: any[]; conversations: any[] } = {
43
+ memories: [],
44
+ conversations: [],
45
+ };
46
+
47
+ const BATCH_SIZE = 500;
48
+
49
+ if (tableNames.includes("memories")) {
50
+ const table = await db.openTable("memories");
51
+ const total = await table.countRows();
52
+ console.error(`Reading ${total} memories...`);
53
+
54
+ let offset = 0;
55
+ while (true) {
56
+ const rows = await table.query().limit(BATCH_SIZE).offset(offset).toArray();
57
+ if (rows.length === 0) break;
58
+ for (const row of rows) {
59
+ result.memories.push({
60
+ id: row.id,
61
+ content: row.content,
62
+ metadata: row.metadata ?? "{}",
63
+ vector: toFloatArray(row.vector),
64
+ created_at: toEpochMs(row.created_at),
65
+ updated_at: toEpochMs(row.updated_at),
66
+ last_accessed: row.last_accessed != null ? toEpochMs(row.last_accessed) : null,
67
+ superseded_by: row.superseded_by ?? null,
68
+ usefulness: row.usefulness ?? 0,
69
+ access_count: row.access_count ?? 0,
70
+ });
71
+ }
72
+ offset += BATCH_SIZE;
73
+ }
74
+ console.error(` ${result.memories.length} memories read`);
75
+ }
76
+
77
+ if (tableNames.includes("conversation_history")) {
78
+ const table = await db.openTable("conversation_history");
79
+ const total = await table.countRows();
80
+ console.error(`Reading ${total} conversation chunks...`);
81
+
82
+ let offset = 0;
83
+ while (true) {
84
+ const rows = await table.query().limit(BATCH_SIZE).offset(offset).toArray();
85
+ if (rows.length === 0) break;
86
+ for (const row of rows) {
87
+ result.conversations.push({
88
+ id: row.id,
89
+ content: row.content,
90
+ metadata: row.metadata ?? "{}",
91
+ vector: toFloatArray(row.vector),
92
+ created_at: toEpochMs(row.created_at),
93
+ session_id: row.session_id,
94
+ role: row.role,
95
+ message_index_start: row.message_index_start ?? 0,
96
+ message_index_end: row.message_index_end ?? 0,
97
+ project: row.project ?? "",
98
+ });
99
+ }
100
+ offset += BATCH_SIZE;
101
+ }
102
+ console.error(` ${result.conversations.length} conversation chunks read`);
103
+ }
104
+
105
+ await db.close?.();
106
+ process.stdout.write(JSON.stringify(result));
@@ -1,12 +1,11 @@
1
1
  import { Database } from "bun:sqlite";
2
- import * as sqliteVec from "sqlite-vec";
3
2
  import { mkdirSync } from "fs";
4
3
  import { dirname } from "path";
5
4
  import { runMigrations } from "./migrations.js";
6
5
 
7
6
  /**
8
- * Open (or create) a SQLite database at the given path,
9
- * load the sqlite-vec extension, and run schema migrations.
7
+ * Open (or create) a SQLite database at the given path
8
+ * and run schema migrations.
10
9
  */
11
10
  export function connectToDatabase(dbPath: string): Database {
12
11
  mkdirSync(dirname(dbPath), { recursive: true });
@@ -15,9 +14,6 @@ export function connectToDatabase(dbPath: string): Database {
15
14
  // WAL mode for concurrent read performance
16
15
  db.exec("PRAGMA journal_mode=WAL");
17
16
 
18
- // Load sqlite-vec extension
19
- sqliteVec.load(db);
20
-
21
17
  // Ensure schema is up to date
22
18
  runMigrations(db);
23
19
 
@@ -9,6 +9,7 @@ import {
9
9
  sanitizeFtsQuery,
10
10
  hybridRRF,
11
11
  topByRRF,
12
+ knnSearch,
12
13
  } from "./sqlite-utils.js";
13
14
 
14
15
  export class ConversationRepository {
@@ -112,14 +113,8 @@ export class ConversationRepository {
112
113
  ): Promise<ConversationHybridRow[]> {
113
114
  const candidateCount = limit * 3;
114
115
 
115
- // Vector KNN search
116
- const vecResults = this.db
117
- .prepare(
118
- `SELECT id FROM conversation_history_vec
119
- WHERE vector MATCH ? AND k = ?
120
- ORDER BY distance`
121
- )
122
- .all(serializeVector(embedding), candidateCount) as Array<{ id: string }>;
116
+ // Vector KNN search (brute-force cosine similarity in JS)
117
+ const vecResults = knnSearch(this.db, "conversation_history_vec", embedding, candidateCount);
123
118
 
124
119
  // FTS5 search
125
120
  const ftsQuery = sanitizeFtsQuery(query);
@@ -6,6 +6,7 @@ import {
6
6
  sanitizeFtsQuery,
7
7
  hybridRRF,
8
8
  topByRRF,
9
+ knnSearch,
9
10
  } from "./sqlite-utils.js";
10
11
  import {
11
12
  type Memory,
@@ -46,7 +47,7 @@ export class MemoryRepository {
46
47
  }
47
48
 
48
49
  /**
49
- * Fetch the embedding vector for a memory id from the vec0 table.
50
+ * Fetch the embedding vector for a memory id.
50
51
  */
51
52
  private getEmbedding(id: string): number[] {
52
53
  const row = this.db
@@ -110,7 +111,6 @@ export class MemoryRepository {
110
111
  memory.lastAccessed?.getTime() ?? null,
111
112
  );
112
113
 
113
- // vec0 virtual tables don't support REPLACE — delete then insert
114
114
  this.db.prepare("DELETE FROM memories_vec WHERE id = ?").run(memory.id);
115
115
  this.db
116
116
  .prepare("INSERT INTO memories_vec (id, vector) VALUES (?, ?)")
@@ -170,14 +170,9 @@ export class MemoryRepository {
170
170
  limit: number,
171
171
  ): Promise<HybridRow[]> {
172
172
  const candidateLimit = limit * 3;
173
- const vecBuf = serializeVector(embedding);
174
173
 
175
- // Vector KNN search
176
- const vectorResults = this.db
177
- .prepare(
178
- "SELECT id, distance FROM memories_vec WHERE vector MATCH ? AND k = ? ORDER BY distance",
179
- )
180
- .all(vecBuf, candidateLimit) as Array<{ id: string; distance: number }>;
174
+ // Vector KNN search (brute-force cosine similarity in JS)
175
+ const vectorResults = knnSearch(this.db, "memories_vec", embedding, candidateLimit);
181
176
 
182
177
  // Full-text search
183
178
  const ftsQuery = sanitizeFtsQuery(query);
@@ -1,5 +1,33 @@
1
1
  import type { Database } from "bun:sqlite";
2
2
 
3
+ /**
4
+ * Check if a table exists and is a vec0 virtual table (from the old sqlite-vec schema).
5
+ */
6
+ function isVec0Table(db: Database, tableName: string): boolean {
7
+ const row = db
8
+ .prepare(
9
+ `SELECT sql FROM sqlite_master WHERE type = 'table' AND name = ?`,
10
+ )
11
+ .get(tableName) as { sql: string } | null;
12
+ return row?.sql?.toLowerCase().includes("vec0") ?? false;
13
+ }
14
+
15
+ /**
16
+ * Migrate a vec0 virtual table to a plain BLOB table.
17
+ * Copies id + vector data, drops the vec0 table and its shadow tables, then
18
+ * creates the new plain table with the copied data.
19
+ */
20
+ function migrateVec0ToBlob(db: Database, tableName: string): void {
21
+ const tmpTable = `${tableName}_migration_tmp`;
22
+
23
+ db.exec(`CREATE TABLE IF NOT EXISTS ${tmpTable} (id TEXT PRIMARY KEY, vector BLOB NOT NULL)`);
24
+ db.exec(`INSERT OR IGNORE INTO ${tmpTable} (id, vector) SELECT id, vector FROM ${tableName}`);
25
+ db.exec(`DROP TABLE ${tableName}`);
26
+ db.exec(`CREATE TABLE ${tableName} (id TEXT PRIMARY KEY, vector BLOB NOT NULL)`);
27
+ db.exec(`INSERT INTO ${tableName} (id, vector) SELECT id, vector FROM ${tmpTable}`);
28
+ db.exec(`DROP TABLE ${tmpTable}`);
29
+ }
30
+
3
31
  /**
4
32
  * Run all schema migrations. Safe to call on every startup (uses IF NOT EXISTS).
5
33
  */
@@ -19,12 +47,17 @@ export function runMigrations(db: Database): void {
19
47
  )
20
48
  `);
21
49
 
22
- db.exec(`
23
- CREATE VIRTUAL TABLE IF NOT EXISTS memories_vec USING vec0(
24
- id TEXT PRIMARY KEY,
25
- vector float[384]
26
- )
27
- `);
50
+ // Migrate vec0 -> plain blob table if upgrading from sqlite-vec schema
51
+ if (isVec0Table(db, "memories_vec")) {
52
+ migrateVec0ToBlob(db, "memories_vec");
53
+ } else {
54
+ db.exec(`
55
+ CREATE TABLE IF NOT EXISTS memories_vec (
56
+ id TEXT PRIMARY KEY,
57
+ vector BLOB NOT NULL
58
+ )
59
+ `);
60
+ }
28
61
 
29
62
  db.exec(`
30
63
  CREATE VIRTUAL TABLE IF NOT EXISTS memories_fts USING fts5(
@@ -48,12 +81,17 @@ export function runMigrations(db: Database): void {
48
81
  )
49
82
  `);
50
83
 
51
- db.exec(`
52
- CREATE VIRTUAL TABLE IF NOT EXISTS conversation_history_vec USING vec0(
53
- id TEXT PRIMARY KEY,
54
- vector float[384]
55
- )
56
- `);
84
+ // Migrate vec0 -> plain blob table if upgrading from sqlite-vec schema
85
+ if (isVec0Table(db, "conversation_history_vec")) {
86
+ migrateVec0ToBlob(db, "conversation_history_vec");
87
+ } else {
88
+ db.exec(`
89
+ CREATE TABLE IF NOT EXISTS conversation_history_vec (
90
+ id TEXT PRIMARY KEY,
91
+ vector BLOB NOT NULL
92
+ )
93
+ `);
94
+ }
57
95
 
58
96
  db.exec(`
59
97
  CREATE VIRTUAL TABLE IF NOT EXISTS conversation_history_fts USING fts5(
@@ -4,19 +4,60 @@ import type { Database } from "bun:sqlite";
4
4
  export const RRF_K = 60;
5
5
 
6
6
/**
 * Encode a number[] embedding as raw float32 bytes for BLOB storage.
 * (Byte order follows the host platform, same as the matching decoder.)
 */
export function serializeVector(vec: number[]): Buffer {
  const floats = Float32Array.from(vec);
  return Buffer.from(floats.buffer);
}
12
12
 
13
13
/**
 * Decode raw float32 bytes (as produced by serializeVector) back to a
 * number[]. Uses byteOffset/byteLength so slices of pooled Buffers are
 * read correctly instead of the whole backing ArrayBuffer.
 */
export function deserializeVector(buf: Buffer): number[] {
  const view = new Float32Array(buf.buffer, buf.byteOffset, buf.byteLength / 4);
  return [...view];
}
19
19
 
20
+ /**
21
+ * Cosine similarity between two pre-normalized Float32Arrays.
22
+ * Returns dot product (equivalent to cosine sim when vectors are unit-length).
23
+ */
24
+ export function cosineSimilarity(a: Float32Array, b: Float32Array): number {
25
+ let dot = 0;
26
+ for (let i = 0; i < a.length; i++) dot += a[i] * b[i];
27
+ return dot;
28
+ }
29
+
30
+ /**
31
+ * Brute-force KNN search over a vector blob table.
32
+ * Loads all vectors, computes cosine similarity, returns top-K results
33
+ * sorted by descending similarity (ascending distance).
34
+ */
35
+ export function knnSearch(
36
+ db: Database,
37
+ table: string,
38
+ queryVec: number[],
39
+ k: number,
40
+ ): Array<{ id: string; distance: number }> {
41
+ const rows = db
42
+ .prepare(`SELECT id, vector FROM ${table}`)
43
+ .all() as Array<{ id: string; vector: Buffer }>;
44
+
45
+ const qv = new Float32Array(queryVec);
46
+ const scored = rows.map((r) => {
47
+ const vec = new Float32Array(
48
+ r.vector.buffer,
49
+ r.vector.byteOffset,
50
+ r.vector.byteLength / 4,
51
+ );
52
+ const sim = cosineSimilarity(qv, vec);
53
+ // Convert similarity to distance (1 - sim) for consistency with previous API
54
+ return { id: r.id, distance: 1 - sim };
55
+ });
56
+
57
+ scored.sort((a, b) => a.distance - b.distance);
58
+ return scored.slice(0, k);
59
+ }
60
+
20
61
  /**
21
62
  * Sanitize a user query for FTS5 by quoting each token as a literal.
22
63
  * Prevents FTS5 syntax errors from special characters like AND, OR, *, etc.
@@ -189,7 +189,8 @@ export function createHttpApp(memoryService: MemoryService, config: Config): Hon
189
189
  // Get latest waypoint
190
190
  app.get("/waypoint", async (c) => {
191
191
  try {
192
- const waypoint = await memoryService.getLatestWaypoint();
192
+ const project = c.req.query("project");
193
+ const waypoint = await memoryService.getLatestWaypoint(project);
193
194
 
194
195
  if (!waypoint) {
195
196
  return c.json({ error: "No waypoint found" }, 404);
@@ -38,6 +38,10 @@ function errorText(e: unknown): string {
38
38
  return e instanceof Error ? e.message : String(e);
39
39
  }
40
40
 
41
+ function errorResult(text: string): CallToolResult {
42
+ return { isError: true, content: [{ type: "text", text }] };
43
+ }
44
+
41
45
  function parseDate(value: unknown, fieldName: string): Date | undefined {
42
46
  if (value === undefined) return undefined;
43
47
  const date = new Date(value as string);
@@ -59,7 +63,7 @@ export async function handleStoreMemories(
59
63
  try {
60
64
  memories = asArray(args?.memories, "memories");
61
65
  } catch (e) {
62
- return { isError: true, content: [{ type: "text", text: errorText(e) }] };
66
+ return errorResult(errorText(e));
63
67
  }
64
68
 
65
69
  const ids: string[] = [];
@@ -93,7 +97,7 @@ export async function handleDeleteMemories(
93
97
  try {
94
98
  ids = asArray(args?.ids, "ids");
95
99
  } catch (e) {
96
- return { isError: true, content: [{ type: "text", text: errorText(e) }] };
100
+ return errorResult(errorText(e));
97
101
  }
98
102
  const results: string[] = [];
99
103
 
@@ -128,7 +132,7 @@ export async function handleUpdateMemories(
128
132
  try {
129
133
  updates = asArray(args?.updates, "updates");
130
134
  } catch (e) {
131
- return { isError: true, content: [{ type: "text", text: errorText(e) }] };
135
+ return errorResult(errorText(e));
132
136
  }
133
137
 
134
138
  const results: string[] = [];
@@ -168,10 +172,11 @@ export async function handleSearchMemories(
168
172
  ): Promise<CallToolResult> {
169
173
  const query = args?.query;
170
174
  if (typeof query !== "string" || query.trim() === "") {
171
- return { isError: true, content: [{ type: "text", text: "query is required and must be a non-empty string" }] };
175
+ return errorResult("query is required and must be a non-empty string");
172
176
  }
173
177
  const intent = (args?.intent as SearchIntent) ?? "fact_check";
174
178
  const limit = (args?.limit as number) ?? 10;
179
+ const offset = (args?.offset as number) ?? 0;
175
180
  const includeDeleted = (args?.include_deleted as boolean) ?? false;
176
181
  const historyOnly = (args?.history_only as boolean) ?? false;
177
182
  // history_only implies include_history
@@ -181,13 +186,14 @@ export async function handleSearchMemories(
181
186
  try {
182
187
  historyFilters = parseHistoryFilters(args);
183
188
  } catch (e) {
184
- return { isError: true, content: [{ type: "text", text: errorText(e) }] };
189
+ return errorResult(errorText(e));
185
190
  }
186
191
 
187
192
  const results = await service.search(query, intent, limit, includeDeleted, {
188
193
  includeHistory,
189
194
  historyOnly,
190
195
  historyFilters,
196
+ offset,
191
197
  });
192
198
 
193
199
  if (results.length === 0) {
@@ -243,7 +249,7 @@ export async function handleGetMemories(
243
249
  try {
244
250
  ids = asArray(args?.ids, "ids");
245
251
  } catch (e) {
246
- return { isError: true, content: [{ type: "text", text: errorText(e) }] };
252
+ return errorResult(errorText(e));
247
253
  }
248
254
 
249
255
  const memories = await service.getMultiple(ids);
@@ -267,10 +273,7 @@ export async function handleReportMemoryUsefulness(
267
273
  const memory = await service.vote(memoryId, useful ? 1 : -1);
268
274
 
269
275
  if (!memory) {
270
- return {
271
- content: [{ type: "text", text: `Memory ${memoryId} not found` }],
272
- isError: true,
273
- };
276
+ return errorResult(`Memory ${memoryId} not found`);
274
277
  }
275
278
 
276
279
  return {
@@ -305,10 +308,11 @@ export async function handleSetWaypoint(
305
308
  }
306
309
 
307
310
  export async function handleGetWaypoint(
308
- _args: Record<string, unknown> | undefined,
311
+ args: Record<string, unknown> | undefined,
309
312
  service: MemoryService
310
313
  ): Promise<CallToolResult> {
311
- const waypoint = await service.getLatestWaypoint();
314
+ const project = args?.project as string | undefined;
315
+ const waypoint = await service.getLatestWaypoint(project);
312
316
 
313
317
  if (!waypoint) {
314
318
  return {
@@ -350,15 +354,9 @@ function requireConversationService(
350
354
  const conversationService = service.getConversationService();
351
355
  if (!conversationService) {
352
356
  return {
353
- error: {
354
- content: [
355
- {
356
- type: "text",
357
- text: "Conversation history indexing is not enabled. Enable it with --enable-history.",
358
- },
359
- ],
360
- isError: true,
361
- },
357
+ error: errorResult(
358
+ "Conversation history indexing is not enabled. Enable it with --enable-history."
359
+ ),
362
360
  };
363
361
  }
364
362
  return { service: conversationService };
@@ -376,7 +374,7 @@ export async function handleIndexConversations(
376
374
  const sinceStr = args?.since as string | undefined;
377
375
  const since = sinceStr ? new Date(sinceStr) : undefined;
378
376
  if (since && isNaN(since.getTime())) {
379
- return { isError: true, content: [{ type: "text", text: "Invalid 'since' date format" }] };
377
+ return errorResult("Invalid 'since' date format");
380
378
  }
381
379
 
382
380
  const result = await conversationService.indexConversations(path, since);
@@ -444,18 +442,12 @@ export async function handleReindexSession(
444
442
 
445
443
  const sessionId = args?.session_id as string | undefined;
446
444
  if (!sessionId) {
447
- return {
448
- content: [{ type: "text", text: "session_id is required" }],
449
- isError: true,
450
- };
445
+ return errorResult("session_id is required");
451
446
  }
452
447
  const result = await conversationService.reindexSession(sessionId);
453
448
 
454
449
  if (!result.success) {
455
- return {
456
- content: [{ type: "text", text: `Reindex failed: ${result.error}` }],
457
- isError: true,
458
- };
450
+ return errorResult(`Reindex failed: ${result.error}`);
459
451
  }
460
452
 
461
453
  return {
@@ -497,9 +489,6 @@ export async function handleToolCall(
497
489
  case "reindex_session":
498
490
  return handleReindexSession(args, service);
499
491
  default:
500
- return {
501
- content: [{ type: "text", text: `Unknown tool: ${name}` }],
502
- isError: true,
503
- };
492
+ return errorResult(`Unknown tool: ${name}`);
504
493
  }
505
494
  }
package/src/mcp/tools.ts CHANGED
@@ -162,6 +162,11 @@ When in doubt, search. Missing context is costlier than an extra query.`,
162
162
  description: "Maximum results to return (default: 10).",
163
163
  default: 10,
164
164
  },
165
+ offset: {
166
+ type: "integer",
167
+ description: "Number of results to skip for pagination (default: 0).",
168
+ default: 0,
169
+ },
165
170
  include_deleted: {
166
171
  type: "boolean",
167
172
  description: "Include soft-deleted memories in results (default: false). Useful for recovering prior information.",
@@ -297,7 +302,13 @@ export const getWaypointTool: Tool = {
297
302
  "Load the current project waypoint snapshot. Call at conversation start or when resuming a project.",
298
303
  inputSchema: {
299
304
  type: "object",
300
- properties: {},
305
+ properties: {
306
+ project: {
307
+ type: "string",
308
+ description:
309
+ "Project name to retrieve waypoint for. If omitted, retrieves the default (legacy) waypoint.",
310
+ },
311
+ },
301
312
  },
302
313
  };
303
314
 
package/src/migration.ts CHANGED
@@ -1,57 +1,32 @@
1
1
  /**
2
- * LanceDB SQLite (sqlite-vec) migration logic.
2
+ * LanceDB -> SQLite migration logic.
3
3
  *
4
- * This module is the shared core used by both the `migrate` subcommand
5
- * and the standalone `scripts/migrate-from-lancedb.ts` script.
4
+ * Reads LanceDB data in a child process (scripts/lancedb-extract.ts) to avoid
5
+ * a native symbol collision between @lancedb/lancedb and bun:sqlite.
6
+ * The extracted JSON is then written to SQLite in-process.
6
7
  *
7
8
  * @deprecated Will be removed in the next major version once LanceDB
8
9
  * support is dropped.
9
10
  */
10
11
 
11
12
  import { existsSync, statSync } from "fs";
13
+ import { resolve, dirname } from "path";
14
+ import { fileURLToPath } from "url";
12
15
  import { connectToDatabase } from "./db/connection.js";
13
16
  import { serializeVector } from "./db/sqlite-utils.js";
14
17
 
15
- // ── Helpers ─────────────────────────────────────────────────────────
16
-
17
- function toEpochMs(value: unknown): number {
18
- if (typeof value === "number") return value;
19
- if (value instanceof Date) return value.getTime();
20
- if (typeof value === "bigint") return Number(value);
21
- console.warn(`⚠️ Unexpected timestamp type: ${typeof value} (value: ${value}), using current time`);
22
- return Date.now();
23
- }
24
-
25
- function toFloatArray(vec: unknown): number[] {
26
- if (Array.isArray(vec)) return vec;
27
- if (vec instanceof Float32Array) return Array.from(vec);
28
- // Arrow Vector objects have a .toArray() method that returns Float32Array
29
- if (vec && typeof (vec as any).toArray === "function") {
30
- return Array.from((vec as any).toArray());
31
- }
32
- if (ArrayBuffer.isView(vec)) {
33
- const view = vec as DataView;
34
- return Array.from(new Float32Array(view.buffer, view.byteOffset, view.byteLength / 4));
35
- }
36
- return [];
37
- }
18
+ const __dirname = dirname(fileURLToPath(import.meta.url));
38
19
 
39
20
  // ── Detection ───────────────────────────────────────────────────────
40
21
 
41
- /**
42
- * Check if a path is a LanceDB directory (i.e. needs migration).
43
- * Returns true if the path exists and is a directory.
44
- */
45
22
/**
 * Detect a legacy LanceDB store: LanceDB databases are directories,
 * whereas the SQLite store is a single file. Returns false when the
 * path is missing or is a regular file.
 */
export function isLanceDbDirectory(dbPath: string): boolean {
  if (!existsSync(dbPath)) return false;
  return statSync(dbPath).isDirectory();
}
48
25
 
49
- // ── Migration ───────────────────────────────────────────────────────
26
+ // ── Types ───────────────────────────────────────────────────────────
50
27
 
51
28
  export interface MigrateOptions {
52
- /** Path to the LanceDB directory (source). */
53
29
  source: string;
54
- /** Path to the new SQLite file (target). */
55
30
  target: string;
56
31
  }
57
32
 
@@ -61,24 +36,44 @@ export interface MigrateResult {
61
36
  outputSizeMB: string;
62
37
  }
63
38
 
64
- /**
65
- * Run the full LanceDB → SQLite migration.
66
- *
67
- * Dynamically imports @lancedb/lancedb so the cost is only paid
68
- * when the migration is actually invoked.
69
- */
39
+ interface ExtractedData {
40
+ memories: Array<{
41
+ id: string;
42
+ content: string;
43
+ metadata: string;
44
+ vector: number[];
45
+ created_at: number;
46
+ updated_at: number;
47
+ last_accessed: number | null;
48
+ superseded_by: string | null;
49
+ usefulness: number;
50
+ access_count: number;
51
+ }>;
52
+ conversations: Array<{
53
+ id: string;
54
+ content: string;
55
+ metadata: string;
56
+ vector: number[];
57
+ created_at: number;
58
+ session_id: string;
59
+ role: string;
60
+ message_index_start: number;
61
+ message_index_end: number;
62
+ project: string;
63
+ }>;
64
+ }
65
+
66
+ // ── Migration ───────────────────────────────────────────────────────
67
+
70
68
  export async function migrate(opts: MigrateOptions): Promise<MigrateResult> {
71
69
  const { source, target } = opts;
72
70
 
73
- // Validate source
74
71
  if (!existsSync(source)) {
75
72
  throw new Error(`Source not found: ${source}`);
76
73
  }
77
74
  if (!statSync(source).isDirectory()) {
78
75
  throw new Error(`Source is not a directory (expected LanceDB): ${source}`);
79
76
  }
80
-
81
- // Prevent overwriting
82
77
  if (existsSync(target)) {
83
78
  throw new Error(
84
79
  `Target already exists: ${target}\n Delete it first or choose a different target path.`
@@ -89,148 +84,100 @@ export async function migrate(opts: MigrateOptions): Promise<MigrateResult> {
89
84
  console.error(`📄 Target (SQLite): ${target}`);
90
85
  console.error();
91
86
 
92
- // Dynamic import only loads LanceDB when migration is actually run
93
- const lancedb = await import("@lancedb/lancedb");
87
+ // Phase 1: Extract data from LanceDB in a subprocess.
88
+ // This avoids a native symbol collision between @lancedb/lancedb and bun:sqlite.
89
+ const extractScript = resolve(__dirname, "..", "scripts", "lancedb-extract.ts");
90
+ const proc = Bun.spawn(["bun", extractScript, source], {
91
+ stdout: "pipe",
92
+ stderr: "inherit",
93
+ });
94
+
95
+ const output = await new Response(proc.stdout).text();
96
+ const exitCode = await proc.exited;
97
+
98
+ if (exitCode !== 0) {
99
+ throw new Error(`LanceDB extraction failed (exit code ${exitCode})`);
100
+ }
94
101
 
95
- // Open LanceDB
96
- const lanceDb = await lancedb.connect(source);
97
- const tableNames = await lanceDb.tableNames();
98
- console.error(`Found tables: ${tableNames.join(", ")}`);
102
+ const data: ExtractedData = JSON.parse(output);
99
103
 
100
- // Open SQLite (reuses shared connection setup: WAL, sqlite-vec, migrations)
104
+ // Phase 2: Write to SQLite (no LanceDB in this process).
101
105
  const sqliteDb = connectToDatabase(target);
102
106
 
103
107
  let memoriesMigrated = 0;
104
108
  let conversationChunksMigrated = 0;
105
109
 
106
- // ── Migrate memories ────────────────────────────────────────────
107
- if (tableNames.includes("memories")) {
108
- const memoriesTable = await lanceDb.openTable("memories");
109
- const totalMemories = await memoriesTable.countRows();
110
- console.error(`\n🧠 Migrating ${totalMemories} memories...`);
110
+ if (data.memories.length > 0) {
111
+ console.error(`\n🧠 Writing ${data.memories.length} memories to SQLite...`);
111
112
 
112
113
  const insertMain = sqliteDb.prepare(
113
114
  `INSERT OR REPLACE INTO memories
114
115
  (id, content, metadata, created_at, updated_at, superseded_by, usefulness, access_count, last_accessed)
115
116
  VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`
116
117
  );
118
+ const deleteVec = sqliteDb.prepare(`DELETE FROM memories_vec WHERE id = ?`);
117
119
  const insertVec = sqliteDb.prepare(
118
120
  `INSERT INTO memories_vec (id, vector) VALUES (?, ?)`
119
121
  );
120
122
  const insertFts = sqliteDb.prepare(
121
- `INSERT INTO memories_fts (id, content) VALUES (?, ?)`
123
+ `INSERT OR REPLACE INTO memories_fts (id, content) VALUES (?, ?)`
122
124
  );
123
125
 
124
- const BATCH_SIZE = 500;
125
- let offset = 0;
126
-
127
- while (true) {
128
- const rows = await memoriesTable.query().limit(BATCH_SIZE).offset(offset).toArray();
129
- if (rows.length === 0) break;
130
-
131
- const tx = sqliteDb.transaction(() => {
132
- for (const row of rows) {
133
- const vec = toFloatArray(row.vector);
134
- const createdAt = toEpochMs(row.created_at);
135
- const updatedAt = toEpochMs(row.updated_at);
136
- const lastAccessed = row.last_accessed != null ? toEpochMs(row.last_accessed) : null;
137
-
138
- insertMain.run(
139
- row.id,
140
- row.content,
141
- row.metadata ?? "{}",
142
- createdAt,
143
- updatedAt,
144
- row.superseded_by ?? null,
145
- row.usefulness ?? 0,
146
- row.access_count ?? 0,
147
- lastAccessed,
148
- );
149
-
150
- if (vec.length > 0) {
151
- insertVec.run(row.id, serializeVector(vec));
152
- }
153
-
154
- insertFts.run(row.id, row.content);
126
+ const tx = sqliteDb.transaction(() => {
127
+ for (const row of data.memories) {
128
+ insertMain.run(
129
+ row.id, row.content, row.metadata,
130
+ row.created_at, row.updated_at,
131
+ row.superseded_by, row.usefulness,
132
+ row.access_count, row.last_accessed,
133
+ );
134
+ if (row.vector.length > 0) {
135
+ deleteVec.run(row.id);
136
+ insertVec.run(row.id, serializeVector(row.vector));
155
137
  }
156
- });
157
-
158
- tx();
159
- memoriesMigrated += rows.length;
160
- offset += BATCH_SIZE;
161
-
162
- if (totalMemories > BATCH_SIZE) {
163
- process.stderr.write(` ${memoriesMigrated}/${totalMemories}\r`);
138
+ insertFts.run(row.id, row.content);
164
139
  }
165
- }
166
-
140
+ });
141
+ tx();
142
+ memoriesMigrated = data.memories.length;
167
143
  console.error(` ✅ ${memoriesMigrated} memories migrated`);
168
144
  }
169
145
 
170
- // ── Migrate conversation history ────────────────────────────────
171
- if (tableNames.includes("conversation_history")) {
172
- const convTable = await lanceDb.openTable("conversation_history");
173
- const totalConv = await convTable.countRows();
174
- console.error(`\n💬 Migrating ${totalConv} conversation chunks...`);
146
+ if (data.conversations.length > 0) {
147
+ console.error(`\n💬 Writing ${data.conversations.length} conversation chunks to SQLite...`);
175
148
 
176
149
  const insertMain = sqliteDb.prepare(
177
150
  `INSERT OR REPLACE INTO conversation_history
178
151
  (id, content, metadata, created_at, session_id, role, message_index_start, message_index_end, project)
179
152
  VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`
180
153
  );
154
+ const deleteVec = sqliteDb.prepare(`DELETE FROM conversation_history_vec WHERE id = ?`);
181
155
  const insertVec = sqliteDb.prepare(
182
156
  `INSERT INTO conversation_history_vec (id, vector) VALUES (?, ?)`
183
157
  );
184
158
  const insertFts = sqliteDb.prepare(
185
- `INSERT INTO conversation_history_fts (id, content) VALUES (?, ?)`
159
+ `INSERT OR REPLACE INTO conversation_history_fts (id, content) VALUES (?, ?)`
186
160
  );
187
161
 
188
- const BATCH_SIZE = 500;
189
- let offset = 0;
190
-
191
- while (true) {
192
- const rows = await convTable.query().limit(BATCH_SIZE).offset(offset).toArray();
193
- if (rows.length === 0) break;
194
-
195
- const tx = sqliteDb.transaction(() => {
196
- for (const row of rows) {
197
- const vec = toFloatArray(row.vector);
198
- const createdAt = toEpochMs(row.created_at);
199
-
200
- insertMain.run(
201
- row.id,
202
- row.content,
203
- row.metadata ?? "{}",
204
- createdAt,
205
- row.session_id,
206
- row.role,
207
- row.message_index_start ?? 0,
208
- row.message_index_end ?? 0,
209
- row.project ?? "",
210
- );
211
-
212
- if (vec.length > 0) {
213
- insertVec.run(row.id, serializeVector(vec));
214
- }
215
-
216
- insertFts.run(row.id, row.content);
162
+ const tx = sqliteDb.transaction(() => {
163
+ for (const row of data.conversations) {
164
+ insertMain.run(
165
+ row.id, row.content, row.metadata,
166
+ row.created_at, row.session_id, row.role,
167
+ row.message_index_start, row.message_index_end, row.project,
168
+ );
169
+ if (row.vector.length > 0) {
170
+ deleteVec.run(row.id);
171
+ insertVec.run(row.id, serializeVector(row.vector));
217
172
  }
218
- });
219
-
220
- tx();
221
- conversationChunksMigrated += rows.length;
222
- offset += BATCH_SIZE;
223
-
224
- if (totalConv > BATCH_SIZE) {
225
- process.stderr.write(` ${conversationChunksMigrated}/${totalConv}\r`);
173
+ insertFts.run(row.id, row.content);
226
174
  }
227
- }
228
-
175
+ });
176
+ tx();
177
+ conversationChunksMigrated = data.conversations.length;
229
178
  console.error(` ✅ ${conversationChunksMigrated} conversation chunks migrated`);
230
179
  }
231
180
 
232
- // ── Finalize ────────────────────────────────────────────────────
233
- await lanceDb.close?.();
234
181
  sqliteDb.close();
235
182
 
236
183
  const { size } = statSync(target);
@@ -239,9 +186,6 @@ export async function migrate(opts: MigrateOptions): Promise<MigrateResult> {
239
186
  return { memoriesMigrated, conversationChunksMigrated, outputSizeMB };
240
187
  }
241
188
 
242
- /**
243
- * Format a human-readable summary after migration completes.
244
- */
245
189
  export function formatMigrationSummary(
246
190
  source: string,
247
191
  target: string,
@@ -1,4 +1,4 @@
1
- import { randomUUID } from "crypto";
1
+ import { randomUUID, createHash } from "crypto";
2
2
  import type { Memory, SearchIntent, IntentProfile, HybridRow } from "../types/memory.js";
3
3
  import { isDeleted } from "../types/memory.js";
4
4
  import type { SearchResult, SearchOptions } from "../types/conversation.js";
@@ -185,6 +185,7 @@ export class MemoryService {
185
185
  const queryEmbedding = await this.embeddings.embed(query);
186
186
  const profile = INTENT_PROFILES[intent];
187
187
  const now = new Date();
188
+ const offset = Math.min(options?.offset ?? 0, 500);
188
189
 
189
190
  const hasConversationService = this.conversationService !== null;
190
191
  const historyOnly = (options?.historyOnly ?? false) && hasConversationService;
@@ -195,11 +196,14 @@ export class MemoryService {
195
196
  this.conversationService?.config.historyWeight ??
196
197
  0.75;
197
198
 
199
+ // Widen the candidate pool to account for offset
200
+ const effectiveLimit = offset + limit;
201
+
198
202
  // Run memory + history queries in parallel
199
203
  const memoryPromise =
200
204
  !historyOnly
201
205
  ? this.repository
202
- .findHybrid(queryEmbedding, query, limit * 5)
206
+ .findHybrid(queryEmbedding, query, effectiveLimit * 5)
203
207
  .then((candidates) =>
204
208
  candidates
205
209
  .filter((m) => includeDeleted || !isDeleted(m))
@@ -225,7 +229,7 @@ export class MemoryService {
225
229
  .searchHistory(
226
230
  query,
227
231
  queryEmbedding,
228
- historyOnly ? limit * 5 : limit * 3,
232
+ historyOnly ? effectiveLimit * 5 : effectiveLimit * 3,
229
233
  options?.historyFilters
230
234
  )
231
235
  .then((historyRows) =>
@@ -255,7 +259,7 @@ export class MemoryService {
255
259
  const merged = [...memoryResults, ...historyResults];
256
260
  merged.sort((a, b) => b.score - a.score);
257
261
 
258
- return merged.slice(0, limit);
262
+ return merged.slice(offset, offset + limit);
259
263
  }
260
264
 
261
265
  async trackAccess(ids: string[]): Promise<void> {
@@ -278,6 +282,19 @@ export class MemoryService {
278
282
  private static readonly UUID_ZERO =
279
283
  "00000000-0000-0000-0000-000000000000";
280
284
 
285
+ private static waypointId(project?: string): string {
286
+ if (!project?.length) return MemoryService.UUID_ZERO;
287
+ const hex = createHash("sha256").update(`waypoint:${project}`).digest("hex");
288
+ // Format as UUID: 8-4-4-4-12
289
+ return [
290
+ hex.slice(0, 8),
291
+ hex.slice(8, 12),
292
+ hex.slice(12, 16),
293
+ hex.slice(16, 20),
294
+ hex.slice(20, 32),
295
+ ].join("-");
296
+ }
297
+
281
298
  async setWaypoint(args: {
282
299
  project: string;
283
300
  branch?: string;
@@ -336,7 +353,7 @@ ${list(args.memory_ids)}`;
336
353
  };
337
354
 
338
355
  const memory: Memory = {
339
- id: MemoryService.UUID_ZERO,
356
+ id: MemoryService.waypointId(args.project),
340
357
  content,
341
358
  embedding: new Array(this.embeddings.dimension).fill(0),
342
359
  metadata,
@@ -352,7 +369,7 @@ ${list(args.memory_ids)}`;
352
369
  return memory;
353
370
  }
354
371
 
355
- async getLatestWaypoint(): Promise<Memory | null> {
356
- return await this.get(MemoryService.UUID_ZERO);
372
+ async getLatestWaypoint(project?: string): Promise<Memory | null> {
373
+ return await this.get(MemoryService.waypointId(project));
357
374
  }
358
375
  }
@@ -118,4 +118,5 @@ export interface SearchOptions {
118
118
  historyOnly?: boolean;
119
119
  historyWeight?: number;
120
120
  historyFilters?: HistoryFilters;
121
+ offset?: number;
121
122
  }