@aeriondyseti/vector-memory-mcp 2.5.0-dev.1 → 2.5.0-dev.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +3 -1
- package/scripts/lancedb-extract.ts +181 -0
- package/scripts/warmup.ts +63 -0
- package/server/core/connection.ts +51 -1
- package/server/core/consolidation.service.ts +196 -33
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@aeriondyseti/vector-memory-mcp",
|
|
3
|
-
"version": "2.5.0-dev.1",
|
|
3
|
+
"version": "2.5.0-dev.2",
|
|
4
4
|
"description": "A zero-configuration RAG memory server for MCP clients",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "server/index.ts",
|
|
@@ -9,6 +9,8 @@
|
|
|
9
9
|
},
|
|
10
10
|
"files": [
|
|
11
11
|
"server",
|
|
12
|
+
"scripts/lancedb-extract.ts",
|
|
13
|
+
"scripts/warmup.ts",
|
|
12
14
|
"README.md",
|
|
13
15
|
"LICENSE"
|
|
14
16
|
],
|
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
#!/usr/bin/env bun
|
|
2
|
+
/**
|
|
3
|
+
* Standalone LanceDB data extractor — runs in a child process so that
|
|
4
|
+
* @lancedb/lancedb native bindings never coexist with bun:sqlite's
|
|
5
|
+
* extension loading in the same process.
|
|
6
|
+
*
|
|
7
|
+
* Usage: bun scripts/lancedb-extract.ts <lance-db-path>
|
|
8
|
+
* Output: JSON on stdout — { memories: Row[], conversations: Row[] }
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
// The LanceDB directory to read is the first CLI argument after the script path.
const [, , source] = process.argv;

// Without a path there is nothing to extract: print usage and exit non-zero.
if (!source) {
  console.error("Usage: bun scripts/lancedb-extract.ts <lance-db-path>");
  process.exit(1);
}
|
|
16
|
+
|
|
17
|
+
/**
 * Lookup from Arrow TimeUnit code to the divisor that turns a raw timestamp
 * into milliseconds (0=SECOND, 1=MILLISECOND, 2=MICROSECOND, 3=NANOSECOND).
 *
 * A negative entry is a marker meaning "multiply by the absolute value":
 * seconds have to be scaled up by 1000 rather than divided.
 */
const TIME_UNIT_TO_MS_DIVISOR: Record<number, bigint> = {
  0: -1000n, // seconds: multiply by 1000
  1: 1n, // milliseconds: already in the target unit
  2: 1000n, // microseconds: divide by 1000
  3: 1000000n, // nanoseconds: divide by 1,000,000
};

/**
 * Collects, for every timestamp column of `schema`, the divisor needed to
 * convert its raw values to milliseconds.
 *
 * @param schema Schema object exposing `fields`, each with `name` and
 *   `type.typeId` / `type.unit`.
 * @returns Map from column name to divisor. Columns whose `type.typeId` is not
 *   10 are left out; an unrecognised unit falls back to `1n`.
 */
function buildTimestampDivisors(schema: any): Map<string, bigint> {
  const divisors = new Map<string, bigint>();
  for (const column of schema.fields) {
    const columnType = column.type;
    // typeId 10 is the id this script treats as a timestamp column.
    if (columnType.typeId !== 10) continue;
    const divisor = TIME_UNIT_TO_MS_DIVISOR[columnType.unit];
    divisors.set(column.name, divisor ?? 1n);
  }
  return divisors;
}
|
|
36
|
+
|
|
37
|
+
/**
 * Reads one cell from a record batch.
 *
 * @param batch   Batch exposing `getChild(name)`.
 * @param colName Column to read.
 * @param rowIdx  Row index within the batch.
 * @returns The cell value, or `undefined` when the column does not exist.
 *   If the column getter throws, the raw 64-bit integer is returned instead,
 *   or `null` when the backing array is not a 64-bit integer array or the
 *   row index lies beyond the column's data chunks.
 */
function columnValue(batch: any, colName: string, rowIdx: number): unknown {
  const column = batch.getChild(colName);
  if (!column) return undefined;

  try {
    return column.get(rowIdx);
  } catch {
    // The getter can throw on BigInt timestamps larger than MAX_SAFE_INTEGER.
    // Walk the column's data chunks and read the raw typed array directly.
    let remaining = rowIdx;
    for (const chunk of column.data) {
      if (remaining >= chunk.length) {
        // The row lives in a later chunk; make the index chunk-relative.
        remaining -= chunk.length;
        continue;
      }
      const raw = chunk.values;
      const isInt64Array = raw instanceof BigInt64Array || raw instanceof BigUint64Array;
      return isInt64Array ? raw[remaining] : null;
    }
    return null;
  }
}
|
|
57
|
+
|
|
58
|
+
/**
 * Normalises a timestamp-like value to epoch milliseconds.
 *
 * @param value   A `Date`, `bigint` or `number`; anything else (including
 *   `null`/`undefined`) yields the current time.
 * @param divisor Unit conversion factor: `1n` means the value is already in
 *   milliseconds, a positive value divides, a negative value multiplies by
 *   its absolute value.
 * @returns Milliseconds since the epoch as a `number`.
 */
function toEpochMs(value: unknown, divisor: bigint = 1n): number {
  if (value instanceof Date) return value.getTime();

  switch (typeof value) {
    case "bigint": {
      if (divisor < 0n) return Number(value * -divisor); // seconds to ms
      return divisor === 1n ? Number(value) : Number(value / divisor);
    }
    case "number": {
      if (divisor < 0n) return value * Number(-divisor);
      return divisor === 1n ? value : Math.floor(value / Number(divisor));
    }
    default:
      // null, undefined and any unsupported type fall back to "now".
      return Date.now();
  }
}
|
|
73
|
+
|
|
74
|
+
/**
 * Converts a vector cell into a plain `number[]`.
 *
 * Accepted inputs, checked in this order:
 *  - a plain array, returned as-is;
 *  - a `Float32Array` / `Float64Array`, copied element by element;
 *  - any object with a `toArray()` method, whose result is copied;
 *  - any other `ArrayBufferView`, whose bytes are reinterpreted as float32
 *    values in platform byte order.
 *
 * @param vec Value read from a vector column.
 * @returns The vector as numbers, or `[]` when the input is not recognised.
 */
function toFloatArray(vec: unknown): number[] {
  if (Array.isArray(vec)) return vec;
  // Float arrays already hold the numeric values; reinterpreting a
  // Float64Array's bytes as float32 would produce garbage.
  if (vec instanceof Float32Array || vec instanceof Float64Array) {
    return Array.from(vec);
  }
  if (vec && typeof (vec as any).toArray === "function") {
    return Array.from((vec as any).toArray());
  }
  if (ArrayBuffer.isView(vec)) {
    const bytesPerFloat = Float32Array.BYTES_PER_ELEMENT;
    const floatCount = Math.floor(vec.byteLength / bytesPerFloat);
    const start = vec.byteOffset;
    // Copy the bytes into a fresh buffer first: a Float32Array cannot be
    // created on an offset that is not a multiple of 4 (it throws RangeError),
    // and byte views often start at such an offset.
    const bytes = vec.buffer.slice(start, start + floatCount * bytesPerFloat);
    return Array.from(new Float32Array(bytes));
  }
  return [];
}
|
|
86
|
+
|
|
87
|
+
// Number of rows requested per paginated query.
const BATCH_SIZE = 100;

// Load the LanceDB bindings with a dynamic import and open the store named
// on the command line.
const lancedb = await import("@lancedb/lancedb");
const db = await lancedb.connect(source);
const tableNames = await db.tableNames();
// Diagnostics go to stderr; stdout is reserved for the JSON payload.
console.error("Found tables: " + tableNames.join(", "));

// Accumulator for everything that is written to stdout at the end.
const result = {
  memories: [] as any[],
  conversations: [] as any[],
};
|
|
97
|
+
|
|
98
|
+
// Extract the "memories" table, if the store has one.
if (tableNames.includes("memories")) {
  const memoriesTable = await db.openTable("memories");
  const rowCount = await memoriesTable.countRows();
  console.error(`Reading ${rowCount} memories...`);

  // Read page by page: query().toArrow() without offset/limit returns
  // non-deterministic results that can duplicate some rows and skip others.
  // A one-row probe is enough to learn the schema's timestamp units.
  const probe = await memoriesTable.query().limit(1).toArrow();
  const divisorByColumn = buildTimestampDivisors(probe.schema);
  const byId = new Map<string, any>();

  let pageStart = 0;
  while (pageStart < rowCount) {
    const page = await memoriesTable.query().offset(pageStart).limit(BATCH_SIZE).toArrow();
    for (const batch of page.batches) {
      for (let row = 0; row < batch.numRows; row += 1) {
        const id = columnValue(batch, "id", row) as string;
        const content = columnValue(batch, "content", row) as string;
        const rawAccess = columnValue(batch, "last_accessed", row);
        const accessedMs =
          rawAccess != null ? toEpochMs(rawAccess, divisorByColumn.get("last_accessed")) : null;

        // Duplicate ids: keep the most recently accessed row; on a tie keep
        // the one with the longer content.
        const prior = byId.get(id);
        if (prior) {
          const priorAccess = prior.last_accessed ?? 0;
          const candidateAccess = accessedMs ?? 0;
          if (candidateAccess < priorAccess) continue;
          const sameAccess = candidateAccess === priorAccess;
          if (sameAccess && content.length <= prior.content.length) continue;
        }

        const metadata = columnValue(batch, "metadata", row) ?? "{}";
        const vector = toFloatArray(columnValue(batch, "vector", row));
        const createdAt = toEpochMs(
          columnValue(batch, "created_at", row),
          divisorByColumn.get("created_at"),
        );
        const updatedAt = toEpochMs(
          columnValue(batch, "updated_at", row),
          divisorByColumn.get("updated_at"),
        );
        const supersededBy = columnValue(batch, "superseded_by", row) ?? null;
        const usefulness = columnValue(batch, "usefulness", row) ?? 0;
        const accessCount = columnValue(batch, "access_count", row) ?? 0;

        byId.set(id, {
          id,
          content,
          metadata,
          vector,
          created_at: createdAt,
          updated_at: updatedAt,
          last_accessed: accessedMs,
          superseded_by: supersededBy,
          usefulness,
          access_count: accessCount,
        });
      }
    }
    pageStart += BATCH_SIZE;
  }

  result.memories = [...byId.values()];
  console.error(` ${result.memories.length} unique memories read (${rowCount} rows scanned)`);
}
|
|
143
|
+
|
|
144
|
+
// Extract the "conversation_history" table, if the store has one.
if (tableNames.includes("conversation_history")) {
  const historyTable = await db.openTable("conversation_history");
  const rowCount = await historyTable.countRows();
  console.error(`Reading ${rowCount} conversation chunks...`);

  // A one-row probe is enough to learn the schema's timestamp units.
  const probe = await historyTable.query().limit(1).toArrow();
  const divisorByColumn = buildTimestampDivisors(probe.schema);
  const byId = new Map<string, any>();

  let pageStart = 0;
  while (pageStart < rowCount) {
    const page = await historyTable.query().offset(pageStart).limit(BATCH_SIZE).toArrow();
    for (const batch of page.batches) {
      for (let row = 0; row < batch.numRows; row += 1) {
        const id = columnValue(batch, "id", row) as string;
        const content = columnValue(batch, "content", row) as string;

        // Duplicate ids: keep the first row seen unless a later one has
        // strictly longer content.
        const prior = byId.get(id);
        if (prior && prior.content.length >= content.length) continue;

        const metadata = columnValue(batch, "metadata", row) ?? "{}";
        const vector = toFloatArray(columnValue(batch, "vector", row));
        const createdAt = toEpochMs(
          columnValue(batch, "created_at", row),
          divisorByColumn.get("created_at"),
        );
        const sessionId = columnValue(batch, "session_id", row);
        const role = columnValue(batch, "role", row);
        const indexStart = columnValue(batch, "message_index_start", row) ?? 0;
        const indexEnd = columnValue(batch, "message_index_end", row) ?? 0;
        const project = columnValue(batch, "project", row) ?? "";

        byId.set(id, {
          id,
          content,
          metadata,
          vector,
          created_at: createdAt,
          session_id: sessionId,
          role,
          message_index_start: indexStart,
          message_index_end: indexEnd,
          project,
        });
      }
    }
    pageStart += BATCH_SIZE;
  }

  result.conversations = [...byId.values()];
  console.error(` ${result.conversations.length} unique conversation chunks read (${rowCount} rows scanned)`);
}
|
|
179
|
+
|
|
180
|
+
// Release the connection; `?.` tolerates a connection object without close().
await db.close?.();

// columnValue() can return raw 64-bit integers as bigint, and fields such as
// `usefulness`, `access_count` and the message indices are copied into the
// result without conversion. JSON.stringify throws a TypeError on bigint, so
// convert any that remain to numbers while serialising.
const bigintToNumber = (_key: string, value: unknown): unknown =>
  typeof value === "bigint" ? Number(value) : value;

// The JSON payload is the only thing written to stdout.
process.stdout.write(JSON.stringify(result, bigintToNumber));
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
#!/usr/bin/env bun
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Warmup script to pre-download ML models and verify dependencies
|
|
5
|
+
* This runs during installation to ensure everything is ready to use
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { config } from "../server/config/index";
|
|
9
|
+
import { EmbeddingsService } from "../server/core/embeddings.service";
|
|
10
|
+
|
|
11
|
+
/**
 * Checks that the native ONNX runtime loads, then generates one test
 * embedding so the configured model is fetched ahead of first use.
 *
 * A failure to load onnxruntime-node ends the process with exit code 1. Any
 * other failure is reported on stderr and ends the process with exit code 0,
 * so that installation is not blocked.
 */
async function warmup(): Promise<void> {
  const blankLine = (): void => console.log();

  console.log("🔥 Warming up vector-memory-mcp...");
  blankLine();

  try {
    // Step 1: native dependencies.
    console.log("✓ Checking native dependencies...");
    try {
      await import("onnxruntime-node");
      console.log(" ✓ onnxruntime-node loaded");
    } catch (loadError) {
      console.error(" ✗ onnxruntime-node failed:", (loadError as Error).message);
      process.exit(1);
    }

    blankLine();

    // Step 2: build the embeddings service for the configured model.
    console.log("📥 Downloading ML model (this may take a minute)...");
    console.log(` Model: ${config.embeddingModel}`);
    blankLine();

    const service = new EmbeddingsService(config.embeddingModel, config.embeddingDimension);

    // Generating one embedding triggers the model download; time it.
    const startedAt = Date.now();
    await service.embed("warmup test");
    const elapsedSeconds = ((Date.now() - startedAt) / 1000).toFixed(2);

    blankLine();
    console.log(`✅ Warmup complete! (${elapsedSeconds}s)`);
    blankLine();
    console.log("Ready to use! Configure your MCP client and restart to get started.");
    blankLine();
  } catch (failure) {
    console.error();
    console.error("❌ Warmup failed:", failure);
    console.error();
    console.error("This is not a critical error - the server will download models on first run.");
    console.error("You can try running 'vector-memory-mcp warmup' manually later.");
    process.exit(0); // Exit successfully to not block installation
  }
}
|
|
57
|
+
|
|
58
|
+
// Run the warmup only when this file is the script that was launched,
// not when it is imported by another module.
const launchedDirectly = import.meta.url === `file://${process.argv[1]}`;
if (launchedDirectly) {
  warmup();
}
|
|
62
|
+
|
|
63
|
+
export { warmup };
|
|
@@ -1,5 +1,16 @@
|
|
|
1
1
|
import { Database } from "bun:sqlite";
|
|
2
|
-
import {
|
|
2
|
+
import {
|
|
3
|
+
closeSync,
|
|
4
|
+
existsSync,
|
|
5
|
+
mkdirSync,
|
|
6
|
+
openSync,
|
|
7
|
+
readdirSync,
|
|
8
|
+
readFileSync,
|
|
9
|
+
renameSync,
|
|
10
|
+
statSync,
|
|
11
|
+
unlinkSync,
|
|
12
|
+
writeSync,
|
|
13
|
+
} from "fs";
|
|
3
14
|
import { dirname } from "path";
|
|
4
15
|
import { removeVec0Tables, runMigrations } from "./migrations";
|
|
5
16
|
|
|
@@ -109,8 +120,47 @@ function guardedVec0Cleanup(dbPath: string): void {
|
|
|
109
120
|
* entries (never for healthy databases) and is serialized by an exclusive
|
|
110
121
|
* lock; migrations are user_version-gated inside an immediate transaction.
|
|
111
122
|
*/
|
|
123
|
+
/**
 * Legacy LanceDB installs used the db path as a *directory*
 * (e.g. ~/.vector-memory/memories.db/memories.lance). SQLite needs a file
 * there, so move the directory aside instead of dying with SQLITE_CANTOPEN.
 *
 * Safe to call from several processes starting at once: if another process
 * moves or replaces the directory at any point, this returns null rather
 * than throwing.
 *
 * @param dbPath Path where the SQLite database file is expected.
 * @returns The path the directory was moved to (`<dbPath>.lancedb`, or
 *   `<dbPath>.lancedb.<n>` when that name is taken), or null if nothing was
 *   done.
 * @throws Error when `dbPath` is a directory that does not look like a
 *   LanceDB store; it is never moved automatically.
 */
export function relocateLegacyLanceDir(dbPath: string): string | null {
  if (!existsSync(dbPath)) return null;

  let entries: string[];
  try {
    if (!statSync(dbPath).isDirectory()) return null;
    entries = readdirSync(dbPath);
  } catch (err) {
    // A concurrently starting process may have moved the directory away
    // (ENOENT) or already put a database file in its place (ENOTDIR).
    const code = (err as NodeJS.ErrnoException).code;
    if (code === "ENOENT" || code === "ENOTDIR") return null;
    throw err;
  }

  const isLance = entries.some(
    (e) => e.endsWith(".lance") || e === "_versions" || e === "_indices",
  );
  if (!isLance) {
    throw new Error(
      `Database path ${dbPath} is a directory, not a SQLite file. ` +
        "Move or remove it, or point --db-file at a different location.",
    );
  }

  // Pick the first free sibling name so an earlier relocation is never overwritten.
  let target = `${dbPath}.lancedb`;
  for (let n = 1; existsSync(target); n++) {
    target = `${dbPath}.lancedb.${n}`;
  }
  try {
    renameSync(dbPath, target);
  } catch (err) {
    // A concurrently starting process won the rename — nothing left to move.
    if ((err as NodeJS.ErrnoException).code === "ENOENT") return null;
    throw err;
  }
  console.error(
    `[vector-memory-mcp] Found a legacy LanceDB store at ${dbPath} — ` +
      `moved it to ${target}. A fresh SQLite database will be created.`,
  );
  return target;
}
|
|
160
|
+
|
|
112
161
|
export function connectToDatabase(dbPath: string): Database {
|
|
113
162
|
mkdirSync(dirname(dbPath), { recursive: true });
|
|
163
|
+
relocateLegacyLanceDir(dbPath);
|
|
114
164
|
|
|
115
165
|
// Remove orphaned vec0 virtual table entries before bun:sqlite opens the
|
|
116
166
|
// database. bun:sqlite cannot modify sqlite_master, so this uses the
|
|
@@ -1,9 +1,17 @@
|
|
|
1
1
|
import { Database } from "bun:sqlite";
|
|
2
2
|
import { createHash, randomUUID } from "crypto";
|
|
3
|
-
import {
|
|
3
|
+
import {
|
|
4
|
+
copyFileSync,
|
|
5
|
+
existsSync,
|
|
6
|
+
readdirSync,
|
|
7
|
+
readFileSync,
|
|
8
|
+
renameSync,
|
|
9
|
+
statSync,
|
|
10
|
+
} from "fs";
|
|
4
11
|
import { readFile } from "fs/promises";
|
|
5
12
|
import { homedir } from "os";
|
|
6
|
-
import { dirname, join } from "path";
|
|
13
|
+
import { dirname, join, resolve } from "path";
|
|
14
|
+
import { fileURLToPath } from "url";
|
|
7
15
|
import type { EmbeddingsService } from "./embeddings.service";
|
|
8
16
|
import { normalizeProject } from "./project";
|
|
9
17
|
import { safeParseJsonObject, serializeVector } from "./sqlite-utils";
|
|
@@ -58,6 +66,70 @@ interface SourceMemoryRow {
|
|
|
58
66
|
vector: Buffer | null;
|
|
59
67
|
}
|
|
60
68
|
|
|
69
|
+
/**
 * One conversation-history row as read from a source store, before it is
 * handed to the conversation import step.
 */
interface SourceConversationRow {
  /** Row identifier. */
  id: string;
  /** Text of the conversation chunk. */
  content: string;
  /** Metadata as a string — presumably serialized JSON; verify against callers. */
  metadata: string;
  /** Creation time — presumably epoch milliseconds; confirm for SQLite sources. */
  created_at: number;
  session_id: string;
  role: string;
  message_index_start: number;
  message_index_end: number;
  project: string;
  /** Serialized embedding, or null when the source has no usable vector. */
  vector: Buffer | null;
}
|
|
81
|
+
|
|
82
|
+
/**
 * LanceDB-era repo stores used `.vector-memory/memories.db` as a *directory*
 * rather than a file. This recognises such a directory from its listing.
 *
 * @param entries Names of the entries directly inside the directory.
 * @returns true when any entry ends in `.lance` or is named `_versions` or
 *   `_indices`.
 */
function isLanceDir(entries: string[]): boolean {
  for (const entry of entries) {
    if (entry === "_versions" || entry === "_indices" || entry.endsWith(".lance")) {
      return true;
    }
  }
  return false;
}
|
|
92
|
+
|
|
93
|
+
/**
 * Reads a LanceDB store by running `scripts/lancedb-extract.ts` in a child
 * process and parsing the JSON it prints on stdout. The work is shelled out
 * because @lancedb/lancedb's native bindings cannot coexist with bun:sqlite
 * in one process.
 *
 * @param path Directory of the LanceDB store.
 * @returns The extracted memories and conversations, with vectors as plain
 *   number arrays.
 * @throws Error when the extract script is missing or exits non-zero.
 *   Malformed output surfaces as the `JSON.parse` error.
 */
async function extractLanceData(path: string): Promise<{
  memories: Array<Omit<SourceMemoryRow, "vector"> & { vector: number[] }>;
  conversations: Array<
    Omit<SourceConversationRow, "vector"> & { vector: number[] }
  >;
}> {
  // The script sits two directories above this module, under scripts/.
  const moduleDir = dirname(fileURLToPath(import.meta.url));
  const script = resolve(moduleDir, "..", "..", "scripts", "lancedb-extract.ts");
  if (!existsSync(script)) {
    throw new Error(`LanceDB extract script not found at ${script}`);
  }

  // Run it with the current runtime. stdout carries the JSON payload;
  // stderr is inherited, so the child's messages go to this process's stderr.
  const command = [process.execPath, script, path];
  const child = Bun.spawn(command, { stdout: "pipe", stderr: "inherit" });

  const payload = await new Response(child.stdout).text();
  const status = await child.exited;
  if (status !== 0) {
    throw new Error(`LanceDB extraction failed (exit code ${status})`);
  }
  return JSON.parse(payload);
}
|
|
120
|
+
|
|
121
|
+
/**
 * Reports whether the vector table `name` can be queried without the
 * sqlite-vec extension.
 *
 * Old schema versions kept vectors in vec0 virtual tables, which need that
 * extension. Sources are opened read-only without it, so such tables count
 * as unreadable and their rows are re-embedded on import.
 *
 * @param db   Source database.
 * @param name Name to look up in `sqlite_master`.
 * @returns false when no entry with that name exists or its definition
 *   mentions `vec0`; true otherwise.
 */
function vecTableReadable(db: Database, name: string): boolean {
  const lookup = db.prepare("SELECT sql FROM sqlite_master WHERE name = ?");
  const entry = lookup.get(name) as { sql: string | null } | null;
  if (entry == null) return false;

  // A missing definition is treated as empty text.
  const definition = entry.sql ?? "";
  return !definition.includes("vec0");
}
|
|
132
|
+
|
|
61
133
|
/** Mirrors MemoryService.waypointId — must stay byte-identical. */
|
|
62
134
|
function waypointIdFor(project: string): string {
|
|
63
135
|
const normalized = project.trim().toLowerCase();
|
|
@@ -219,6 +291,18 @@ export class ConsolidationService {
|
|
|
219
291
|
errors: [],
|
|
220
292
|
};
|
|
221
293
|
|
|
294
|
+
// LanceDB-era stores are directories, not SQLite files
|
|
295
|
+
if (statSync(sourceDbPath).isDirectory()) {
|
|
296
|
+
await this.consolidateLanceSource(
|
|
297
|
+
sourceDbPath,
|
|
298
|
+
project,
|
|
299
|
+
importBatch,
|
|
300
|
+
options,
|
|
301
|
+
report,
|
|
302
|
+
);
|
|
303
|
+
return report;
|
|
304
|
+
}
|
|
305
|
+
|
|
222
306
|
let source: Database;
|
|
223
307
|
try {
|
|
224
308
|
source = new Database(sourceDbPath, { readonly: true });
|
|
@@ -242,6 +326,68 @@ export class ConsolidationService {
|
|
|
242
326
|
return report;
|
|
243
327
|
}
|
|
244
328
|
|
|
329
|
+
/**
 * Imports a LanceDB-era source, which is a directory rather than a SQLite
 * file: extracts its rows in a child process, converts the vectors to
 * buffers, then runs the memory, conversation and index-state import steps.
 *
 * Failures are appended to `report.errors` rather than thrown. An empty
 * directory is silently ignored.
 */
private async consolidateLanceSource(
  sourceDbPath: string,
  project: string,
  importBatch: string,
  options: ConsolidationOptions,
  report: SourceReport,
): Promise<void> {
  const dirEntries = readdirSync(sourceDbPath);
  // failed init left an empty dir — nothing to import
  if (dirEntries.length === 0) return;
  if (!isLanceDir(dirEntries)) {
    report.errors.push(
      `source is a directory but not a LanceDB store: ${sourceDbPath}`,
    );
    return;
  }

  let extracted: Awaited<ReturnType<typeof extractLanceData>>;
  try {
    extracted = await extractLanceData(sourceDbPath);
  } catch (cause) {
    const reason = cause instanceof Error ? cause.message : String(cause);
    report.errors.push(`LanceDB extraction failed: ${reason}`);
    return;
  }

  // Each dimension is expected to serialize to 4 bytes.
  const expectedBytes = this.embeddings.dimension * 4;
  const asVectorBlob = (vector: number[]): Buffer | null => {
    if (!(vector.length > 0)) return null;
    const blob = serializeVector(vector);
    // Wrong-dimension vectors (model change) are dropped → re-embedded
    return blob && blob.byteLength === expectedBytes ? blob : null;
  };

  const memoryRows: SourceMemoryRow[] = extracted.memories.map((memory) => {
    return { ...memory, vector: asVectorBlob(memory.vector) };
  });
  const conversationRows: SourceConversationRow[] = extracted.conversations.map(
    (chunk) => {
      return { ...chunk, vector: asVectorBlob(chunk.vector) };
    },
  );

  try {
    await this.processMemoryRows(memoryRows, project, importBatch, options, report);
    this.processConversationRows(conversationRows, project, importBatch, options, report);
    // No SQLite handle exists for a LanceDB source, so null is passed as the source.
    await this.importIndexState(null, sourceDbPath, project, options, report);
  } catch (failure) {
    report.errors.push(failure instanceof Error ? failure.message : String(failure));
  }
}
|
|
390
|
+
|
|
245
391
|
// ── Memories ────────────────────────────────────────────────────────
|
|
246
392
|
|
|
247
393
|
private async importMemories(
|
|
@@ -253,12 +399,25 @@ export class ConsolidationService {
|
|
|
253
399
|
): Promise<void> {
|
|
254
400
|
if (!tableExists(source, "memories")) return;
|
|
255
401
|
|
|
256
|
-
const rows =
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
402
|
+
const rows = (
|
|
403
|
+
vecTableReadable(source, "memories_vec")
|
|
404
|
+
? source.prepare(
|
|
405
|
+
`SELECT m.*, v.vector FROM memories m
|
|
406
|
+
LEFT JOIN memories_vec v ON m.id = v.id`,
|
|
407
|
+
)
|
|
408
|
+
: source.prepare("SELECT m.*, NULL AS vector FROM memories m")
|
|
409
|
+
).all() as SourceMemoryRow[];
|
|
410
|
+
|
|
411
|
+
await this.processMemoryRows(rows, project, importBatch, options, report);
|
|
412
|
+
}
|
|
413
|
+
|
|
414
|
+
private async processMemoryRows(
|
|
415
|
+
rows: SourceMemoryRow[],
|
|
416
|
+
project: string,
|
|
417
|
+
importBatch: string,
|
|
418
|
+
options: ConsolidationOptions,
|
|
419
|
+
report: SourceReport,
|
|
420
|
+
): Promise<void> {
|
|
262
421
|
if (rows.length === 0) return;
|
|
263
422
|
|
|
264
423
|
const targetGet = this.target.prepare(
|
|
@@ -316,6 +475,12 @@ export class ConsolidationService {
|
|
|
316
475
|
}
|
|
317
476
|
}
|
|
318
477
|
|
|
478
|
+
if (options.dryRun) {
|
|
479
|
+
report.memoriesImported = toImport.length;
|
|
480
|
+
this.collectUnresolved(rows, sourceIds, report);
|
|
481
|
+
return;
|
|
482
|
+
}
|
|
483
|
+
|
|
319
484
|
// Pre-compute embeddings for rows whose vectors are missing or have the
|
|
320
485
|
// wrong dimension (model change) — outside any transaction.
|
|
321
486
|
const expectedBytes = this.embeddings.dimension * 4;
|
|
@@ -329,12 +494,6 @@ export class ConsolidationService {
|
|
|
329
494
|
new Array(this.embeddings.dimension).fill(0),
|
|
330
495
|
);
|
|
331
496
|
|
|
332
|
-
if (options.dryRun) {
|
|
333
|
-
report.memoriesImported = toImport.length;
|
|
334
|
-
this.collectUnresolved(rows, sourceIds, report);
|
|
335
|
-
return;
|
|
336
|
-
}
|
|
337
|
-
|
|
338
497
|
const insertMain = this.target.prepare(
|
|
339
498
|
`INSERT INTO memories (id, content, metadata, created_at, updated_at, superseded_by, usefulness, access_count, last_accessed, project)
|
|
340
499
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
|
@@ -472,23 +631,27 @@ export class ConsolidationService {
|
|
|
472
631
|
): void {
|
|
473
632
|
if (!tableExists(source, "conversation_history")) return;
|
|
474
633
|
|
|
475
|
-
const rows =
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
634
|
+
const rows = (
|
|
635
|
+
vecTableReadable(source, "conversation_history_vec")
|
|
636
|
+
? source.prepare(
|
|
637
|
+
`SELECT c.*, v.vector FROM conversation_history c
|
|
638
|
+
LEFT JOIN conversation_history_vec v ON c.id = v.id`,
|
|
639
|
+
)
|
|
640
|
+
: source.prepare(
|
|
641
|
+
"SELECT c.*, NULL AS vector FROM conversation_history c",
|
|
642
|
+
)
|
|
643
|
+
).all() as SourceConversationRow[];
|
|
644
|
+
|
|
645
|
+
this.processConversationRows(rows, project, importBatch, options, report);
|
|
646
|
+
}
|
|
647
|
+
|
|
648
|
+
private processConversationRows(
|
|
649
|
+
rows: SourceConversationRow[],
|
|
650
|
+
project: string,
|
|
651
|
+
importBatch: string,
|
|
652
|
+
options: ConsolidationOptions,
|
|
653
|
+
report: SourceReport,
|
|
654
|
+
): void {
|
|
492
655
|
if (rows.length === 0) return;
|
|
493
656
|
|
|
494
657
|
const existsStmt = this.target.prepare(
|
|
@@ -550,7 +713,7 @@ export class ConsolidationService {
|
|
|
550
713
|
// ── Conversation index state ────────────────────────────────────────
|
|
551
714
|
|
|
552
715
|
private async importIndexState(
|
|
553
|
-
source: Database,
|
|
716
|
+
source: Database | null,
|
|
554
717
|
sourceDbPath: string,
|
|
555
718
|
project: string,
|
|
556
719
|
options: ConsolidationOptions,
|
|
@@ -569,7 +732,7 @@ export class ConsolidationService {
|
|
|
569
732
|
};
|
|
570
733
|
|
|
571
734
|
const entries: StateRow[] = [];
|
|
572
|
-
if (tableExists(source, "conversation_index_state")) {
|
|
735
|
+
if (source && tableExists(source, "conversation_index_state")) {
|
|
573
736
|
entries.push(
|
|
574
737
|
...(source
|
|
575
738
|
.prepare("SELECT * FROM conversation_index_state")
|