pdf-brain 0.9.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +3 -8
- package/scripts/migration/pglite-to-libsql.ts +480 -0
- package/src/cli.ts +7 -217
- package/src/index.ts +61 -27
- package/src/services/Database.ts +13 -658
- package/src/services/EmbeddingQueue.test.ts +352 -0
- package/src/services/EmbeddingQueue.ts +241 -0
- package/src/services/LibSQLDatabase.test.ts +385 -0
- package/src/services/LibSQLDatabase.ts +579 -0
- package/src/services/Daemon.test.ts +0 -215
- package/src/services/Daemon.ts +0 -323
- package/src/services/Database.test.ts +0 -614
- package/src/services/DatabaseClient.test.ts +0 -292
- package/src/services/DatabaseClient.ts +0 -635
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "pdf-brain",
|
|
3
|
-
"version": "0.9.0",
|
|
3
|
+
"version": "1.0.0",
|
|
4
4
|
"description": "Local PDF knowledge base with vector search",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "src/index.ts",
|
|
@@ -23,17 +23,13 @@
|
|
|
23
23
|
"@effect/platform": "^0.72.0",
|
|
24
24
|
"@effect/platform-bun": "^0.52.0",
|
|
25
25
|
"@effect/schema": "^0.75.0",
|
|
26
|
-
"@
|
|
27
|
-
"@electric-sql/pglite-socket": "^0.0.19",
|
|
28
|
-
"@electric-sql/pglite-tools": "^0.2.19",
|
|
29
|
-
"@types/pg": "^8.16.0",
|
|
26
|
+
"@libsql/client": "^0.15.15",
|
|
30
27
|
"ai": "^5.0.115",
|
|
31
28
|
"effect": "^3.12.0",
|
|
32
29
|
"gray-matter": "^4.0.3",
|
|
33
30
|
"ink": "^6.5.1",
|
|
34
31
|
"ink-spinner": "^5.0.0",
|
|
35
32
|
"mdast-util-to-string": "^4.0.0",
|
|
36
|
-
"pg": "^8.16.3",
|
|
37
33
|
"react": "^19.2.3",
|
|
38
34
|
"remark-frontmatter": "^5.0.0",
|
|
39
35
|
"remark-gfm": "^4.0.1",
|
|
@@ -63,8 +59,7 @@
|
|
|
63
59
|
"pdf",
|
|
64
60
|
"vector-search",
|
|
65
61
|
"embeddings",
|
|
66
|
-
"
|
|
67
|
-
"pgvector",
|
|
62
|
+
"libsql",
|
|
68
63
|
"knowledge-base",
|
|
69
64
|
"effect-ts"
|
|
70
65
|
],
|
|
@@ -0,0 +1,480 @@
|
|
|
1
|
+
#!/usr/bin/env bun
|
|
2
|
+
/**
|
|
3
|
+
* Migrate data from PGlite to LibSQL
|
|
4
|
+
*
|
|
5
|
+
* This script migrates an existing PGlite database to LibSQL format.
|
|
6
|
+
* Handles schema translation, vector format conversion (pgvector → F32_BLOB),
|
|
7
|
+
* and data migration.
|
|
8
|
+
*
|
|
9
|
+
* Prerequisites:
|
|
10
|
+
* - Existing PGlite database at specified path
|
|
11
|
+
* - Bun runtime (for @libsql/client)
|
|
12
|
+
*
|
|
13
|
+
* Usage:
|
|
14
|
+
* bun run scripts/migration/pglite-to-libsql.ts [pglite-db-path] [libsql-db-path]
|
|
15
|
+
*
|
|
16
|
+
* Example:
|
|
17
|
+
* bun run scripts/migration/pglite-to-libsql.ts \
|
|
18
|
+
* ~/Documents/.pdf-library/library \
|
|
19
|
+
* ~/Documents/.pdf-library/library-libsql.db
|
|
20
|
+
*
|
|
21
|
+
* Vector Format Conversion:
|
|
22
|
+
* - PGlite: vector(1024) column with '[1.2,3.4,...]' text format
|
|
23
|
+
* - LibSQL: F32_BLOB(1024) column with JSON.stringify([1.2,3.4,...])
|
|
24
|
+
*/
|
|
25
|
+
|
|
26
|
+
import { PGlite } from "@electric-sql/pglite";
|
|
27
|
+
import { vector } from "@electric-sql/pglite/vector";
|
|
28
|
+
import { createClient } from "@libsql/client";
|
|
29
|
+
import { existsSync, mkdirSync } from "fs";
|
|
30
|
+
import { dirname } from "path";
|
|
31
|
+
|
|
32
|
+
// Embedding dimension for mxbai-embed-large
const EMBEDDING_DIM = 1024;

// Positional CLI arguments: [pglite-db-path] [libsql-db-path]
const args = process.argv.slice(2);

// Home directory used to build the default paths. USERPROFILE is consulted
// when HOME is unset so the defaults do not start with the text "undefined".
const homeDir = process.env.HOME ?? process.env.USERPROFILE ?? "";

// Source PGlite data directory. The default is used when no (or an empty)
// first argument is given. The former `.replace(".db", "")` was dropped: the
// default literal has no ".db" suffix, so the call could only ever mangle a
// home directory that happens to contain ".db".
const pglitePath = args[0] || `${homeDir}/Documents/.pdf-library/library`;

// Target LibSQL database file. The default is used when no (or an empty)
// second argument is given.
const libsqlPath =
  args[1] || `${homeDir}/Documents/.pdf-library/library-libsql.db`;
|
|
41
|
+
|
|
42
|
+
/**
 * Running totals collected while the migration executes.
 * A single instance is created in `main` and mutated by each migration step.
 */
interface MigrationStats {
  /** Number of document rows inserted into LibSQL. */
  documents: number;
  /** Number of chunk rows inserted into LibSQL. */
  chunks: number;
  /** Number of embedding rows inserted into LibSQL. */
  embeddings: number;
  /** Number of embeddings that were rejected before insertion (e.g. wrong dimension). */
  skippedEmbeddings: number;
  /** Human-readable description of every failure encountered so far. */
  errors: string[];
}
|
|
49
|
+
|
|
50
|
+
/**
 * Parse pgvector text format to a number array.
 * PGlite returns vectors as '[1.2,3.4,5.6,...]'.
 *
 * @param vectorText - Vector in bracketed, comma-separated text form.
 * @returns The parsed components; an empty array for an empty vector ('[]' or '').
 * @throws Error when any component is empty or is not a finite number. Such a
 *   value would otherwise be carried along as NaN, which JSON.stringify turns
 *   into `null`.
 */
function parseVector(vectorText: string): number[] {
  // Remove surrounding whitespace and the enclosing brackets
  const inner = vectorText
    .trim()
    .replace(/^\[|\]$/g, "")
    .trim();

  // ''.split(",") would yield [''] and therefore a bogus one-element vector
  if (inner === "") {
    return [];
  }

  return inner.split(",").map((token, index) => {
    const text = token.trim();
    // Number("") is 0, so an empty component must be rejected explicitly
    const value = text === "" ? Number.NaN : Number(text);
    if (!Number.isFinite(value)) {
      throw new Error(`Invalid vector component at index ${index}: "${text}"`);
    }
    return value;
  });
}
|
|
59
|
+
|
|
60
|
+
/**
 * Initialize LibSQL schema.
 *
 * Issues the CREATE TABLE / CREATE INDEX statements for the `documents`,
 * `chunks` and `embeddings` tables one after another. Every statement uses
 * IF NOT EXISTS, so running it against an already initialized database does
 * not fail.
 *
 * @param client - LibSQL client the statements are executed on.
 */
async function initLibSQLSchema(
  client: ReturnType<typeof createClient>
): Promise<void> {
  console.log("Creating LibSQL schema...");

  // Executed strictly in this order: referenced tables first, indexes last.
  const ddlStatements: string[] = [
    // Documents table
    `
    CREATE TABLE IF NOT EXISTS documents (
      id TEXT PRIMARY KEY,
      title TEXT NOT NULL,
      path TEXT NOT NULL UNIQUE,
      added_at TEXT NOT NULL,
      page_count INTEGER NOT NULL,
      size_bytes INTEGER NOT NULL,
      tags TEXT DEFAULT '[]',
      metadata TEXT DEFAULT '{}'
    )
  `,
    // Chunks table
    `
    CREATE TABLE IF NOT EXISTS chunks (
      id TEXT PRIMARY KEY,
      doc_id TEXT NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
      page INTEGER NOT NULL,
      chunk_index INTEGER NOT NULL,
      content TEXT NOT NULL
    )
  `,
    // Embeddings table with F32_BLOB
    `
    CREATE TABLE IF NOT EXISTS embeddings (
      chunk_id TEXT PRIMARY KEY REFERENCES chunks(id) ON DELETE CASCADE,
      embedding F32_BLOB(${EMBEDDING_DIM}) NOT NULL
    )
  `,
    // Indexes
    `CREATE INDEX IF NOT EXISTS idx_chunks_doc ON chunks(doc_id)`,
    `CREATE INDEX IF NOT EXISTS idx_docs_path ON documents(path)`,
  ];

  for (const ddl of ddlStatements) {
    await client.execute(ddl);
  }

  console.log("✓ Schema created");
}
|
|
109
|
+
|
|
110
|
+
/**
 * Migrate documents from PGlite to LibSQL.
 *
 * Reads every row of the PGlite `documents` table (ordered by id) and inserts
 * it into the LibSQL `documents` table one row at a time. A row that fails is
 * recorded in `stats.errors` and logged; the loop then continues with the
 * next row.
 *
 * @param pgDb - Open PGlite database to read from.
 * @param libsqlClient - LibSQL client to write to.
 * @param stats - Mutated in place: `documents` is incremented per inserted
 *   row and failures are appended to `errors`.
 */
async function migrateDocuments(
  pgDb: PGlite,
  libsqlClient: ReturnType<typeof createClient>,
  stats: MigrationStats
): Promise<void> {
  console.log("\nMigrating documents...");

  const result = await pgDb.query("SELECT * FROM documents ORDER BY id");
  const docs = result.rows;

  console.log(` Found ${docs.length} documents`);

  for (const doc of docs) {
    try {
      const docRow = doc as {
        id: string;
        title: string;
        path: string;
        added_at: string | Date;
        page_count: number;
        size_bytes: number;
        tags: unknown;
        metadata: unknown;
      };

      // NOTE(review): the source column type is not visible from this script.
      // If it is a timestamp type, PGlite presumably hands back a Date, which
      // must be serialized explicitly to land as ISO text in the TEXT column.
      // Strings are passed through untouched.
      const addedAt =
        docRow.added_at instanceof Date
          ? docRow.added_at.toISOString()
          : docRow.added_at;

      await libsqlClient.execute({
        sql: `INSERT INTO documents (id, title, path, added_at, page_count, size_bytes, tags, metadata)
              VALUES (?, ?, ?, ?, ?, ?, ?, ?)`,
        args: [
          docRow.id,
          docRow.title,
          docRow.path,
          addedAt,
          docRow.page_count,
          docRow.size_bytes,
          // JSONB → TEXT. Missing tags fall back to an empty list (the column
          // default); JSON.stringify(undefined) would otherwise produce an
          // undefined bind argument.
          JSON.stringify(docRow.tags ?? []),
          JSON.stringify(docRow.metadata || {}), // JSONB → TEXT
        ],
      });
      stats.documents++;
    } catch (e) {
      const error = `Document ${(doc as { id: string }).id}: ${e}`;
      stats.errors.push(error);
      console.error(` ✗ ${error}`);
    }
  }

  console.log(` ✓ Migrated ${stats.documents} documents`);
}
|
|
162
|
+
|
|
163
|
+
/**
 * Migrate chunks from PGlite to LibSQL.
 *
 * Chunks are read in pages of 100 (ordered by id) and each page is written
 * with a single LibSQL "write" batch. When a batch fails, its rows are retried
 * one by one so that a single bad row does not discard the whole page; only
 * the rows that still fail are recorded in `stats.errors`.
 *
 * @param pgDb - Open PGlite database to read from.
 * @param libsqlClient - LibSQL client to write to.
 * @param stats - Mutated in place: `chunks` is incremented per inserted row
 *   and failures are appended to `errors`.
 */
async function migrateChunks(
  pgDb: PGlite,
  libsqlClient: ReturnType<typeof createClient>,
  stats: MigrationStats
): Promise<void> {
  type ChunkRow = {
    id: string;
    doc_id: string;
    page: number;
    chunk_index: number;
    content: string;
  };

  console.log("\nMigrating chunks...");

  const countResult = await pgDb.query("SELECT COUNT(*) as c FROM chunks");
  // Number() accepts the count whether it arrives as a number, string or bigint
  const totalChunks = Number(
    (countResult.rows[0] as { c: number | string | bigint }).c
  );

  console.log(` Found ${totalChunks} chunks`);

  const batchSize = 100;
  let migrated = 0;

  for (let offset = 0; offset < totalChunks; offset += batchSize) {
    const batch = await pgDb.query(
      `SELECT * FROM chunks ORDER BY id LIMIT ${batchSize} OFFSET ${offset}`
    );

    // Use libSQL batch for transaction
    const statements = (batch.rows as ChunkRow[]).map((chunkRow) => ({
      sql: "INSERT INTO chunks (id, doc_id, page, chunk_index, content) VALUES (?, ?, ?, ?, ?)",
      args: [
        chunkRow.id,
        chunkRow.doc_id,
        chunkRow.page,
        chunkRow.chunk_index,
        chunkRow.content,
      ] as [string, string, number, number, string],
    }));

    if (statements.length > 0) {
      try {
        await libsqlClient.batch(statements, "write");
        migrated += statements.length;
        stats.chunks += statements.length;
      } catch (batchError) {
        console.error(` ✗ Chunk batch at offset ${offset}: ${batchError}`);
        console.error(" Retrying rows of this batch individually...");

        // A failed batch leaves none of its rows behind, so every row is
        // retried on its own and only the truly failing ones are reported.
        for (const statement of statements) {
          try {
            await libsqlClient.execute(statement);
            migrated++;
            stats.chunks++;
          } catch (e) {
            const error = `Chunk ${statement.args[0]}: ${e}`;
            stats.errors.push(error);
            console.error(` ✗ ${error}`);
          }
        }
      }
    }

    // Progress is keyed on rows processed (not rows inserted) so failures do
    // not cause the same line to be printed again and again.
    const processed = Math.min(offset + batchSize, totalChunks);
    if (processed % 1000 === 0 || processed === totalChunks) {
      console.log(` Progress: ${migrated}/${totalChunks}`);
    }
  }

  console.log(` ✓ Migrated ${stats.chunks} chunks`);
}
|
|
224
|
+
|
|
225
|
+
/**
 * Migrate embeddings from PGlite to LibSQL.
 *
 * KEY CONVERSION:
 * - PGlite: vector(1024) → returns as '[1.2,3.4,...]' text
 * - LibSQL: F32_BLOB(1024) → requires JSON.stringify([1.2,3.4,...])
 *
 * Embeddings are read in pages of 50 (ordered by chunk_id). Each row is parsed
 * and validated; rows with the wrong dimension, non-finite components, or a
 * parse failure are counted in `stats.skippedEmbeddings`. Valid rows are
 * written with one LibSQL "write" batch per page. When a batch fails, its rows
 * are retried one by one; rows that still fail are recorded in `stats.errors`
 * but not counted as skipped, so the later count verification reports them.
 *
 * Any error outside the per-row/per-batch handling (e.g. the embeddings table
 * cannot be read) is logged and recorded, and the function returns normally.
 *
 * @param pgDb - Open PGlite database to read from.
 * @param libsqlClient - LibSQL client to write to.
 * @param stats - Mutated in place: `embeddings`, `skippedEmbeddings`, `errors`.
 */
async function migrateEmbeddings(
  pgDb: PGlite,
  libsqlClient: ReturnType<typeof createClient>,
  stats: MigrationStats
): Promise<void> {
  console.log("\nMigrating embeddings...");

  try {
    const countResult = await pgDb.query(
      "SELECT COUNT(*) as c FROM embeddings"
    );
    // Number() accepts the count whether it arrives as a number, string or bigint
    const totalEmb = Number(
      (countResult.rows[0] as { c: number | string | bigint }).c
    );

    console.log(` Found ${totalEmb} embeddings`);

    const batchSize = 50; // Smaller batches for large embedding data
    let migrated = 0;

    for (let offset = 0; offset < totalEmb; offset += batchSize) {
      const batch = await pgDb.query(
        `SELECT chunk_id, embedding::text as embedding
         FROM embeddings
         ORDER BY chunk_id
         LIMIT ${batchSize} OFFSET ${offset}`
      );

      const statements: Array<{ sql: string; args: [string, string] }> = [];

      for (const row of batch.rows) {
        try {
          const embRow = row as { chunk_id: string; embedding: string };

          // Parse pgvector text format → number array
          const vectorArray = parseVector(embRow.embedding);

          // Validate dimension
          if (vectorArray.length !== EMBEDDING_DIM) {
            stats.skippedEmbeddings++;
            stats.errors.push(
              `Embedding ${embRow.chunk_id}: wrong dimension ${vectorArray.length}, expected ${EMBEDDING_DIM}`
            );
            continue;
          }

          // Validate components: NaN/Infinity serialize to `null` in JSON and
          // would make the whole batch fail on insert.
          if (!vectorArray.every((component) => Number.isFinite(component))) {
            stats.skippedEmbeddings++;
            stats.errors.push(
              `Embedding ${embRow.chunk_id}: contains non-finite components`
            );
            continue;
          }

          // Convert to LibSQL format: JSON.stringify for F32_BLOB
          statements.push({
            sql: "INSERT INTO embeddings (chunk_id, embedding) VALUES (?, vector(?))",
            args: [embRow.chunk_id, JSON.stringify(vectorArray)],
          });
        } catch (e) {
          const embRow = row as { chunk_id: string };
          stats.skippedEmbeddings++;
          stats.errors.push(`Embedding ${embRow.chunk_id}: ${e}`);
        }
      }

      if (statements.length > 0) {
        try {
          await libsqlClient.batch(statements, "write");
          migrated += statements.length;
          stats.embeddings += statements.length;
        } catch (batchError) {
          console.error(
            ` ✗ Embedding batch at offset ${offset}: ${batchError}`
          );
          console.error(" Retrying rows of this batch individually...");

          // A failed batch leaves none of its rows behind, so every row is
          // retried on its own and only the truly failing ones are reported.
          for (const statement of statements) {
            try {
              await libsqlClient.execute(statement);
              migrated++;
              stats.embeddings++;
            } catch (e) {
              const error = `Embedding ${statement.args[0]}: ${e}`;
              stats.errors.push(error);
              console.error(` ✗ ${error}`);
            }
          }
        }
      }

      // Progress is keyed on rows processed so that skipped or failed rows do
      // not cause the same line to be printed repeatedly.
      const processed = Math.min(offset + batchSize, totalEmb);
      if (processed % 500 === 0 || processed >= totalEmb) {
        console.log(
          ` Progress: ${migrated}/${totalEmb} (${stats.skippedEmbeddings} skipped)`
        );
      }
    }

    console.log(` ✓ Migrated ${stats.embeddings} embeddings`);
    if (stats.skippedEmbeddings > 0) {
      console.log(
        ` ⚠ Skipped ${stats.skippedEmbeddings} embeddings (see errors)`
      );
    }
  } catch (e) {
    console.error(` ✗ Failed to migrate embeddings: ${e}`);
    console.log(" (Embeddings can be regenerated after migration)");
    stats.errors.push(`Embeddings migration failed: ${e}`);
  }
}
|
|
322
|
+
|
|
323
|
+
/**
 * Verify migration succeeded.
 *
 * Compares the row counts of `documents`, `chunks` and `embeddings` between
 * the PGlite source and the LibSQL target. Embeddings are considered matching
 * when the source count equals the target count plus the embeddings that were
 * skipped during migration.
 *
 * @param pgDb - PGlite source database.
 * @param libsqlClient - LibSQL target client.
 * @param stats - Only `skippedEmbeddings` is read.
 * @returns `true` when all three counts match; `false` on any mismatch or
 *   when a count query throws.
 */
async function verifyMigration(
  pgDb: PGlite,
  libsqlClient: ReturnType<typeof createClient>,
  stats: MigrationStats
): Promise<boolean> {
  console.log("\nVerifying migration...");

  // Row count of one table in the PGlite source
  const countInPglite = async (table: string): Promise<number> => {
    const res = await pgDb.query(`SELECT COUNT(*) as c FROM ${table}`);
    return parseInt((res.rows[0] as { c: string }).c);
  };

  // Row count of one table in the LibSQL target
  const countInLibsql = async (table: string): Promise<number> => {
    const res = await libsqlClient.execute(
      `SELECT COUNT(*) as c FROM ${table}`
    );
    return Number((res.rows[0] as unknown as { c: number }).c);
  };

  try {
    // Check counts: source first, then target
    const pgDocsCount = await countInPglite("documents");
    const pgChunksCount = await countInPglite("chunks");
    const pgEmbCount = await countInPglite("embeddings");

    const libsqlDocsCount = await countInLibsql("documents");
    const libsqlChunksCount = await countInLibsql("chunks");
    const libsqlEmbCount = await countInLibsql("embeddings");

    console.log("\nComparison:");
    console.log(` Documents: ${pgDocsCount} → ${libsqlDocsCount}`);
    console.log(` Chunks: ${pgChunksCount} → ${libsqlChunksCount}`);
    console.log(
      ` Embeddings: ${pgEmbCount} → ${libsqlEmbCount} (${stats.skippedEmbeddings} skipped)`
    );

    // Collect one line per table whose counts disagree
    const mismatches: string[] = [];
    if (pgDocsCount !== libsqlDocsCount) {
      mismatches.push(" - Documents don't match");
    }
    if (pgChunksCount !== libsqlChunksCount) {
      mismatches.push(" - Chunks don't match");
    }
    if (pgEmbCount !== libsqlEmbCount + stats.skippedEmbeddings) {
      mismatches.push(" - Embeddings don't match");
    }

    if (mismatches.length === 0) {
      console.log("\n✓ Verification passed - all counts match!");
      return true;
    }

    console.log("\n✗ Verification failed - count mismatch");
    for (const line of mismatches) {
      console.log(line);
    }
    return false;
  } catch (e) {
    console.error(`\n✗ Verification error: ${e}`);
    return false;
  }
}
|
|
389
|
+
|
|
390
|
+
/**
 * Main migration workflow.
 *
 * 1. Checks that the PGlite database path exists (exits with code 1 if not).
 * 2. Ensures the directory of the LibSQL file exists.
 * 3. Opens both databases, creates the LibSQL schema, then migrates
 *    documents, chunks and embeddings in that order.
 * 4. Verifies row counts and prints a summary (at most 10 errors are listed).
 * 5. Closes both databases and exits with code 0 when verification passed,
 *    otherwise with code 1.
 *
 * The exit code is only applied after the `finally` block has run:
 * calling process.exit() inside try/catch would terminate the process
 * before the databases are closed.
 */
async function main() {
  console.log("=== PGlite to LibSQL Migration ===\n");
  console.log(`PGlite DB: ${pglitePath}`);
  console.log(`LibSQL DB: ${libsqlPath}`);

  // Check PGlite database exists (nothing has been opened yet, so exiting
  // right here leaks no resources)
  if (!existsSync(pglitePath)) {
    console.error(`\nError: PGlite database not found at ${pglitePath}`);
    console.error("Run with correct path or use default location.");
    process.exit(1);
  }

  const stats: MigrationStats = {
    documents: 0,
    chunks: 0,
    embeddings: 0,
    skippedEmbeddings: 0,
    errors: [],
  };

  // Ensure LibSQL directory exists
  const libsqlDir = dirname(libsqlPath);
  if (!existsSync(libsqlDir)) {
    mkdirSync(libsqlDir, { recursive: true });
  }

  console.log("\nOpening PGlite database...");
  const pgDb = new PGlite(pglitePath, { extensions: { vector } });
  await pgDb.waitReady;
  console.log("✓ PGlite ready");

  console.log("\nOpening LibSQL database...");
  const libsqlClient = createClient({
    url: `file:${libsqlPath}`,
  });
  console.log("✓ LibSQL ready");

  // Pessimistic default: only a verified migration flips this to 0
  let exitCode = 1;

  try {
    // Initialize LibSQL schema
    await initLibSQLSchema(libsqlClient);

    // Migrate data
    await migrateDocuments(pgDb, libsqlClient, stats);
    await migrateChunks(pgDb, libsqlClient, stats);
    await migrateEmbeddings(pgDb, libsqlClient, stats);

    // Verify migration
    const verified = await verifyMigration(pgDb, libsqlClient, stats);

    // Summary
    console.log("\n=== Migration Summary ===");
    console.log(`Documents migrated: ${stats.documents}`);
    console.log(`Chunks migrated: ${stats.chunks}`);
    console.log(`Embeddings migrated: ${stats.embeddings}`);
    if (stats.skippedEmbeddings > 0) {
      console.log(`Embeddings skipped: ${stats.skippedEmbeddings}`);
    }
    console.log(`Errors: ${stats.errors.length}`);

    if (stats.errors.length > 0) {
      console.log("\nErrors:");
      stats.errors.slice(0, 10).forEach((err) => console.log(` - ${err}`));
      if (stats.errors.length > 10) {
        console.log(` ... and ${stats.errors.length - 10} more`);
      }
    }

    if (verified) {
      console.log("\n✓ Migration completed successfully!");
      console.log(`\nLibSQL database created at: ${libsqlPath}`);
      exitCode = 0;
    } else {
      console.log("\n⚠ Migration completed with verification warnings");
    }
  } catch (e) {
    console.error(`\nMigration failed: ${e}`);
  } finally {
    // Close both handles; the nested finally guarantees the LibSQL client is
    // closed even when closing PGlite throws.
    try {
      await pgDb.close();
    } finally {
      libsqlClient.close();
    }
  }

  process.exit(exitCode);
}
|
|
476
|
+
|
|
477
|
+
// Script entry point: run the migration and turn any error that escapes
// main() into a logged failure with exit code 1.
main().catch((fatal) => {
  console.error("Fatal error:", fatal);
  process.exit(1);
});
|