pdf-brain 0.3.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pdf-brain",
3
- "version": "0.3.1",
3
+ "version": "0.4.0",
4
4
  "description": "Local PDF knowledge base with vector search",
5
5
  "type": "module",
6
6
  "main": "src/index.ts",
package/src/cli.ts CHANGED
@@ -85,6 +85,9 @@ Commands:
85
85
 
86
86
  check Check if Ollama is ready
87
87
 
88
+ repair Fix database integrity issues
89
+ Removes orphaned chunks/embeddings
90
+
88
91
  migrate Database migration utilities
89
92
  --check Check if migration is needed
90
93
  --import <file> Import from SQL dump file
@@ -322,6 +325,38 @@ const program = Effect.gen(function* () {
322
325
  break;
323
326
  }
324
327
 
328
+ case "repair": {
329
+ yield* Console.log("Checking database integrity...\n");
330
+ const result = yield* library.repair();
331
+
332
+ if (
333
+ result.orphanedChunks === 0 &&
334
+ result.orphanedEmbeddings === 0 &&
335
+ result.zeroVectorEmbeddings === 0
336
+ ) {
337
+ yield* Console.log("✓ Database is healthy - no repairs needed");
338
+ } else {
339
+ yield* Console.log("Repairs completed:");
340
+ if (result.orphanedChunks > 0) {
341
+ yield* Console.log(
342
+ ` • Removed ${result.orphanedChunks} orphaned chunks`,
343
+ );
344
+ }
345
+ if (result.orphanedEmbeddings > 0) {
346
+ yield* Console.log(
347
+ ` • Removed ${result.orphanedEmbeddings} orphaned embeddings`,
348
+ );
349
+ }
350
+ if (result.zeroVectorEmbeddings > 0) {
351
+ yield* Console.log(
352
+ ` • Removed ${result.zeroVectorEmbeddings} zero-dimension embeddings`,
353
+ );
354
+ }
355
+ yield* Console.log("\n✓ Database repaired");
356
+ }
357
+ break;
358
+ }
359
+
325
360
  default:
326
361
  yield* Console.error(`Unknown command: ${command}`);
327
362
  yield* Console.log(HELP);
package/src/index.ts CHANGED
@@ -289,6 +289,12 @@ export class PDFLibrary extends Effect.Service<PDFLibrary>()("PDFLibrary", {
289
289
  libraryPath: config.libraryPath,
290
290
  };
291
291
  }),
292
+
293
+ /**
294
+ * Repair database integrity issues
295
+ * Removes orphaned chunks and embeddings
296
+ */
297
+ repair: () => db.repair(),
292
298
  };
293
299
  }),
294
300
  dependencies: [OllamaLive, PDFExtractorLive, DatabaseLive],
@@ -76,6 +76,16 @@ export class Database extends Context.Tag("Database")<
76
76
  { documents: number; chunks: number; embeddings: number },
77
77
  DatabaseError
78
78
  >;
79
+
80
+ // Maintenance
81
+ readonly repair: () => Effect.Effect<
82
+ {
83
+ orphanedChunks: number;
84
+ orphanedEmbeddings: number;
85
+ zeroVectorEmbeddings: number;
86
+ },
87
+ DatabaseError
88
+ >;
79
89
  }
80
90
  >() {}
81
91
 
@@ -464,6 +474,70 @@ export const DatabaseLive = Layer.scoped(
464
474
  },
465
475
  catch: (e) => new DatabaseError({ reason: String(e) }),
466
476
  }),
477
+
478
+ repair: () =>
479
+ Effect.tryPromise({
480
+ try: async () => {
481
+ // Count orphaned chunks (doc_id not in documents)
482
+ const orphanedChunksResult = await db.query(`
483
+ SELECT COUNT(*) as count FROM chunks c
484
+ WHERE NOT EXISTS (SELECT 1 FROM documents d WHERE d.id = c.doc_id)
485
+ `);
486
+ const orphanedChunks = Number(
487
+ (orphanedChunksResult.rows[0] as { count: number }).count,
488
+ );
489
+
490
+ // Count orphaned embeddings (chunk_id not in chunks)
491
+ const orphanedEmbeddingsResult = await db.query(`
492
+ SELECT COUNT(*) as count FROM embeddings e
493
+ WHERE NOT EXISTS (SELECT 1 FROM chunks c WHERE c.id = e.chunk_id)
494
+ `);
495
+ const orphanedEmbeddings = Number(
496
+ (orphanedEmbeddingsResult.rows[0] as { count: number }).count,
497
+ );
498
+
499
+ // Count zero-dimension embeddings (vector_dims returns 0 or null)
500
+ // Note: In pgvector, we check for malformed vectors
501
+ const zeroVectorResult = await db.query(`
502
+ SELECT COUNT(*) as count FROM embeddings
503
+ WHERE embedding IS NULL OR vector_dims(embedding) = 0
504
+ `);
505
+ const zeroVectorEmbeddings = Number(
506
+ (zeroVectorResult.rows[0] as { count: number }).count,
507
+ );
508
+
509
+ // Delete orphaned embeddings first (depends on chunks)
510
+ if (orphanedEmbeddings > 0) {
511
+ await db.query(`
512
+ DELETE FROM embeddings e
513
+ WHERE NOT EXISTS (SELECT 1 FROM chunks c WHERE c.id = e.chunk_id)
514
+ `);
515
+ }
516
+
517
+ // Delete orphaned chunks (depends on documents)
518
+ if (orphanedChunks > 0) {
519
+ await db.query(`
520
+ DELETE FROM chunks c
521
+ WHERE NOT EXISTS (SELECT 1 FROM documents d WHERE d.id = c.doc_id)
522
+ `);
523
+ }
524
+
525
+ // Delete zero-dimension embeddings
526
+ if (zeroVectorEmbeddings > 0) {
527
+ await db.query(`
528
+ DELETE FROM embeddings
529
+ WHERE embedding IS NULL OR vector_dims(embedding) = 0
530
+ `);
531
+ }
532
+
533
+ return {
534
+ orphanedChunks,
535
+ orphanedEmbeddings,
536
+ zeroVectorEmbeddings,
537
+ };
538
+ },
539
+ catch: (e) => new DatabaseError({ reason: String(e) }),
540
+ }),
467
541
  };
468
542
  }),
469
543
  );