rag-lite-ts 2.2.0 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/README.md +88 -5
  2. package/dist/cjs/cli/indexer.js +73 -15
  3. package/dist/cjs/cli/ui-server.d.ts +5 -0
  4. package/dist/cjs/cli/ui-server.js +152 -0
  5. package/dist/cjs/cli.js +25 -6
  6. package/dist/cjs/core/binary-index-format.js +6 -3
  7. package/dist/cjs/core/db.d.ts +56 -0
  8. package/dist/cjs/core/db.js +105 -0
  9. package/dist/cjs/core/ingestion.js +3 -0
  10. package/dist/cjs/core/knowledge-base-manager.d.ts +109 -0
  11. package/dist/cjs/core/knowledge-base-manager.js +256 -0
  12. package/dist/cjs/core/search-pipeline.js +1 -1
  13. package/dist/cjs/core/search.js +1 -1
  14. package/dist/cjs/core/vector-index-messages.d.ts +52 -0
  15. package/dist/cjs/core/vector-index-messages.js +5 -0
  16. package/dist/cjs/core/vector-index-worker.d.ts +6 -0
  17. package/dist/cjs/core/vector-index-worker.js +304 -0
  18. package/dist/cjs/core/vector-index.d.ts +45 -10
  19. package/dist/cjs/core/vector-index.js +229 -218
  20. package/dist/cjs/factories/ingestion-factory.js +3 -7
  21. package/dist/cjs/factories/search-factory.js +11 -0
  22. package/dist/cjs/index-manager.d.ts +23 -3
  23. package/dist/cjs/index-manager.js +84 -15
  24. package/dist/cjs/index.d.ts +2 -1
  25. package/dist/cjs/index.js +3 -1
  26. package/dist/esm/cli/indexer.js +73 -15
  27. package/dist/esm/cli/ui-server.d.ts +5 -0
  28. package/dist/esm/cli/ui-server.js +152 -0
  29. package/dist/esm/cli.js +25 -6
  30. package/dist/esm/core/binary-index-format.js +6 -3
  31. package/dist/esm/core/db.d.ts +56 -0
  32. package/dist/esm/core/db.js +105 -0
  33. package/dist/esm/core/ingestion.js +3 -0
  34. package/dist/esm/core/knowledge-base-manager.d.ts +109 -0
  35. package/dist/esm/core/knowledge-base-manager.js +256 -0
  36. package/dist/esm/core/search-pipeline.js +1 -1
  37. package/dist/esm/core/search.js +1 -1
  38. package/dist/esm/core/vector-index-messages.d.ts +52 -0
  39. package/dist/esm/core/vector-index-messages.js +5 -0
  40. package/dist/esm/core/vector-index-worker.d.ts +6 -0
  41. package/dist/esm/core/vector-index-worker.js +304 -0
  42. package/dist/esm/core/vector-index.d.ts +45 -10
  43. package/dist/esm/core/vector-index.js +229 -218
  44. package/dist/esm/factories/ingestion-factory.js +3 -7
  45. package/dist/esm/factories/search-factory.js +11 -0
  46. package/dist/esm/index-manager.d.ts +23 -3
  47. package/dist/esm/index-manager.js +84 -15
  48. package/dist/esm/index.d.ts +2 -1
  49. package/dist/esm/index.js +3 -1
  50. package/package.json +14 -7
@@ -64,7 +64,7 @@ export class IndexManager {
64
64
  this.hashEmbeddingId(chunk.embedding_id); // This will populate the mapping
65
65
  }
66
66
  this.isInitialized = true;
67
- const vectorCount = this.vectorIndex.getCurrentCount();
67
+ const vectorCount = await this.vectorIndex.getCurrentCount();
68
68
  console.log(`Index manager initialized with ${vectorCount} vectors${this.textIndex && this.imageIndex ? ' (multi-graph mode)' : ''}`);
69
69
  }
70
70
  catch (error) {
@@ -145,17 +145,18 @@ export class IndexManager {
145
145
  vector: embedding.vector
146
146
  }));
147
147
  // Check if we need to resize the index before adding
148
- const currentCount = this.vectorIndex.getCurrentCount();
149
- const newCount = currentCount + vectors.length;
148
+ const initialCount = await this.vectorIndex.getCurrentCount();
149
+ const newCount = initialCount + vectors.length;
150
150
  const currentCapacity = 100000; // This should match the initial capacity
151
151
  if (newCount > currentCapacity * 0.9) {
152
152
  const newCapacity = Math.ceil(newCount * 1.5);
153
153
  console.log(`Resizing index from ${currentCapacity} to ${newCapacity} to accommodate new vectors`);
154
- this.vectorIndex.resizeIndex(newCapacity);
154
+ await this.vectorIndex.resizeIndex(newCapacity);
155
155
  }
156
156
  // Add vectors incrementally (this is the key requirement - no rebuild needed)
157
- this.vectorIndex.addVectors(vectors);
158
- console.log(`Incrementally added ${embeddings.length} vectors to index (total: ${this.vectorIndex.getCurrentCount()})`);
157
+ await this.vectorIndex.addVectors(vectors);
158
+ const finalCount = await this.vectorIndex.getCurrentCount();
159
+ console.log(`Incrementally added ${embeddings.length} vectors to index (total: ${finalCount})`);
159
160
  // Save the updated index
160
161
  await this.saveIndex();
161
162
  }
@@ -223,7 +224,7 @@ export class IndexManager {
223
224
  const currentCapacity = 100000; // Default capacity
224
225
  if (chunkData.length > currentCapacity * 0.8) {
225
226
  const newCapacity = Math.ceil(chunkData.length * 1.5);
226
- this.vectorIndex.resizeIndex(newCapacity);
227
+ await this.vectorIndex.resizeIndex(newCapacity);
227
228
  console.log(`Resized index capacity to ${newCapacity} for ${chunkData.length} chunks`);
228
229
  }
229
230
  // Update model version if provided
@@ -279,7 +280,7 @@ export class IndexManager {
279
280
  const currentCapacity = 100000;
280
281
  if (chunkData.length > currentCapacity * 0.8) {
281
282
  const newCapacity = Math.ceil(chunkData.length * 1.5);
282
- this.vectorIndex.resizeIndex(newCapacity);
283
+ await this.vectorIndex.resizeIndex(newCapacity);
283
284
  console.log(`Resized index capacity to ${newCapacity}`);
284
285
  }
285
286
  // Re-generate embeddings for all chunks
@@ -294,7 +295,7 @@ export class IndexManager {
294
295
  id: this.hashEmbeddingId(embedding.embedding_id),
295
296
  vector: embedding.vector
296
297
  }));
297
- this.vectorIndex.addVectors(vectors);
298
+ await this.vectorIndex.addVectors(vectors);
298
299
  console.log(`Added ${vectors.length} vectors to rebuilt index`);
299
300
  // Update model version
300
301
  await this.updateModelVersion(embeddingEngine.getModelVersion());
@@ -414,12 +415,12 @@ export class IndexManager {
414
415
  // Create text-only index
415
416
  this.textIndex = new VectorIndex(`${this.indexPath}.text`, this.vectorIndexOptions);
416
417
  await this.textIndex.initialize();
417
- this.textIndex.addVectors(indexData.textVectors);
418
+ await this.textIndex.addVectors(indexData.textVectors);
418
419
  console.log(`✓ Text index created with ${indexData.textVectors.length} vectors`);
419
420
  // Create image-only index
420
421
  this.imageIndex = new VectorIndex(`${this.indexPath}.image`, this.vectorIndexOptions);
421
422
  await this.imageIndex.initialize();
422
- this.imageIndex.addVectors(indexData.imageVectors);
423
+ await this.imageIndex.addVectors(indexData.imageVectors);
423
424
  console.log(`✓ Image index created with ${indexData.imageVectors.length} vectors`);
424
425
  console.log('✓ Specialized indexes ready for content type filtering');
425
426
  }
@@ -475,8 +476,9 @@ export class IndexManager {
475
476
  }
476
477
  /**
477
478
  * Search for similar vectors
479
+ * Now async due to worker-based VectorIndex implementation
478
480
  */
479
- search(queryVector, k = 5, contentType) {
481
+ async search(queryVector, k = 5, contentType) {
480
482
  if (!this.isInitialized) {
481
483
  throw new Error('Index manager not initialized');
482
484
  }
@@ -499,7 +501,7 @@ export class IndexManager {
499
501
  // No specialized indexes (text-only mode) - ignore contentType and use combined index
500
502
  targetIndex = this.vectorIndex;
501
503
  }
502
- const results = targetIndex.search(queryVector, k);
504
+ const results = await targetIndex.search(queryVector, k);
503
505
  // Convert numeric IDs back to embedding IDs
504
506
  const embeddingIds = results.neighbors.map(id => this.unhashEmbeddingId(id));
505
507
  return {
@@ -514,7 +516,7 @@ export class IndexManager {
514
516
  if (!this.db) {
515
517
  throw new Error('Database not initialized');
516
518
  }
517
- const totalVectors = this.vectorIndex.getCurrentCount();
519
+ const totalVectors = await this.vectorIndex.getCurrentCount();
518
520
  try {
519
521
  const systemInfo = await getSystemInfo(this.db);
520
522
  const modelVersion = systemInfo?.modelVersion || null;
@@ -586,13 +588,80 @@ export class IndexManager {
586
588
  return embeddingId;
587
589
  }
588
590
  /**
589
- * Close database connection
591
+ * Close database connection and cleanup vector index worker
590
592
  */
591
593
  async close() {
592
594
  if (this.db) {
593
595
  await this.db.close();
594
596
  this.db = null;
595
597
  }
598
+ // Clean up vector index worker to free WebAssembly memory
599
+ if (this.vectorIndex && typeof this.vectorIndex.cleanup === 'function') {
600
+ await this.vectorIndex.cleanup();
601
+ }
602
+ // Also clean up specialized indexes
603
+ if (this.textIndex && typeof this.textIndex.cleanup === 'function') {
604
+ await this.textIndex.cleanup();
605
+ }
606
+ if (this.imageIndex && typeof this.imageIndex.cleanup === 'function') {
607
+ await this.imageIndex.cleanup();
608
+ }
609
+ }
610
+ /**
611
+ * Reset the vector index by clearing all vectors while keeping the index structure.
612
+ * This is a safer alternative to file deletion that avoids file locking issues on Windows.
613
+ *
614
+ * The reset operation:
615
+ * 1. Clears in-memory HNSW index
616
+ * 2. Clears in-memory vector storage and ID mappings
617
+ * 3. Reinitializes an empty index with the same parameters
618
+ * 4. Saves the empty index to disk (overwrites existing file)
619
+ *
620
+ * @returns Promise that resolves when reset is complete
621
+ */
622
+ async reset() {
623
+ console.log('🔄 Starting index reset...');
624
+ const startTime = Date.now();
625
+ try {
626
+ // Clear in-memory mappings
627
+ const previousVectorCount = await this.vectorIndex.getCurrentCount();
628
+ this.hashToEmbeddingId.clear();
629
+ this.embeddingIdToHash.clear();
630
+ // Clear grouped embeddings if any
631
+ this.groupedEmbeddings = undefined;
632
+ // Clear specialized indexes if they exist
633
+ if (this.textIndex) {
634
+ this.textIndex = undefined;
635
+ }
636
+ if (this.imageIndex) {
637
+ this.imageIndex = undefined;
638
+ }
639
+ // Reset the vector index (clears all vectors and reinitializes empty HNSW graph)
640
+ console.log(' Resetting HNSW index...');
641
+ await this.vectorIndex.reset();
642
+ // Save the empty index to disk (this overwrites the existing file)
643
+ console.log(' Saving empty index to disk...');
644
+ await this.vectorIndex.saveIndex();
645
+ const resetTimeMs = Date.now() - startTime;
646
+ const currentCount = await this.vectorIndex.getCurrentCount();
647
+ console.log(`✓ Index reset complete in ${resetTimeMs}ms`);
648
+ console.log(` Vectors cleared: ${previousVectorCount}`);
649
+ console.log(` Current vector count: ${currentCount}`);
650
+ }
651
+ catch (error) {
652
+ const resetTimeMs = Date.now() - startTime;
653
+ console.error(`❌ Index reset failed after ${resetTimeMs}ms:`, error);
654
+ throw new Error(`Failed to reset index: ${error instanceof Error ? error.message : 'Unknown error'}`);
655
+ }
656
+ }
657
+ /**
658
+ * Check if the index has any vectors
659
+ * @returns true if the index contains vectors, false if empty
660
+ * Now async due to worker-based VectorIndex implementation
661
+ */
662
+ async hasVectors() {
663
+ const count = await this.vectorIndex.getCurrentCount();
664
+ return count > 0;
596
665
  }
597
666
  }
598
667
  //# sourceMappingURL=index-manager.js.map
@@ -59,7 +59,8 @@ export { CrossEncoderReranker, createTextRerankFunction } from './text/reranker.
59
59
  export { countTokens } from './text/tokenizer.js';
60
60
  export type { RerankingStrategyType, RerankingConfig } from './core/reranking-config.js';
61
61
  export { validateRerankingStrategy, validateRerankingConfig, getDefaultRerankingConfig, isStrategySupported, getSupportedStrategies, RerankingConfigBuilder, DEFAULT_TEXT_RERANKING_CONFIG, DEFAULT_MULTIMODAL_RERANKING_CONFIG } from './core/reranking-config.js';
62
- export { openDatabase, initializeSchema, insertDocument, insertChunk, upsertDocument, getChunksByEmbeddingIds, type DatabaseConnection } from './core/db.js';
62
+ export { openDatabase, initializeSchema, insertDocument, insertChunk, upsertDocument, getChunksByEmbeddingIds, resetDatabase, hasDatabaseData, type DatabaseConnection, type DatabaseResetOptions, type DatabaseResetResult } from './core/db.js';
63
+ export { KnowledgeBaseManager, type KnowledgeBaseResetOptions, type KnowledgeBaseResetResult } from './core/knowledge-base-manager.js';
63
64
  export { IndexManager } from './index-manager.js';
64
65
  export { VectorIndex } from './core/vector-index.js';
65
66
  export { config, getModelDefaults, type CoreConfig, type ExtensibleConfig, type ModelDefaults, EXIT_CODES, ConfigurationError, getDefaultModelCachePath, handleUnrecoverableError, logError } from './core/config.js';
package/dist/esm/index.js CHANGED
@@ -83,7 +83,9 @@ export { validateRerankingStrategy, validateRerankingConfig, getDefaultReranking
83
83
  // CORE INFRASTRUCTURE (FOR ADVANCED USERS)
84
84
  // =============================================================================
85
85
  // Database operations
86
- export { openDatabase, initializeSchema, insertDocument, insertChunk, upsertDocument, getChunksByEmbeddingIds } from './core/db.js';
86
+ export { openDatabase, initializeSchema, insertDocument, insertChunk, upsertDocument, getChunksByEmbeddingIds, resetDatabase, hasDatabaseData } from './core/db.js';
87
+ // Knowledge Base Manager (for reset operations)
88
+ export { KnowledgeBaseManager } from './core/knowledge-base-manager.js';
87
89
  // Vector index management
88
90
  export { IndexManager } from './index-manager.js';
89
91
  export { VectorIndex } from './core/vector-index.js';
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "rag-lite-ts",
3
- "version": "2.2.0",
3
+ "version": "2.3.0",
4
4
  "description": "Local-first TypeScript retrieval engine with Chameleon Multimodal Architecture for semantic search over text and image content",
5
5
  "type": "module",
6
6
  "main": "./dist/cjs/index.js",
@@ -11,6 +11,7 @@
11
11
  },
12
12
  "exports": {
13
13
  ".": {
14
+ "types": "./dist/esm/index.d.ts",
14
15
  "import": {
15
16
  "types": "./dist/esm/index.d.ts",
16
17
  "default": "./dist/esm/index.js"
@@ -18,10 +19,10 @@
18
19
  "require": {
19
20
  "types": "./dist/cjs/index.d.ts",
20
21
  "default": "./dist/cjs/index.js"
21
- },
22
- "types": "./dist/esm/index.d.ts"
22
+ }
23
23
  },
24
24
  "./mcp": {
25
+ "types": "./dist/esm/mcp-server.d.ts",
25
26
  "import": {
26
27
  "types": "./dist/esm/mcp-server.d.ts",
27
28
  "default": "./dist/esm/mcp-server.js"
@@ -29,8 +30,7 @@
29
30
  "require": {
30
31
  "types": "./dist/cjs/mcp-server.d.ts",
31
32
  "default": "./dist/cjs/mcp-server.js"
32
- },
33
- "types": "./dist/esm/mcp-server.d.ts"
33
+ }
34
34
  }
35
35
  },
36
36
  "files": [
@@ -39,6 +39,10 @@
39
39
  "dist/esm/**/*.d.ts",
40
40
  "dist/cjs/**/*.d.ts",
41
41
  "!dist/**/*.map",
42
+ "ui/backend/dist/**",
43
+ "ui/frontend/dist/**",
44
+ "!ui/**/src/**",
45
+ "!ui/**/node_modules/**",
42
46
  "README.md",
43
47
  "LICENSE"
44
48
  ],
@@ -47,7 +51,10 @@
47
51
  "build:esm": "tsc -p tsconfig.esm.json",
48
52
  "build:cjs": "tsc -p tsconfig.cjs.json",
49
53
  "build:test": "tsc --project tsconfig.test.json",
50
- "clean": "rimraf dist",
54
+ "build:ui:backend": "cd ui/backend && npm run build",
55
+ "build:ui:frontend": "cd ui/frontend && npm run build",
56
+ "build:ui": "npm run build:ui:backend && npm run build:ui:frontend",
57
+ "clean": "rimraf dist ui/backend/dist ui/frontend/dist",
51
58
  "dev": "tsc --watch",
52
59
  "test": "npm run build:test && node --expose-gc --test --test-concurrency=1 dist/__tests__/core dist/__tests__/text dist/__tests__/preprocessors dist/__tests__/cli dist/__tests__/factories",
53
60
  "test:verbose": "npm run build:test && node --expose-gc --test --test-concurrency=1 --test-reporter=tap dist/__tests__/core dist/__tests__/text dist/__tests__/preprocessors dist/__tests__/cli dist/__tests__/factories",
@@ -63,7 +70,7 @@
63
70
  "test:integration:verbose": "npm run build && npm run build:test && node --expose-gc --test --test-concurrency=1 --test-reporter=tap dist/__tests__/integration",
64
71
  "test:all": "npm run build:test && node --expose-gc --test --test-concurrency=1 dist/__tests__",
65
72
  "test:all:verbose": "npm run build:test && node --expose-gc --test --test-concurrency=1 --test-reporter=tap dist/__tests__",
66
- "prepublishOnly": "npm run clean && npm run build"
73
+ "prepublishOnly": "npm run clean && npm run build && npm run build:ui"
67
74
  },
68
75
  "keywords": [
69
76
  "rag",