rag-lite-ts 2.2.0 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +88 -5
- package/dist/cjs/cli/indexer.js +73 -15
- package/dist/cjs/cli/ui-server.d.ts +5 -0
- package/dist/cjs/cli/ui-server.js +152 -0
- package/dist/cjs/cli.js +25 -6
- package/dist/cjs/core/binary-index-format.js +6 -3
- package/dist/cjs/core/db.d.ts +56 -0
- package/dist/cjs/core/db.js +105 -0
- package/dist/cjs/core/ingestion.js +3 -0
- package/dist/cjs/core/knowledge-base-manager.d.ts +109 -0
- package/dist/cjs/core/knowledge-base-manager.js +256 -0
- package/dist/cjs/core/search-pipeline.js +1 -1
- package/dist/cjs/core/search.js +1 -1
- package/dist/cjs/core/vector-index-messages.d.ts +52 -0
- package/dist/cjs/core/vector-index-messages.js +5 -0
- package/dist/cjs/core/vector-index-worker.d.ts +6 -0
- package/dist/cjs/core/vector-index-worker.js +304 -0
- package/dist/cjs/core/vector-index.d.ts +45 -10
- package/dist/cjs/core/vector-index.js +229 -218
- package/dist/cjs/factories/ingestion-factory.js +3 -7
- package/dist/cjs/factories/search-factory.js +11 -0
- package/dist/cjs/index-manager.d.ts +23 -3
- package/dist/cjs/index-manager.js +84 -15
- package/dist/cjs/index.d.ts +2 -1
- package/dist/cjs/index.js +3 -1
- package/dist/esm/cli/indexer.js +73 -15
- package/dist/esm/cli/ui-server.d.ts +5 -0
- package/dist/esm/cli/ui-server.js +152 -0
- package/dist/esm/cli.js +25 -6
- package/dist/esm/core/binary-index-format.js +6 -3
- package/dist/esm/core/db.d.ts +56 -0
- package/dist/esm/core/db.js +105 -0
- package/dist/esm/core/ingestion.js +3 -0
- package/dist/esm/core/knowledge-base-manager.d.ts +109 -0
- package/dist/esm/core/knowledge-base-manager.js +256 -0
- package/dist/esm/core/search-pipeline.js +1 -1
- package/dist/esm/core/search.js +1 -1
- package/dist/esm/core/vector-index-messages.d.ts +52 -0
- package/dist/esm/core/vector-index-messages.js +5 -0
- package/dist/esm/core/vector-index-worker.d.ts +6 -0
- package/dist/esm/core/vector-index-worker.js +304 -0
- package/dist/esm/core/vector-index.d.ts +45 -10
- package/dist/esm/core/vector-index.js +229 -218
- package/dist/esm/factories/ingestion-factory.js +3 -7
- package/dist/esm/factories/search-factory.js +11 -0
- package/dist/esm/index-manager.d.ts +23 -3
- package/dist/esm/index-manager.js +84 -15
- package/dist/esm/index.d.ts +2 -1
- package/dist/esm/index.js +3 -1
- package/package.json +14 -7
|
@@ -64,7 +64,7 @@ export class IndexManager {
|
|
|
64
64
|
this.hashEmbeddingId(chunk.embedding_id); // This will populate the mapping
|
|
65
65
|
}
|
|
66
66
|
this.isInitialized = true;
|
|
67
|
-
const vectorCount = this.vectorIndex.getCurrentCount();
|
|
67
|
+
const vectorCount = await this.vectorIndex.getCurrentCount();
|
|
68
68
|
console.log(`Index manager initialized with ${vectorCount} vectors${this.textIndex && this.imageIndex ? ' (multi-graph mode)' : ''}`);
|
|
69
69
|
}
|
|
70
70
|
catch (error) {
|
|
@@ -145,17 +145,18 @@ export class IndexManager {
|
|
|
145
145
|
vector: embedding.vector
|
|
146
146
|
}));
|
|
147
147
|
// Check if we need to resize the index before adding
|
|
148
|
-
const
|
|
149
|
-
const newCount =
|
|
148
|
+
const initialCount = await this.vectorIndex.getCurrentCount();
|
|
149
|
+
const newCount = initialCount + vectors.length;
|
|
150
150
|
const currentCapacity = 100000; // This should match the initial capacity
|
|
151
151
|
if (newCount > currentCapacity * 0.9) {
|
|
152
152
|
const newCapacity = Math.ceil(newCount * 1.5);
|
|
153
153
|
console.log(`Resizing index from ${currentCapacity} to ${newCapacity} to accommodate new vectors`);
|
|
154
|
-
this.vectorIndex.resizeIndex(newCapacity);
|
|
154
|
+
await this.vectorIndex.resizeIndex(newCapacity);
|
|
155
155
|
}
|
|
156
156
|
// Add vectors incrementally (this is the key requirement - no rebuild needed)
|
|
157
|
-
this.vectorIndex.addVectors(vectors);
|
|
158
|
-
|
|
157
|
+
await this.vectorIndex.addVectors(vectors);
|
|
158
|
+
const finalCount = await this.vectorIndex.getCurrentCount();
|
|
159
|
+
console.log(`Incrementally added ${embeddings.length} vectors to index (total: ${finalCount})`);
|
|
159
160
|
// Save the updated index
|
|
160
161
|
await this.saveIndex();
|
|
161
162
|
}
|
|
@@ -223,7 +224,7 @@ export class IndexManager {
|
|
|
223
224
|
const currentCapacity = 100000; // Default capacity
|
|
224
225
|
if (chunkData.length > currentCapacity * 0.8) {
|
|
225
226
|
const newCapacity = Math.ceil(chunkData.length * 1.5);
|
|
226
|
-
this.vectorIndex.resizeIndex(newCapacity);
|
|
227
|
+
await this.vectorIndex.resizeIndex(newCapacity);
|
|
227
228
|
console.log(`Resized index capacity to ${newCapacity} for ${chunkData.length} chunks`);
|
|
228
229
|
}
|
|
229
230
|
// Update model version if provided
|
|
@@ -279,7 +280,7 @@ export class IndexManager {
|
|
|
279
280
|
const currentCapacity = 100000;
|
|
280
281
|
if (chunkData.length > currentCapacity * 0.8) {
|
|
281
282
|
const newCapacity = Math.ceil(chunkData.length * 1.5);
|
|
282
|
-
this.vectorIndex.resizeIndex(newCapacity);
|
|
283
|
+
await this.vectorIndex.resizeIndex(newCapacity);
|
|
283
284
|
console.log(`Resized index capacity to ${newCapacity}`);
|
|
284
285
|
}
|
|
285
286
|
// Re-generate embeddings for all chunks
|
|
@@ -294,7 +295,7 @@ export class IndexManager {
|
|
|
294
295
|
id: this.hashEmbeddingId(embedding.embedding_id),
|
|
295
296
|
vector: embedding.vector
|
|
296
297
|
}));
|
|
297
|
-
this.vectorIndex.addVectors(vectors);
|
|
298
|
+
await this.vectorIndex.addVectors(vectors);
|
|
298
299
|
console.log(`Added ${vectors.length} vectors to rebuilt index`);
|
|
299
300
|
// Update model version
|
|
300
301
|
await this.updateModelVersion(embeddingEngine.getModelVersion());
|
|
@@ -414,12 +415,12 @@ export class IndexManager {
|
|
|
414
415
|
// Create text-only index
|
|
415
416
|
this.textIndex = new VectorIndex(`${this.indexPath}.text`, this.vectorIndexOptions);
|
|
416
417
|
await this.textIndex.initialize();
|
|
417
|
-
this.textIndex.addVectors(indexData.textVectors);
|
|
418
|
+
await this.textIndex.addVectors(indexData.textVectors);
|
|
418
419
|
console.log(`✓ Text index created with ${indexData.textVectors.length} vectors`);
|
|
419
420
|
// Create image-only index
|
|
420
421
|
this.imageIndex = new VectorIndex(`${this.indexPath}.image`, this.vectorIndexOptions);
|
|
421
422
|
await this.imageIndex.initialize();
|
|
422
|
-
this.imageIndex.addVectors(indexData.imageVectors);
|
|
423
|
+
await this.imageIndex.addVectors(indexData.imageVectors);
|
|
423
424
|
console.log(`✓ Image index created with ${indexData.imageVectors.length} vectors`);
|
|
424
425
|
console.log('✓ Specialized indexes ready for content type filtering');
|
|
425
426
|
}
|
|
@@ -475,8 +476,9 @@ export class IndexManager {
|
|
|
475
476
|
}
|
|
476
477
|
/**
|
|
477
478
|
* Search for similar vectors
|
|
479
|
+
* Now async due to worker-based VectorIndex implementation
|
|
478
480
|
*/
|
|
479
|
-
search(queryVector, k = 5, contentType) {
|
|
481
|
+
async search(queryVector, k = 5, contentType) {
|
|
480
482
|
if (!this.isInitialized) {
|
|
481
483
|
throw new Error('Index manager not initialized');
|
|
482
484
|
}
|
|
@@ -499,7 +501,7 @@ export class IndexManager {
|
|
|
499
501
|
// No specialized indexes (text-only mode) - ignore contentType and use combined index
|
|
500
502
|
targetIndex = this.vectorIndex;
|
|
501
503
|
}
|
|
502
|
-
const results = targetIndex.search(queryVector, k);
|
|
504
|
+
const results = await targetIndex.search(queryVector, k);
|
|
503
505
|
// Convert numeric IDs back to embedding IDs
|
|
504
506
|
const embeddingIds = results.neighbors.map(id => this.unhashEmbeddingId(id));
|
|
505
507
|
return {
|
|
@@ -514,7 +516,7 @@ export class IndexManager {
|
|
|
514
516
|
if (!this.db) {
|
|
515
517
|
throw new Error('Database not initialized');
|
|
516
518
|
}
|
|
517
|
-
const totalVectors = this.vectorIndex.getCurrentCount();
|
|
519
|
+
const totalVectors = await this.vectorIndex.getCurrentCount();
|
|
518
520
|
try {
|
|
519
521
|
const systemInfo = await getSystemInfo(this.db);
|
|
520
522
|
const modelVersion = systemInfo?.modelVersion || null;
|
|
@@ -586,13 +588,80 @@ export class IndexManager {
|
|
|
586
588
|
return embeddingId;
|
|
587
589
|
}
|
|
588
590
|
/**
|
|
589
|
-
* Close database connection
|
|
591
|
+
* Close database connection and cleanup vector index worker
|
|
590
592
|
*/
|
|
591
593
|
async close() {
|
|
592
594
|
if (this.db) {
|
|
593
595
|
await this.db.close();
|
|
594
596
|
this.db = null;
|
|
595
597
|
}
|
|
598
|
+
// Clean up vector index worker to free WebAssembly memory
|
|
599
|
+
if (this.vectorIndex && typeof this.vectorIndex.cleanup === 'function') {
|
|
600
|
+
await this.vectorIndex.cleanup();
|
|
601
|
+
}
|
|
602
|
+
// Also clean up specialized indexes
|
|
603
|
+
if (this.textIndex && typeof this.textIndex.cleanup === 'function') {
|
|
604
|
+
await this.textIndex.cleanup();
|
|
605
|
+
}
|
|
606
|
+
if (this.imageIndex && typeof this.imageIndex.cleanup === 'function') {
|
|
607
|
+
await this.imageIndex.cleanup();
|
|
608
|
+
}
|
|
609
|
+
}
|
|
610
|
+
/**
|
|
611
|
+
* Reset the vector index by clearing all vectors while keeping the index structure.
|
|
612
|
+
* This is a safer alternative to file deletion that avoids file locking issues on Windows.
|
|
613
|
+
*
|
|
614
|
+
* The reset operation:
|
|
615
|
+
* 1. Clears in-memory HNSW index
|
|
616
|
+
* 2. Clears in-memory vector storage and ID mappings
|
|
617
|
+
* 3. Reinitializes an empty index with the same parameters
|
|
618
|
+
* 4. Saves the empty index to disk (overwrites existing file)
|
|
619
|
+
*
|
|
620
|
+
* @returns Promise that resolves when reset is complete
|
|
621
|
+
*/
|
|
622
|
+
async reset() {
|
|
623
|
+
console.log('🔄 Starting index reset...');
|
|
624
|
+
const startTime = Date.now();
|
|
625
|
+
try {
|
|
626
|
+
// Clear in-memory mappings
|
|
627
|
+
const previousVectorCount = await this.vectorIndex.getCurrentCount();
|
|
628
|
+
this.hashToEmbeddingId.clear();
|
|
629
|
+
this.embeddingIdToHash.clear();
|
|
630
|
+
// Clear grouped embeddings if any
|
|
631
|
+
this.groupedEmbeddings = undefined;
|
|
632
|
+
// Clear specialized indexes if they exist
|
|
633
|
+
if (this.textIndex) {
|
|
634
|
+
this.textIndex = undefined;
|
|
635
|
+
}
|
|
636
|
+
if (this.imageIndex) {
|
|
637
|
+
this.imageIndex = undefined;
|
|
638
|
+
}
|
|
639
|
+
// Reset the vector index (clears all vectors and reinitializes empty HNSW graph)
|
|
640
|
+
console.log(' Resetting HNSW index...');
|
|
641
|
+
await this.vectorIndex.reset();
|
|
642
|
+
// Save the empty index to disk (this overwrites the existing file)
|
|
643
|
+
console.log(' Saving empty index to disk...');
|
|
644
|
+
await this.vectorIndex.saveIndex();
|
|
645
|
+
const resetTimeMs = Date.now() - startTime;
|
|
646
|
+
const currentCount = await this.vectorIndex.getCurrentCount();
|
|
647
|
+
console.log(`✓ Index reset complete in ${resetTimeMs}ms`);
|
|
648
|
+
console.log(` Vectors cleared: ${previousVectorCount}`);
|
|
649
|
+
console.log(` Current vector count: ${currentCount}`);
|
|
650
|
+
}
|
|
651
|
+
catch (error) {
|
|
652
|
+
const resetTimeMs = Date.now() - startTime;
|
|
653
|
+
console.error(`❌ Index reset failed after ${resetTimeMs}ms:`, error);
|
|
654
|
+
throw new Error(`Failed to reset index: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
655
|
+
}
|
|
656
|
+
}
|
|
657
|
+
/**
|
|
658
|
+
* Check if the index has any vectors
|
|
659
|
+
* @returns true if the index contains vectors, false if empty
|
|
660
|
+
* Now async due to worker-based VectorIndex implementation
|
|
661
|
+
*/
|
|
662
|
+
async hasVectors() {
|
|
663
|
+
const count = await this.vectorIndex.getCurrentCount();
|
|
664
|
+
return count > 0;
|
|
596
665
|
}
|
|
597
666
|
}
|
|
598
667
|
//# sourceMappingURL=index-manager.js.map
|
package/dist/esm/index.d.ts
CHANGED
|
@@ -59,7 +59,8 @@ export { CrossEncoderReranker, createTextRerankFunction } from './text/reranker.
|
|
|
59
59
|
export { countTokens } from './text/tokenizer.js';
|
|
60
60
|
export type { RerankingStrategyType, RerankingConfig } from './core/reranking-config.js';
|
|
61
61
|
export { validateRerankingStrategy, validateRerankingConfig, getDefaultRerankingConfig, isStrategySupported, getSupportedStrategies, RerankingConfigBuilder, DEFAULT_TEXT_RERANKING_CONFIG, DEFAULT_MULTIMODAL_RERANKING_CONFIG } from './core/reranking-config.js';
|
|
62
|
-
export { openDatabase, initializeSchema, insertDocument, insertChunk, upsertDocument, getChunksByEmbeddingIds, type DatabaseConnection } from './core/db.js';
|
|
62
|
+
export { openDatabase, initializeSchema, insertDocument, insertChunk, upsertDocument, getChunksByEmbeddingIds, resetDatabase, hasDatabaseData, type DatabaseConnection, type DatabaseResetOptions, type DatabaseResetResult } from './core/db.js';
|
|
63
|
+
export { KnowledgeBaseManager, type KnowledgeBaseResetOptions, type KnowledgeBaseResetResult } from './core/knowledge-base-manager.js';
|
|
63
64
|
export { IndexManager } from './index-manager.js';
|
|
64
65
|
export { VectorIndex } from './core/vector-index.js';
|
|
65
66
|
export { config, getModelDefaults, type CoreConfig, type ExtensibleConfig, type ModelDefaults, EXIT_CODES, ConfigurationError, getDefaultModelCachePath, handleUnrecoverableError, logError } from './core/config.js';
|
package/dist/esm/index.js
CHANGED
|
@@ -83,7 +83,9 @@ export { validateRerankingStrategy, validateRerankingConfig, getDefaultReranking
|
|
|
83
83
|
// CORE INFRASTRUCTURE (FOR ADVANCED USERS)
|
|
84
84
|
// =============================================================================
|
|
85
85
|
// Database operations
|
|
86
|
-
export { openDatabase, initializeSchema, insertDocument, insertChunk, upsertDocument, getChunksByEmbeddingIds } from './core/db.js';
|
|
86
|
+
export { openDatabase, initializeSchema, insertDocument, insertChunk, upsertDocument, getChunksByEmbeddingIds, resetDatabase, hasDatabaseData } from './core/db.js';
|
|
87
|
+
// Knowledge Base Manager (for reset operations)
|
|
88
|
+
export { KnowledgeBaseManager } from './core/knowledge-base-manager.js';
|
|
87
89
|
// Vector index management
|
|
88
90
|
export { IndexManager } from './index-manager.js';
|
|
89
91
|
export { VectorIndex } from './core/vector-index.js';
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "rag-lite-ts",
|
|
3
|
-
"version": "2.2.0",
|
|
3
|
+
"version": "2.3.0",
|
|
4
4
|
"description": "Local-first TypeScript retrieval engine with Chameleon Multimodal Architecture for semantic search over text and image content",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/cjs/index.js",
|
|
@@ -11,6 +11,7 @@
|
|
|
11
11
|
},
|
|
12
12
|
"exports": {
|
|
13
13
|
".": {
|
|
14
|
+
"types": "./dist/esm/index.d.ts",
|
|
14
15
|
"import": {
|
|
15
16
|
"types": "./dist/esm/index.d.ts",
|
|
16
17
|
"default": "./dist/esm/index.js"
|
|
@@ -18,10 +19,10 @@
|
|
|
18
19
|
"require": {
|
|
19
20
|
"types": "./dist/cjs/index.d.ts",
|
|
20
21
|
"default": "./dist/cjs/index.js"
|
|
21
|
-
}
|
|
22
|
-
"types": "./dist/esm/index.d.ts"
|
|
22
|
+
}
|
|
23
23
|
},
|
|
24
24
|
"./mcp": {
|
|
25
|
+
"types": "./dist/esm/mcp-server.d.ts",
|
|
25
26
|
"import": {
|
|
26
27
|
"types": "./dist/esm/mcp-server.d.ts",
|
|
27
28
|
"default": "./dist/esm/mcp-server.js"
|
|
@@ -29,8 +30,7 @@
|
|
|
29
30
|
"require": {
|
|
30
31
|
"types": "./dist/cjs/mcp-server.d.ts",
|
|
31
32
|
"default": "./dist/cjs/mcp-server.js"
|
|
32
|
-
}
|
|
33
|
-
"types": "./dist/esm/mcp-server.d.ts"
|
|
33
|
+
}
|
|
34
34
|
}
|
|
35
35
|
},
|
|
36
36
|
"files": [
|
|
@@ -39,6 +39,10 @@
|
|
|
39
39
|
"dist/esm/**/*.d.ts",
|
|
40
40
|
"dist/cjs/**/*.d.ts",
|
|
41
41
|
"!dist/**/*.map",
|
|
42
|
+
"ui/backend/dist/**",
|
|
43
|
+
"ui/frontend/dist/**",
|
|
44
|
+
"!ui/**/src/**",
|
|
45
|
+
"!ui/**/node_modules/**",
|
|
42
46
|
"README.md",
|
|
43
47
|
"LICENSE"
|
|
44
48
|
],
|
|
@@ -47,7 +51,10 @@
|
|
|
47
51
|
"build:esm": "tsc -p tsconfig.esm.json",
|
|
48
52
|
"build:cjs": "tsc -p tsconfig.cjs.json",
|
|
49
53
|
"build:test": "tsc --project tsconfig.test.json",
|
|
50
|
-
"
|
|
54
|
+
"build:ui:backend": "cd ui/backend && npm run build",
|
|
55
|
+
"build:ui:frontend": "cd ui/frontend && npm run build",
|
|
56
|
+
"build:ui": "npm run build:ui:backend && npm run build:ui:frontend",
|
|
57
|
+
"clean": "rimraf dist ui/backend/dist ui/frontend/dist",
|
|
51
58
|
"dev": "tsc --watch",
|
|
52
59
|
"test": "npm run build:test && node --expose-gc --test --test-concurrency=1 dist/__tests__/core dist/__tests__/text dist/__tests__/preprocessors dist/__tests__/cli dist/__tests__/factories",
|
|
53
60
|
"test:verbose": "npm run build:test && node --expose-gc --test --test-concurrency=1 --test-reporter=tap dist/__tests__/core dist/__tests__/text dist/__tests__/preprocessors dist/__tests__/cli dist/__tests__/factories",
|
|
@@ -63,7 +70,7 @@
|
|
|
63
70
|
"test:integration:verbose": "npm run build && npm run build:test && node --expose-gc --test --test-concurrency=1 --test-reporter=tap dist/__tests__/integration",
|
|
64
71
|
"test:all": "npm run build:test && node --expose-gc --test --test-concurrency=1 dist/__tests__",
|
|
65
72
|
"test:all:verbose": "npm run build:test && node --expose-gc --test --test-concurrency=1 --test-reporter=tap dist/__tests__",
|
|
66
|
-
"prepublishOnly": "npm run clean && npm run build"
|
|
73
|
+
"prepublishOnly": "npm run clean && npm run build && npm run build:ui"
|
|
67
74
|
},
|
|
68
75
|
"keywords": [
|
|
69
76
|
"rag",
|