rag-lite-ts 2.2.0 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +88 -5
- package/dist/cjs/cli/indexer.js +73 -15
- package/dist/cjs/cli/ui-server.d.ts +5 -0
- package/dist/cjs/cli/ui-server.js +152 -0
- package/dist/cjs/cli.js +25 -6
- package/dist/cjs/core/binary-index-format.js +6 -3
- package/dist/cjs/core/db.d.ts +56 -0
- package/dist/cjs/core/db.js +105 -0
- package/dist/cjs/core/ingestion.js +3 -0
- package/dist/cjs/core/knowledge-base-manager.d.ts +109 -0
- package/dist/cjs/core/knowledge-base-manager.js +256 -0
- package/dist/cjs/core/search-pipeline.js +1 -1
- package/dist/cjs/core/search.js +1 -1
- package/dist/cjs/core/vector-index-messages.d.ts +52 -0
- package/dist/cjs/core/vector-index-messages.js +5 -0
- package/dist/cjs/core/vector-index-worker.d.ts +6 -0
- package/dist/cjs/core/vector-index-worker.js +304 -0
- package/dist/cjs/core/vector-index.d.ts +45 -10
- package/dist/cjs/core/vector-index.js +229 -218
- package/dist/cjs/factories/ingestion-factory.js +3 -7
- package/dist/cjs/factories/search-factory.js +11 -0
- package/dist/cjs/index-manager.d.ts +23 -3
- package/dist/cjs/index-manager.js +84 -15
- package/dist/cjs/index.d.ts +2 -1
- package/dist/cjs/index.js +3 -1
- package/dist/esm/cli/indexer.js +73 -15
- package/dist/esm/cli/ui-server.d.ts +5 -0
- package/dist/esm/cli/ui-server.js +152 -0
- package/dist/esm/cli.js +25 -6
- package/dist/esm/core/binary-index-format.js +6 -3
- package/dist/esm/core/db.d.ts +56 -0
- package/dist/esm/core/db.js +105 -0
- package/dist/esm/core/ingestion.js +3 -0
- package/dist/esm/core/knowledge-base-manager.d.ts +109 -0
- package/dist/esm/core/knowledge-base-manager.js +256 -0
- package/dist/esm/core/search-pipeline.js +1 -1
- package/dist/esm/core/search.js +1 -1
- package/dist/esm/core/vector-index-messages.d.ts +52 -0
- package/dist/esm/core/vector-index-messages.js +5 -0
- package/dist/esm/core/vector-index-worker.d.ts +6 -0
- package/dist/esm/core/vector-index-worker.js +304 -0
- package/dist/esm/core/vector-index.d.ts +45 -10
- package/dist/esm/core/vector-index.js +229 -218
- package/dist/esm/factories/ingestion-factory.js +3 -7
- package/dist/esm/factories/search-factory.js +11 -0
- package/dist/esm/index-manager.d.ts +23 -3
- package/dist/esm/index-manager.js +84 -15
- package/dist/esm/index.d.ts +2 -1
- package/dist/esm/index.js +3 -1
- package/package.json +14 -7
|
@@ -64,7 +64,7 @@ export class IndexManager {
|
|
|
64
64
|
this.hashEmbeddingId(chunk.embedding_id); // This will populate the mapping
|
|
65
65
|
}
|
|
66
66
|
this.isInitialized = true;
|
|
67
|
-
const vectorCount = this.vectorIndex.getCurrentCount();
|
|
67
|
+
const vectorCount = await this.vectorIndex.getCurrentCount();
|
|
68
68
|
console.log(`Index manager initialized with ${vectorCount} vectors${this.textIndex && this.imageIndex ? ' (multi-graph mode)' : ''}`);
|
|
69
69
|
}
|
|
70
70
|
catch (error) {
|
|
@@ -145,17 +145,18 @@ export class IndexManager {
|
|
|
145
145
|
vector: embedding.vector
|
|
146
146
|
}));
|
|
147
147
|
// Check if we need to resize the index before adding
|
|
148
|
-
const
|
|
149
|
-
const newCount =
|
|
148
|
+
const initialCount = await this.vectorIndex.getCurrentCount();
|
|
149
|
+
const newCount = initialCount + vectors.length;
|
|
150
150
|
const currentCapacity = 100000; // This should match the initial capacity
|
|
151
151
|
if (newCount > currentCapacity * 0.9) {
|
|
152
152
|
const newCapacity = Math.ceil(newCount * 1.5);
|
|
153
153
|
console.log(`Resizing index from ${currentCapacity} to ${newCapacity} to accommodate new vectors`);
|
|
154
|
-
this.vectorIndex.resizeIndex(newCapacity);
|
|
154
|
+
await this.vectorIndex.resizeIndex(newCapacity);
|
|
155
155
|
}
|
|
156
156
|
// Add vectors incrementally (this is the key requirement - no rebuild needed)
|
|
157
|
-
this.vectorIndex.addVectors(vectors);
|
|
158
|
-
|
|
157
|
+
await this.vectorIndex.addVectors(vectors);
|
|
158
|
+
const finalCount = await this.vectorIndex.getCurrentCount();
|
|
159
|
+
console.log(`Incrementally added ${embeddings.length} vectors to index (total: ${finalCount})`);
|
|
159
160
|
// Save the updated index
|
|
160
161
|
await this.saveIndex();
|
|
161
162
|
}
|
|
@@ -223,7 +224,7 @@ export class IndexManager {
|
|
|
223
224
|
const currentCapacity = 100000; // Default capacity
|
|
224
225
|
if (chunkData.length > currentCapacity * 0.8) {
|
|
225
226
|
const newCapacity = Math.ceil(chunkData.length * 1.5);
|
|
226
|
-
this.vectorIndex.resizeIndex(newCapacity);
|
|
227
|
+
await this.vectorIndex.resizeIndex(newCapacity);
|
|
227
228
|
console.log(`Resized index capacity to ${newCapacity} for ${chunkData.length} chunks`);
|
|
228
229
|
}
|
|
229
230
|
// Update model version if provided
|
|
@@ -279,7 +280,7 @@ export class IndexManager {
|
|
|
279
280
|
const currentCapacity = 100000;
|
|
280
281
|
if (chunkData.length > currentCapacity * 0.8) {
|
|
281
282
|
const newCapacity = Math.ceil(chunkData.length * 1.5);
|
|
282
|
-
this.vectorIndex.resizeIndex(newCapacity);
|
|
283
|
+
await this.vectorIndex.resizeIndex(newCapacity);
|
|
283
284
|
console.log(`Resized index capacity to ${newCapacity}`);
|
|
284
285
|
}
|
|
285
286
|
// Re-generate embeddings for all chunks
|
|
@@ -294,7 +295,7 @@ export class IndexManager {
|
|
|
294
295
|
id: this.hashEmbeddingId(embedding.embedding_id),
|
|
295
296
|
vector: embedding.vector
|
|
296
297
|
}));
|
|
297
|
-
this.vectorIndex.addVectors(vectors);
|
|
298
|
+
await this.vectorIndex.addVectors(vectors);
|
|
298
299
|
console.log(`Added ${vectors.length} vectors to rebuilt index`);
|
|
299
300
|
// Update model version
|
|
300
301
|
await this.updateModelVersion(embeddingEngine.getModelVersion());
|
|
@@ -414,12 +415,12 @@ export class IndexManager {
|
|
|
414
415
|
// Create text-only index
|
|
415
416
|
this.textIndex = new VectorIndex(`${this.indexPath}.text`, this.vectorIndexOptions);
|
|
416
417
|
await this.textIndex.initialize();
|
|
417
|
-
this.textIndex.addVectors(indexData.textVectors);
|
|
418
|
+
await this.textIndex.addVectors(indexData.textVectors);
|
|
418
419
|
console.log(`✓ Text index created with ${indexData.textVectors.length} vectors`);
|
|
419
420
|
// Create image-only index
|
|
420
421
|
this.imageIndex = new VectorIndex(`${this.indexPath}.image`, this.vectorIndexOptions);
|
|
421
422
|
await this.imageIndex.initialize();
|
|
422
|
-
this.imageIndex.addVectors(indexData.imageVectors);
|
|
423
|
+
await this.imageIndex.addVectors(indexData.imageVectors);
|
|
423
424
|
console.log(`✓ Image index created with ${indexData.imageVectors.length} vectors`);
|
|
424
425
|
console.log('✓ Specialized indexes ready for content type filtering');
|
|
425
426
|
}
|
|
@@ -475,8 +476,9 @@ export class IndexManager {
|
|
|
475
476
|
}
|
|
476
477
|
/**
|
|
477
478
|
* Search for similar vectors
|
|
479
|
+
* Now async due to worker-based VectorIndex implementation
|
|
478
480
|
*/
|
|
479
|
-
search(queryVector, k = 5, contentType) {
|
|
481
|
+
async search(queryVector, k = 5, contentType) {
|
|
480
482
|
if (!this.isInitialized) {
|
|
481
483
|
throw new Error('Index manager not initialized');
|
|
482
484
|
}
|
|
@@ -499,7 +501,7 @@ export class IndexManager {
|
|
|
499
501
|
// No specialized indexes (text-only mode) - ignore contentType and use combined index
|
|
500
502
|
targetIndex = this.vectorIndex;
|
|
501
503
|
}
|
|
502
|
-
const results = targetIndex.search(queryVector, k);
|
|
504
|
+
const results = await targetIndex.search(queryVector, k);
|
|
503
505
|
// Convert numeric IDs back to embedding IDs
|
|
504
506
|
const embeddingIds = results.neighbors.map(id => this.unhashEmbeddingId(id));
|
|
505
507
|
return {
|
|
@@ -514,7 +516,7 @@ export class IndexManager {
|
|
|
514
516
|
if (!this.db) {
|
|
515
517
|
throw new Error('Database not initialized');
|
|
516
518
|
}
|
|
517
|
-
const totalVectors = this.vectorIndex.getCurrentCount();
|
|
519
|
+
const totalVectors = await this.vectorIndex.getCurrentCount();
|
|
518
520
|
try {
|
|
519
521
|
const systemInfo = await getSystemInfo(this.db);
|
|
520
522
|
const modelVersion = systemInfo?.modelVersion || null;
|
|
@@ -586,13 +588,80 @@ export class IndexManager {
|
|
|
586
588
|
return embeddingId;
|
|
587
589
|
}
|
|
588
590
|
/**
|
|
589
|
-
* Close database connection
|
|
591
|
+
* Close database connection and cleanup vector index worker
|
|
590
592
|
*/
|
|
591
593
|
async close() {
|
|
592
594
|
if (this.db) {
|
|
593
595
|
await this.db.close();
|
|
594
596
|
this.db = null;
|
|
595
597
|
}
|
|
598
|
+
// Clean up vector index worker to free WebAssembly memory
|
|
599
|
+
if (this.vectorIndex && typeof this.vectorIndex.cleanup === 'function') {
|
|
600
|
+
await this.vectorIndex.cleanup();
|
|
601
|
+
}
|
|
602
|
+
// Also clean up specialized indexes
|
|
603
|
+
if (this.textIndex && typeof this.textIndex.cleanup === 'function') {
|
|
604
|
+
await this.textIndex.cleanup();
|
|
605
|
+
}
|
|
606
|
+
if (this.imageIndex && typeof this.imageIndex.cleanup === 'function') {
|
|
607
|
+
await this.imageIndex.cleanup();
|
|
608
|
+
}
|
|
609
|
+
}
|
|
610
|
+
/**
|
|
611
|
+
* Reset the vector index by clearing all vectors while keeping the index structure.
|
|
612
|
+
* This is a safer alternative to file deletion that avoids file locking issues on Windows.
|
|
613
|
+
*
|
|
614
|
+
* The reset operation:
|
|
615
|
+
* 1. Clears in-memory HNSW index
|
|
616
|
+
* 2. Clears in-memory vector storage and ID mappings
|
|
617
|
+
* 3. Reinitializes an empty index with the same parameters
|
|
618
|
+
* 4. Saves the empty index to disk (overwrites existing file)
|
|
619
|
+
*
|
|
620
|
+
* @returns Promise that resolves when reset is complete
|
|
621
|
+
*/
|
|
622
|
+
async reset() {
|
|
623
|
+
console.log('🔄 Starting index reset...');
|
|
624
|
+
const startTime = Date.now();
|
|
625
|
+
try {
|
|
626
|
+
// Record the current vector count for the summary log, then clear in-memory mappings
|
|
627
|
+
const previousVectorCount = await this.vectorIndex.getCurrentCount();
|
|
628
|
+
this.hashToEmbeddingId.clear();
|
|
629
|
+
this.embeddingIdToHash.clear();
|
|
630
|
+
// Clear grouped embeddings if any
|
|
631
|
+
this.groupedEmbeddings = undefined;
|
|
632
|
+
// Clear specialized indexes if they exist
|
|
633
|
+
if (this.textIndex) {
|
|
634
|
+
this.textIndex = undefined;
|
|
635
|
+
}
|
|
636
|
+
if (this.imageIndex) {
|
|
637
|
+
this.imageIndex = undefined;
|
|
638
|
+
}
|
|
639
|
+
// Reset the vector index (clears all vectors and reinitializes empty HNSW graph)
|
|
640
|
+
console.log(' Resetting HNSW index...');
|
|
641
|
+
await this.vectorIndex.reset();
|
|
642
|
+
// Save the empty index to disk (this overwrites the existing file)
|
|
643
|
+
console.log(' Saving empty index to disk...');
|
|
644
|
+
await this.vectorIndex.saveIndex();
|
|
645
|
+
const resetTimeMs = Date.now() - startTime;
|
|
646
|
+
const currentCount = await this.vectorIndex.getCurrentCount();
|
|
647
|
+
console.log(`✓ Index reset complete in ${resetTimeMs}ms`);
|
|
648
|
+
console.log(` Vectors cleared: ${previousVectorCount}`);
|
|
649
|
+
console.log(` Current vector count: ${currentCount}`);
|
|
650
|
+
}
|
|
651
|
+
catch (error) {
|
|
652
|
+
const resetTimeMs = Date.now() - startTime;
|
|
653
|
+
console.error(`❌ Index reset failed after ${resetTimeMs}ms:`, error);
|
|
654
|
+
throw new Error(`Failed to reset index: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
655
|
+
}
|
|
656
|
+
}
|
|
657
|
+
/**
|
|
658
|
+
* Check if the index has any vectors
|
|
659
|
+
* @returns Promise resolving to true if the index contains vectors, false if empty
|
|
660
|
+
* Now async due to worker-based VectorIndex implementation
|
|
661
|
+
*/
|
|
662
|
+
async hasVectors() {
|
|
663
|
+
const count = await this.vectorIndex.getCurrentCount();
|
|
664
|
+
return count > 0;
|
|
596
665
|
}
|
|
597
666
|
}
|
|
598
667
|
//# sourceMappingURL=index-manager.js.map
|
package/dist/cjs/index.d.ts
CHANGED
|
@@ -59,7 +59,8 @@ export { CrossEncoderReranker, createTextRerankFunction } from './text/reranker.
|
|
|
59
59
|
export { countTokens } from './text/tokenizer.js';
|
|
60
60
|
export type { RerankingStrategyType, RerankingConfig } from './core/reranking-config.js';
|
|
61
61
|
export { validateRerankingStrategy, validateRerankingConfig, getDefaultRerankingConfig, isStrategySupported, getSupportedStrategies, RerankingConfigBuilder, DEFAULT_TEXT_RERANKING_CONFIG, DEFAULT_MULTIMODAL_RERANKING_CONFIG } from './core/reranking-config.js';
|
|
62
|
-
export { openDatabase, initializeSchema, insertDocument, insertChunk, upsertDocument, getChunksByEmbeddingIds, type DatabaseConnection } from './core/db.js';
|
|
62
|
+
export { openDatabase, initializeSchema, insertDocument, insertChunk, upsertDocument, getChunksByEmbeddingIds, resetDatabase, hasDatabaseData, type DatabaseConnection, type DatabaseResetOptions, type DatabaseResetResult } from './core/db.js';
|
|
63
|
+
export { KnowledgeBaseManager, type KnowledgeBaseResetOptions, type KnowledgeBaseResetResult } from './core/knowledge-base-manager.js';
|
|
63
64
|
export { IndexManager } from './index-manager.js';
|
|
64
65
|
export { VectorIndex } from './core/vector-index.js';
|
|
65
66
|
export { config, getModelDefaults, type CoreConfig, type ExtensibleConfig, type ModelDefaults, EXIT_CODES, ConfigurationError, getDefaultModelCachePath, handleUnrecoverableError, logError } from './core/config.js';
|
package/dist/cjs/index.js
CHANGED
|
@@ -83,7 +83,9 @@ export { validateRerankingStrategy, validateRerankingConfig, getDefaultReranking
|
|
|
83
83
|
// CORE INFRASTRUCTURE (FOR ADVANCED USERS)
|
|
84
84
|
// =============================================================================
|
|
85
85
|
// Database operations
|
|
86
|
-
export { openDatabase, initializeSchema, insertDocument, insertChunk, upsertDocument, getChunksByEmbeddingIds } from './core/db.js';
|
|
86
|
+
export { openDatabase, initializeSchema, insertDocument, insertChunk, upsertDocument, getChunksByEmbeddingIds, resetDatabase, hasDatabaseData } from './core/db.js';
|
|
87
|
+
// Knowledge Base Manager (for reset operations)
|
|
88
|
+
export { KnowledgeBaseManager } from './core/knowledge-base-manager.js';
|
|
87
89
|
// Vector index management
|
|
88
90
|
export { IndexManager } from './index-manager.js';
|
|
89
91
|
export { VectorIndex } from './core/vector-index.js';
|
package/dist/esm/cli/indexer.js
CHANGED
|
@@ -80,7 +80,7 @@ async function validateModeConfiguration(options) {
|
|
|
80
80
|
*/
|
|
81
81
|
export async function runIngest(path, options = {}) {
|
|
82
82
|
try {
|
|
83
|
-
// Handle --rebuild
|
|
83
|
+
// Note: --force-rebuild cleanup is performed further below, once the database and index paths have been resolved
|
|
84
84
|
// Validate path exists
|
|
85
85
|
const resolvedPath = resolve(path);
|
|
86
86
|
if (!existsSync(resolvedPath)) {
|
|
@@ -159,26 +159,52 @@ export async function runIngest(path, options = {}) {
|
|
|
159
159
|
factoryOptions.mode = options.mode;
|
|
160
160
|
console.log(`Using processing mode: ${options.mode}`);
|
|
161
161
|
}
|
|
162
|
-
if (options.
|
|
162
|
+
if (options.forceRebuild) {
|
|
163
163
|
factoryOptions.forceRebuild = true;
|
|
164
|
-
console.log('Force rebuild enabled
|
|
165
|
-
// Delete old index file immediately to prevent dimension mismatch errors
|
|
166
|
-
const indexPath = process.env.RAG_INDEX_FILE || './vector-index.bin';
|
|
167
|
-
const { existsSync, unlinkSync } = await import('fs');
|
|
168
|
-
if (existsSync(indexPath)) {
|
|
169
|
-
try {
|
|
170
|
-
unlinkSync(indexPath);
|
|
171
|
-
console.log('🗑️ Removed old index file to prevent dimension mismatch');
|
|
172
|
-
}
|
|
173
|
-
catch (error) {
|
|
174
|
-
console.warn(`⚠️ Could not remove old index file: ${error}`);
|
|
175
|
-
}
|
|
176
|
-
}
|
|
164
|
+
console.log('Force rebuild enabled (--force-rebuild)');
|
|
177
165
|
}
|
|
178
166
|
// Validate mode-specific model and strategy combinations
|
|
179
167
|
await validateModeConfiguration(factoryOptions);
|
|
180
168
|
const dbPath = process.env.RAG_DB_FILE || './db.sqlite';
|
|
181
169
|
const indexPath = process.env.RAG_INDEX_FILE || './vector-index.bin';
|
|
170
|
+
// --force-rebuild: best-effort deletion of the DB (plus WAL/SHM sidecars) and the index before a clean rebuild; a failed deletion only logs a warning.
|
|
171
|
+
if (options.forceRebuild) {
|
|
172
|
+
try {
|
|
173
|
+
const { existsSync: fsExistsSync, unlinkSync } = await import('fs');
|
|
174
|
+
console.log('🗑️ Deleting existing database and index to perform a clean rebuild...');
|
|
175
|
+
// Remove WAL/SHM if present (common on SQLite with WAL journaling).
|
|
176
|
+
const sidecars = [`${dbPath}-wal`, `${dbPath}-shm`];
|
|
177
|
+
for (const p of sidecars) {
|
|
178
|
+
if (fsExistsSync(p)) {
|
|
179
|
+
try {
|
|
180
|
+
unlinkSync(p);
|
|
181
|
+
}
|
|
182
|
+
catch (e) {
|
|
183
|
+
console.warn(`⚠️ Could not remove SQLite sidecar file (${p}):`, e);
|
|
184
|
+
}
|
|
185
|
+
}
|
|
186
|
+
}
|
|
187
|
+
if (fsExistsSync(dbPath)) {
|
|
188
|
+
try {
|
|
189
|
+
unlinkSync(dbPath);
|
|
190
|
+
}
|
|
191
|
+
catch (e) {
|
|
192
|
+
console.warn(`⚠️ Could not remove database file (${dbPath}):`, e);
|
|
193
|
+
}
|
|
194
|
+
}
|
|
195
|
+
if (fsExistsSync(indexPath)) {
|
|
196
|
+
try {
|
|
197
|
+
unlinkSync(indexPath);
|
|
198
|
+
}
|
|
199
|
+
catch (e) {
|
|
200
|
+
console.warn(`⚠️ Could not remove index file (${indexPath}):`, e);
|
|
201
|
+
}
|
|
202
|
+
}
|
|
203
|
+
}
|
|
204
|
+
catch (error) {
|
|
205
|
+
console.warn('⚠️ Could not delete existing database/index for clean rebuild:', error instanceof Error ? error.message : String(error));
|
|
206
|
+
}
|
|
207
|
+
}
|
|
182
208
|
// Setup graceful cleanup
|
|
183
209
|
setupCLICleanup(dbPath);
|
|
184
210
|
// Check if database is busy before starting
|
|
@@ -218,6 +244,22 @@ export async function runIngest(path, options = {}) {
|
|
|
218
244
|
console.log(`Processing rate: ${chunksPerSecond} chunks/second`);
|
|
219
245
|
}
|
|
220
246
|
console.log('\nIngestion completed successfully!');
|
|
247
|
+
// Run VACUUM to compact the SQLite database after ingestion
|
|
248
|
+
try {
|
|
249
|
+
const { openDatabase } = await import('../core/db.js');
|
|
250
|
+
const vacuumDb = await openDatabase(dbPath);
|
|
251
|
+
try {
|
|
252
|
+
console.log('Running VACUUM to optimize database size...');
|
|
253
|
+
await vacuumDb.run('VACUUM');
|
|
254
|
+
console.log('VACUUM completed successfully.');
|
|
255
|
+
}
|
|
256
|
+
finally {
|
|
257
|
+
await vacuumDb.close();
|
|
258
|
+
}
|
|
259
|
+
}
|
|
260
|
+
catch (vacuumError) {
|
|
261
|
+
console.warn('⚠️ VACUUM operation failed or was skipped:', vacuumError instanceof Error ? vacuumError.message : String(vacuumError));
|
|
262
|
+
}
|
|
221
263
|
// Display mode-specific information
|
|
222
264
|
const mode = options.mode || 'text';
|
|
223
265
|
if (mode === 'multimodal') {
|
|
@@ -397,6 +439,22 @@ export async function runRebuild() {
|
|
|
397
439
|
console.log('All embeddings have been regenerated with the current model.');
|
|
398
440
|
console.log('');
|
|
399
441
|
console.log('You can now search your documents using: raglite search "your query"');
|
|
442
|
+
// Run VACUUM to compact the SQLite database after rebuild
|
|
443
|
+
try {
|
|
444
|
+
const { openDatabase } = await import('../core/db.js');
|
|
445
|
+
const vacuumDb = await openDatabase(dbPath);
|
|
446
|
+
try {
|
|
447
|
+
console.log('Running VACUUM to optimize database size after rebuild...');
|
|
448
|
+
await vacuumDb.run('VACUUM');
|
|
449
|
+
console.log('VACUUM completed successfully.');
|
|
450
|
+
}
|
|
451
|
+
finally {
|
|
452
|
+
await vacuumDb.close();
|
|
453
|
+
}
|
|
454
|
+
}
|
|
455
|
+
catch (vacuumError) {
|
|
456
|
+
console.warn('⚠️ VACUUM operation failed or was skipped:', vacuumError instanceof Error ? vacuumError.message : String(vacuumError));
|
|
457
|
+
}
|
|
400
458
|
}
|
|
401
459
|
finally {
|
|
402
460
|
await db.close();
|
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
import { fileURLToPath } from 'url';
|
|
2
|
+
import { dirname, join } from 'path';
|
|
3
|
+
import { spawn } from 'child_process';
|
|
4
|
+
import fs from 'fs';
|
|
5
|
+
const __filename = fileURLToPath(import.meta.url);
|
|
6
|
+
const __dirname = dirname(__filename);
|
|
7
|
+
/**
|
|
8
|
+
* Get the project root directory
|
|
9
|
+
* When built, CLI is at dist/esm/cli/ui-server.js, so go up 3 levels
|
|
10
|
+
* When running from source, CLI is at src/cli/ui-server.ts, so go up 2 levels
|
|
11
|
+
*/
|
|
12
|
+
function getProjectRoot() {
|
|
13
|
+
// Try going up 3 levels first (for built version)
|
|
14
|
+
const builtPath = join(__dirname, '../../..');
|
|
15
|
+
if (fs.existsSync(join(builtPath, 'package.json'))) {
|
|
16
|
+
return builtPath;
|
|
17
|
+
}
|
|
18
|
+
// Fallback: go up 2 levels (for source execution)
|
|
19
|
+
return join(__dirname, '../..');
|
|
20
|
+
}
|
|
21
|
+
/**
|
|
22
|
+
* Launch the UI server
|
|
23
|
+
*/
|
|
24
|
+
export async function runUI(options = {}) {
|
|
25
|
+
const port = options.port || 3000;
|
|
26
|
+
const backendPort = options.backendPort || 3001;
|
|
27
|
+
console.log('🚀 Launching RAG-lite TS UI...');
|
|
28
|
+
// Resolve UI paths from project root
|
|
29
|
+
const projectRoot = getProjectRoot();
|
|
30
|
+
const backendBuiltPath = join(projectRoot, 'ui', 'backend', 'dist', 'index.js');
|
|
31
|
+
const backendSourcePath = join(projectRoot, 'ui', 'backend', 'src', 'index.ts');
|
|
32
|
+
const frontendBuiltPath = join(projectRoot, 'ui', 'frontend', 'dist');
|
|
33
|
+
const frontendSourcePath = join(projectRoot, 'ui', 'frontend');
|
|
34
|
+
// Check if built files exist
|
|
35
|
+
const useBuiltBackend = fs.existsSync(backendBuiltPath);
|
|
36
|
+
const useBuiltFrontend = fs.existsSync(frontendBuiltPath);
|
|
37
|
+
if (!useBuiltBackend && !fs.existsSync(backendSourcePath)) {
|
|
38
|
+
console.error(`❌ UI backend not found at: ${backendSourcePath}`);
|
|
39
|
+
console.error(' Make sure the UI is set up in the ui/ directory.');
|
|
40
|
+
process.exit(1);
|
|
41
|
+
}
|
|
42
|
+
if (!useBuiltFrontend && !fs.existsSync(frontendSourcePath)) {
|
|
43
|
+
console.error(`❌ UI frontend not found at: ${frontendSourcePath}`);
|
|
44
|
+
console.error(' Make sure the UI is set up in the ui/ directory.');
|
|
45
|
+
process.exit(1);
|
|
46
|
+
}
|
|
47
|
+
// Pass the working directory where 'raglite ui' was called to the backend
|
|
48
|
+
const workingDir = process.cwd();
|
|
49
|
+
// Built mode: single server on port (UI + API). Dev mode: backend on backendPort, frontend on port.
|
|
50
|
+
const effectiveBackendPort = useBuiltFrontend ? port : backendPort;
|
|
51
|
+
console.log(`📡 Starting backend on port ${effectiveBackendPort}...`);
|
|
52
|
+
// Start backend server - use built version if available
|
|
53
|
+
const backendCommand = useBuiltBackend ? 'node' : 'npx';
|
|
54
|
+
const backendArgs = useBuiltBackend
|
|
55
|
+
? [backendBuiltPath]
|
|
56
|
+
: ['tsx', backendSourcePath];
|
|
57
|
+
const backendProcess = spawn(backendCommand, backendArgs, {
|
|
58
|
+
stdio: 'pipe',
|
|
59
|
+
env: {
|
|
60
|
+
...process.env,
|
|
61
|
+
PORT: effectiveBackendPort.toString(),
|
|
62
|
+
RAG_WORKING_DIR: workingDir,
|
|
63
|
+
UI_FRONTEND_DIST: useBuiltFrontend ? frontendBuiltPath : undefined
|
|
64
|
+
},
|
|
65
|
+
shell: true
|
|
66
|
+
});
|
|
67
|
+
backendProcess.on('error', (err) => {
|
|
68
|
+
console.error('❌ Failed to start backend process:', err);
|
|
69
|
+
process.exit(1);
|
|
70
|
+
});
|
|
71
|
+
// Forward backend output with prefix
|
|
72
|
+
backendProcess.stdout?.on('data', (data) => {
|
|
73
|
+
process.stdout.write(`[Backend] ${data}`);
|
|
74
|
+
});
|
|
75
|
+
backendProcess.stderr?.on('data', (data) => {
|
|
76
|
+
process.stderr.write(`[Backend] ${data}`);
|
|
77
|
+
});
|
|
78
|
+
// Only start frontend dev server if built version doesn't exist
|
|
79
|
+
let frontendProcess = null;
|
|
80
|
+
if (!useBuiltFrontend) {
|
|
81
|
+
console.log(`🎨 Starting frontend dev server on port ${port}...`);
|
|
82
|
+
frontendProcess = spawn('npm', ['run', 'dev'], {
|
|
83
|
+
cwd: frontendSourcePath,
|
|
84
|
+
stdio: 'pipe',
|
|
85
|
+
env: {
|
|
86
|
+
...process.env,
|
|
87
|
+
VITE_API_URL: `http://localhost:${effectiveBackendPort}`
|
|
88
|
+
},
|
|
89
|
+
shell: true
|
|
90
|
+
});
|
|
91
|
+
frontendProcess.on('error', (err) => {
|
|
92
|
+
console.error('❌ Failed to start frontend process:', err);
|
|
93
|
+
backendProcess.kill();
|
|
94
|
+
process.exit(1);
|
|
95
|
+
});
|
|
96
|
+
// Forward frontend output with prefix
|
|
97
|
+
frontendProcess.stdout?.on('data', (data) => {
|
|
98
|
+
process.stdout.write(`[Frontend] ${data}`);
|
|
99
|
+
});
|
|
100
|
+
frontendProcess.stderr?.on('data', (data) => {
|
|
101
|
+
process.stderr.write(`[Frontend] ${data}`);
|
|
102
|
+
});
|
|
103
|
+
}
|
|
104
|
+
else {
|
|
105
|
+
console.log(`🎨 Using built frontend from ${frontendBuiltPath}`);
|
|
106
|
+
console.log(` Frontend will be served by backend on port ${effectiveBackendPort}`);
|
|
107
|
+
}
|
|
108
|
+
// Wait a fixed 2 seconds to give the servers time to start (no readiness check is performed)
|
|
109
|
+
await new Promise(resolve => setTimeout(resolve, 2000));
|
|
110
|
+
console.log(`\n✨ UI Access:`);
|
|
111
|
+
if (useBuiltFrontend) {
|
|
112
|
+
console.log(` Frontend & Backend: http://localhost:${port}`);
|
|
113
|
+
}
|
|
114
|
+
else {
|
|
115
|
+
console.log(` Frontend: http://localhost:${port}`);
|
|
116
|
+
console.log(` Backend: http://localhost:${effectiveBackendPort}`);
|
|
117
|
+
}
|
|
118
|
+
console.log(`\n💡 Press Ctrl+C to stop both servers\n`);
|
|
119
|
+
// Keep the process alive and handle cleanup
|
|
120
|
+
return new Promise((resolve) => {
|
|
121
|
+
const cleanup = () => {
|
|
122
|
+
console.log('\n🛑 Shutting down servers...');
|
|
123
|
+
backendProcess.kill();
|
|
124
|
+
if (frontendProcess) {
|
|
125
|
+
frontendProcess.kill();
|
|
126
|
+
}
|
|
127
|
+
resolve();
|
|
128
|
+
};
|
|
129
|
+
process.on('SIGINT', cleanup);
|
|
130
|
+
process.on('SIGTERM', cleanup);
|
|
131
|
+
// Handle process exits
|
|
132
|
+
backendProcess.on('exit', (code) => {
|
|
133
|
+
if (code !== 0 && code !== null) {
|
|
134
|
+
console.error(`\n❌ Backend process exited with code ${code}`);
|
|
135
|
+
if (frontendProcess) {
|
|
136
|
+
frontendProcess.kill();
|
|
137
|
+
}
|
|
138
|
+
resolve();
|
|
139
|
+
}
|
|
140
|
+
});
|
|
141
|
+
if (frontendProcess) {
|
|
142
|
+
frontendProcess.on('exit', (code) => {
|
|
143
|
+
if (code !== 0 && code !== null) {
|
|
144
|
+
console.error(`\n❌ Frontend process exited with code ${code}`);
|
|
145
|
+
backendProcess.kill();
|
|
146
|
+
resolve();
|
|
147
|
+
}
|
|
148
|
+
});
|
|
149
|
+
}
|
|
150
|
+
});
|
|
151
|
+
}
|
|
152
|
+
//# sourceMappingURL=ui-server.js.map
|
package/dist/esm/cli.js
CHANGED
|
@@ -6,8 +6,18 @@ import { EXIT_CODES, ConfigurationError } from './core/config.js';
|
|
|
6
6
|
// Get package.json for version info
|
|
7
7
|
const __filename = fileURLToPath(import.meta.url);
|
|
8
8
|
const __dirname = dirname(__filename);
|
|
9
|
-
|
|
10
|
-
|
|
9
|
+
// When built, CLI is at dist/esm/cli.js, so go up two levels to root
|
|
10
|
+
// When running from source, CLI is at src/cli.ts, so go up one level to root
|
|
11
|
+
const packageJsonPath = join(__dirname, '..', '..', 'package.json');
|
|
12
|
+
let packageJson;
|
|
13
|
+
try {
|
|
14
|
+
packageJson = JSON.parse(readFileSync(packageJsonPath, 'utf-8'));
|
|
15
|
+
}
|
|
16
|
+
catch {
|
|
17
|
+
// Fallback: try one level up (for source execution)
|
|
18
|
+
const fallbackPath = join(__dirname, '..', 'package.json');
|
|
19
|
+
packageJson = JSON.parse(readFileSync(fallbackPath, 'utf-8'));
|
|
20
|
+
}
|
|
11
21
|
/**
|
|
12
22
|
* Display version information
|
|
13
23
|
*/
|
|
@@ -28,6 +38,7 @@ Usage:
|
|
|
28
38
|
Commands:
|
|
29
39
|
ingest <path> Ingest documents from file or directory
|
|
30
40
|
search <query> Search indexed documents (text or image)
|
|
41
|
+
ui Launch the web interface
|
|
31
42
|
rebuild Rebuild the vector index
|
|
32
43
|
version Show version information
|
|
33
44
|
help Show this help message
|
|
@@ -43,6 +54,7 @@ Examples:
|
|
|
43
54
|
raglite search "red car" --content-type image # Search only image results
|
|
44
55
|
raglite search ./photo.jpg # Search with image (multimodal mode only)
|
|
45
56
|
raglite search ./image.png --top-k 5 # Find similar images
|
|
57
|
+
raglite ui # Launch web interface
|
|
46
58
|
|
|
47
59
|
raglite rebuild # Rebuild the entire index
|
|
48
60
|
|
|
@@ -55,7 +67,7 @@ Options for search:
|
|
|
55
67
|
Options for ingest:
|
|
56
68
|
--model <name> Use specific embedding model
|
|
57
69
|
--mode <mode> Processing mode: 'text' (default) or 'multimodal'
|
|
58
|
-
--
|
|
70
|
+
--force-rebuild Wipe DB+index and rebuild from scratch (DESTRUCTIVE)
|
|
59
71
|
--path-strategy <strategy> Path storage strategy: 'relative' (default) or 'absolute'
|
|
60
72
|
--path-base <path> Base directory for relative paths (defaults to current directory)
|
|
61
73
|
|
|
@@ -111,8 +123,8 @@ function parseArgs() {
|
|
|
111
123
|
else if (optionName === 'no-rerank') {
|
|
112
124
|
options.rerank = false;
|
|
113
125
|
}
|
|
114
|
-
else if (optionName === 'rebuild
|
|
115
|
-
options.
|
|
126
|
+
else if (optionName === 'force-rebuild') {
|
|
127
|
+
options.forceRebuild = true;
|
|
116
128
|
}
|
|
117
129
|
else if (optionName === 'help') {
|
|
118
130
|
return { command: 'help', args: [], options: {} };
|
|
@@ -169,7 +181,7 @@ function validateArgs(command, args, options) {
|
|
|
169
181
|
console.error('Options:');
|
|
170
182
|
console.error(' --model <name> Use specific embedding model');
|
|
171
183
|
console.error(' --mode <mode> Processing mode: text (default) or multimodal');
|
|
172
|
-
console.error(' --rebuild
|
|
184
|
+
console.error(' --force-rebuild Wipe DB+index and rebuild from scratch (DESTRUCTIVE)');
|
|
173
185
|
console.error('');
|
|
174
186
|
console.error('The path can be either a file (.md or .txt) or a directory.');
|
|
175
187
|
process.exit(EXIT_CODES.INVALID_ARGUMENTS);
|
|
@@ -201,6 +213,9 @@ function validateArgs(command, args, options) {
|
|
|
201
213
|
case 'rebuild':
|
|
202
214
|
// No arguments required
|
|
203
215
|
break;
|
|
216
|
+
case 'ui':
|
|
217
|
+
// No arguments required
|
|
218
|
+
break;
|
|
204
219
|
case 'version':
|
|
205
220
|
// No validation needed
|
|
206
221
|
break;
|
|
@@ -412,6 +427,10 @@ async function main() {
|
|
|
412
427
|
const { runRebuild } = await import('./cli/indexer.js');
|
|
413
428
|
await runRebuild();
|
|
414
429
|
break;
|
|
430
|
+
case 'ui':
|
|
431
|
+
const { runUI } = await import('./cli/ui-server.js');
|
|
432
|
+
await runUI(options);
|
|
433
|
+
break;
|
|
415
434
|
default:
|
|
416
435
|
console.error(`Error: Unknown command '${command}'`);
|
|
417
436
|
process.exit(1);
|
|
@@ -27,10 +27,12 @@ export class BinaryIndexFormat {
|
|
|
27
27
|
* @param data Index data to serialize
|
|
28
28
|
*/
|
|
29
29
|
static async save(indexPath, data) {
|
|
30
|
-
//
|
|
30
|
+
// Use actual vector count to ensure accurate file size
|
|
31
|
+
const actualVectorCount = data.vectors.length;
|
|
32
|
+
// Calculate total size based on actual vectors
|
|
31
33
|
const headerSize = 24; // 6 uint32 fields
|
|
32
34
|
const vectorSize = 4 + (data.dimensions * 4); // id + vector
|
|
33
|
-
const totalSize = headerSize + (
|
|
35
|
+
const totalSize = headerSize + (actualVectorCount * vectorSize);
|
|
34
36
|
const buffer = new ArrayBuffer(totalSize);
|
|
35
37
|
const view = new DataView(buffer);
|
|
36
38
|
let offset = 0;
|
|
@@ -45,7 +47,8 @@ export class BinaryIndexFormat {
|
|
|
45
47
|
offset += 4;
|
|
46
48
|
view.setUint32(offset, data.seed, true);
|
|
47
49
|
offset += 4;
|
|
48
|
-
|
|
50
|
+
// Write actual vector count in header
|
|
51
|
+
view.setUint32(offset, actualVectorCount, true);
|
|
49
52
|
offset += 4;
|
|
50
53
|
// Write vectors
|
|
51
54
|
for (const item of data.vectors) {
|
package/dist/esm/core/db.d.ts
CHANGED
|
@@ -210,4 +210,60 @@ export declare function updateStorageStats(connection: DatabaseConnection, stats
|
|
|
210
210
|
filesystemRefs?: number;
|
|
211
211
|
lastCleanup?: Date;
|
|
212
212
|
}): Promise<void>;
|
|
213
|
+
/**
|
|
214
|
+
* Result of a database reset operation
|
|
215
|
+
*/
|
|
216
|
+
export interface DatabaseResetResult {
|
|
217
|
+
/** Whether the reset was successful */
|
|
218
|
+
success: boolean;
|
|
219
|
+
/** Number of documents deleted */
|
|
220
|
+
documentsDeleted: number;
|
|
221
|
+
/** Number of chunks deleted */
|
|
222
|
+
chunksDeleted: number;
|
|
223
|
+
/** Number of content metadata entries deleted */
|
|
224
|
+
contentMetadataDeleted: number;
|
|
225
|
+
/** True if system_info was cleared, false if it was preserved */
|
|
226
|
+
systemInfoCleared: boolean;
|
|
227
|
+
/** Time taken for the reset operation in milliseconds */
|
|
228
|
+
resetTimeMs: number;
|
|
229
|
+
}
|
|
230
|
+
/**
|
|
231
|
+
* Options for database reset operation
|
|
232
|
+
*/
|
|
233
|
+
export interface DatabaseResetOptions {
|
|
234
|
+
/** Whether to preserve system_info (mode, model configuration) - default: false */
|
|
235
|
+
preserveSystemInfo?: boolean;
|
|
236
|
+
/** Whether to run VACUUM after deletion to reclaim space - default: true */
|
|
237
|
+
runVacuum?: boolean;
|
|
238
|
+
}
|
|
239
|
+
/**
|
|
240
|
+
* Reset the database by deleting all data while keeping the schema intact.
|
|
241
|
+
* This is a safer alternative to file deletion that avoids file locking issues on Windows.
|
|
242
|
+
*
|
|
243
|
+
* This function:
|
|
244
|
+
* 1. Deletes all rows from chunks, documents, content_metadata tables
|
|
245
|
+
* 2. Optionally clears system_info (mode/model configuration)
|
|
246
|
+
* 3. Resets storage_stats counters
|
|
247
|
+
* 4. Optionally runs VACUUM to reclaim disk space
|
|
248
|
+
*
|
|
249
|
+
* @param connection - Database connection object
|
|
250
|
+
* @param options - Reset options
|
|
251
|
+
* @returns Promise resolving to reset result statistics
|
|
252
|
+
*
|
|
253
|
+
* @example
|
|
254
|
+
* ```typescript
|
|
255
|
+
* const db = await openDatabase('./db.sqlite');
|
|
256
|
+
* const result = await resetDatabase(db, { preserveSystemInfo: false });
|
|
257
|
+
* console.log(`Deleted ${result.documentsDeleted} documents and ${result.chunksDeleted} chunks`);
|
|
258
|
+
* ```
|
|
259
|
+
*/
|
|
260
|
+
export declare function resetDatabase(connection: DatabaseConnection, options?: DatabaseResetOptions): Promise<DatabaseResetResult>;
|
|
261
|
+
/**
|
|
262
|
+
* Check if the database has any data (documents, chunks, or content)
|
|
263
|
+
* Useful for determining if a reset is needed
|
|
264
|
+
*
|
|
265
|
+
* @param connection - Database connection object
|
|
266
|
+
* @returns Promise resolving to true if database has data, false if empty
|
|
267
|
+
*/
|
|
268
|
+
export declare function hasDatabaseData(connection: DatabaseConnection): Promise<boolean>;
|
|
213
269
|
//# sourceMappingURL=db.d.ts.map
|