rag-lite-ts 2.2.0 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/README.md +88 -5
  2. package/dist/cjs/cli/indexer.js +73 -15
  3. package/dist/cjs/cli/ui-server.d.ts +5 -0
  4. package/dist/cjs/cli/ui-server.js +152 -0
  5. package/dist/cjs/cli.js +25 -6
  6. package/dist/cjs/core/binary-index-format.js +6 -3
  7. package/dist/cjs/core/db.d.ts +56 -0
  8. package/dist/cjs/core/db.js +105 -0
  9. package/dist/cjs/core/ingestion.js +3 -0
  10. package/dist/cjs/core/knowledge-base-manager.d.ts +109 -0
  11. package/dist/cjs/core/knowledge-base-manager.js +256 -0
  12. package/dist/cjs/core/search-pipeline.js +1 -1
  13. package/dist/cjs/core/search.js +1 -1
  14. package/dist/cjs/core/vector-index-messages.d.ts +52 -0
  15. package/dist/cjs/core/vector-index-messages.js +5 -0
  16. package/dist/cjs/core/vector-index-worker.d.ts +6 -0
  17. package/dist/cjs/core/vector-index-worker.js +304 -0
  18. package/dist/cjs/core/vector-index.d.ts +45 -10
  19. package/dist/cjs/core/vector-index.js +229 -218
  20. package/dist/cjs/factories/ingestion-factory.js +3 -7
  21. package/dist/cjs/factories/search-factory.js +11 -0
  22. package/dist/cjs/index-manager.d.ts +23 -3
  23. package/dist/cjs/index-manager.js +84 -15
  24. package/dist/cjs/index.d.ts +2 -1
  25. package/dist/cjs/index.js +3 -1
  26. package/dist/esm/cli/indexer.js +73 -15
  27. package/dist/esm/cli/ui-server.d.ts +5 -0
  28. package/dist/esm/cli/ui-server.js +152 -0
  29. package/dist/esm/cli.js +25 -6
  30. package/dist/esm/core/binary-index-format.js +6 -3
  31. package/dist/esm/core/db.d.ts +56 -0
  32. package/dist/esm/core/db.js +105 -0
  33. package/dist/esm/core/ingestion.js +3 -0
  34. package/dist/esm/core/knowledge-base-manager.d.ts +109 -0
  35. package/dist/esm/core/knowledge-base-manager.js +256 -0
  36. package/dist/esm/core/search-pipeline.js +1 -1
  37. package/dist/esm/core/search.js +1 -1
  38. package/dist/esm/core/vector-index-messages.d.ts +52 -0
  39. package/dist/esm/core/vector-index-messages.js +5 -0
  40. package/dist/esm/core/vector-index-worker.d.ts +6 -0
  41. package/dist/esm/core/vector-index-worker.js +304 -0
  42. package/dist/esm/core/vector-index.d.ts +45 -10
  43. package/dist/esm/core/vector-index.js +229 -218
  44. package/dist/esm/factories/ingestion-factory.js +3 -7
  45. package/dist/esm/factories/search-factory.js +11 -0
  46. package/dist/esm/index-manager.d.ts +23 -3
  47. package/dist/esm/index-manager.js +84 -15
  48. package/dist/esm/index.d.ts +2 -1
  49. package/dist/esm/index.js +3 -1
  50. package/package.json +14 -7
@@ -64,7 +64,7 @@ export class IndexManager {
64
64
  this.hashEmbeddingId(chunk.embedding_id); // This will populate the mapping
65
65
  }
66
66
  this.isInitialized = true;
67
- const vectorCount = this.vectorIndex.getCurrentCount();
67
+ const vectorCount = await this.vectorIndex.getCurrentCount();
68
68
  console.log(`Index manager initialized with ${vectorCount} vectors${this.textIndex && this.imageIndex ? ' (multi-graph mode)' : ''}`);
69
69
  }
70
70
  catch (error) {
@@ -145,17 +145,18 @@ export class IndexManager {
145
145
  vector: embedding.vector
146
146
  }));
147
147
  // Check if we need to resize the index before adding
148
- const currentCount = this.vectorIndex.getCurrentCount();
149
- const newCount = currentCount + vectors.length;
148
+ const initialCount = await this.vectorIndex.getCurrentCount();
149
+ const newCount = initialCount + vectors.length;
150
150
  const currentCapacity = 100000; // This should match the initial capacity
151
151
  if (newCount > currentCapacity * 0.9) {
152
152
  const newCapacity = Math.ceil(newCount * 1.5);
153
153
  console.log(`Resizing index from ${currentCapacity} to ${newCapacity} to accommodate new vectors`);
154
- this.vectorIndex.resizeIndex(newCapacity);
154
+ await this.vectorIndex.resizeIndex(newCapacity);
155
155
  }
156
156
  // Add vectors incrementally (this is the key requirement - no rebuild needed)
157
- this.vectorIndex.addVectors(vectors);
158
- console.log(`Incrementally added ${embeddings.length} vectors to index (total: ${this.vectorIndex.getCurrentCount()})`);
157
+ await this.vectorIndex.addVectors(vectors);
158
+ const finalCount = await this.vectorIndex.getCurrentCount();
159
+ console.log(`Incrementally added ${embeddings.length} vectors to index (total: ${finalCount})`);
159
160
  // Save the updated index
160
161
  await this.saveIndex();
161
162
  }
@@ -223,7 +224,7 @@ export class IndexManager {
223
224
  const currentCapacity = 100000; // Default capacity
224
225
  if (chunkData.length > currentCapacity * 0.8) {
225
226
  const newCapacity = Math.ceil(chunkData.length * 1.5);
226
- this.vectorIndex.resizeIndex(newCapacity);
227
+ await this.vectorIndex.resizeIndex(newCapacity);
227
228
  console.log(`Resized index capacity to ${newCapacity} for ${chunkData.length} chunks`);
228
229
  }
229
230
  // Update model version if provided
@@ -279,7 +280,7 @@ export class IndexManager {
279
280
  const currentCapacity = 100000;
280
281
  if (chunkData.length > currentCapacity * 0.8) {
281
282
  const newCapacity = Math.ceil(chunkData.length * 1.5);
282
- this.vectorIndex.resizeIndex(newCapacity);
283
+ await this.vectorIndex.resizeIndex(newCapacity);
283
284
  console.log(`Resized index capacity to ${newCapacity}`);
284
285
  }
285
286
  // Re-generate embeddings for all chunks
@@ -294,7 +295,7 @@ export class IndexManager {
294
295
  id: this.hashEmbeddingId(embedding.embedding_id),
295
296
  vector: embedding.vector
296
297
  }));
297
- this.vectorIndex.addVectors(vectors);
298
+ await this.vectorIndex.addVectors(vectors);
298
299
  console.log(`Added ${vectors.length} vectors to rebuilt index`);
299
300
  // Update model version
300
301
  await this.updateModelVersion(embeddingEngine.getModelVersion());
@@ -414,12 +415,12 @@ export class IndexManager {
414
415
  // Create text-only index
415
416
  this.textIndex = new VectorIndex(`${this.indexPath}.text`, this.vectorIndexOptions);
416
417
  await this.textIndex.initialize();
417
- this.textIndex.addVectors(indexData.textVectors);
418
+ await this.textIndex.addVectors(indexData.textVectors);
418
419
  console.log(`✓ Text index created with ${indexData.textVectors.length} vectors`);
419
420
  // Create image-only index
420
421
  this.imageIndex = new VectorIndex(`${this.indexPath}.image`, this.vectorIndexOptions);
421
422
  await this.imageIndex.initialize();
422
- this.imageIndex.addVectors(indexData.imageVectors);
423
+ await this.imageIndex.addVectors(indexData.imageVectors);
423
424
  console.log(`✓ Image index created with ${indexData.imageVectors.length} vectors`);
424
425
  console.log('✓ Specialized indexes ready for content type filtering');
425
426
  }
@@ -475,8 +476,9 @@ export class IndexManager {
475
476
  }
476
477
  /**
477
478
  * Search for similar vectors
479
+ * Now async due to worker-based VectorIndex implementation
478
480
  */
479
- search(queryVector, k = 5, contentType) {
481
+ async search(queryVector, k = 5, contentType) {
480
482
  if (!this.isInitialized) {
481
483
  throw new Error('Index manager not initialized');
482
484
  }
@@ -499,7 +501,7 @@ export class IndexManager {
499
501
  // No specialized indexes (text-only mode) - ignore contentType and use combined index
500
502
  targetIndex = this.vectorIndex;
501
503
  }
502
- const results = targetIndex.search(queryVector, k);
504
+ const results = await targetIndex.search(queryVector, k);
503
505
  // Convert numeric IDs back to embedding IDs
504
506
  const embeddingIds = results.neighbors.map(id => this.unhashEmbeddingId(id));
505
507
  return {
@@ -514,7 +516,7 @@ export class IndexManager {
514
516
  if (!this.db) {
515
517
  throw new Error('Database not initialized');
516
518
  }
517
- const totalVectors = this.vectorIndex.getCurrentCount();
519
+ const totalVectors = await this.vectorIndex.getCurrentCount();
518
520
  try {
519
521
  const systemInfo = await getSystemInfo(this.db);
520
522
  const modelVersion = systemInfo?.modelVersion || null;
@@ -586,13 +588,80 @@ export class IndexManager {
586
588
  return embeddingId;
587
589
  }
588
590
  /**
589
- * Close database connection
591
+ * Close database connection and cleanup vector index worker
590
592
  */
591
593
  async close() {
592
594
  if (this.db) {
593
595
  await this.db.close();
594
596
  this.db = null;
595
597
  }
598
+ // Clean up vector index worker to free WebAssembly memory
599
+ if (this.vectorIndex && typeof this.vectorIndex.cleanup === 'function') {
600
+ await this.vectorIndex.cleanup();
601
+ }
602
+ // Also clean up specialized indexes
603
+ if (this.textIndex && typeof this.textIndex.cleanup === 'function') {
604
+ await this.textIndex.cleanup();
605
+ }
606
+ if (this.imageIndex && typeof this.imageIndex.cleanup === 'function') {
607
+ await this.imageIndex.cleanup();
608
+ }
609
+ }
610
+ /**
611
+ * Reset the vector index by clearing all vectors while keeping the index structure.
612
+ * This is a safer alternative to file deletion that avoids file locking issues on Windows.
613
+ *
614
+ * The reset operation:
615
+ * 1. Clears in-memory HNSW index
616
+ * 2. Clears in-memory vector storage and ID mappings
617
+ * 3. Reinitializes an empty index with the same parameters
618
+ * 4. Saves the empty index to disk (overwrites existing file)
619
+ *
620
+ * @returns Promise that resolves when reset is complete
621
+ */
622
+ async reset() {
623
+ console.log('🔄 Starting index reset...');
624
+ const startTime = Date.now();
625
+ try {
626
+ // Clear in-memory mappings
627
+ const previousVectorCount = await this.vectorIndex.getCurrentCount();
628
+ this.hashToEmbeddingId.clear();
629
+ this.embeddingIdToHash.clear();
630
+ // Clear grouped embeddings if any
631
+ this.groupedEmbeddings = undefined;
632
+ // Clear specialized indexes if they exist
633
+ if (this.textIndex) {
634
+ this.textIndex = undefined;
635
+ }
636
+ if (this.imageIndex) {
637
+ this.imageIndex = undefined;
638
+ }
639
+ // Reset the vector index (clears all vectors and reinitializes empty HNSW graph)
640
+ console.log(' Resetting HNSW index...');
641
+ await this.vectorIndex.reset();
642
+ // Save the empty index to disk (this overwrites the existing file)
643
+ console.log(' Saving empty index to disk...');
644
+ await this.vectorIndex.saveIndex();
645
+ const resetTimeMs = Date.now() - startTime;
646
+ const currentCount = await this.vectorIndex.getCurrentCount();
647
+ console.log(`✓ Index reset complete in ${resetTimeMs}ms`);
648
+ console.log(` Vectors cleared: ${previousVectorCount}`);
649
+ console.log(` Current vector count: ${currentCount}`);
650
+ }
651
+ catch (error) {
652
+ const resetTimeMs = Date.now() - startTime;
653
+ console.error(`❌ Index reset failed after ${resetTimeMs}ms:`, error);
654
+ throw new Error(`Failed to reset index: ${error instanceof Error ? error.message : 'Unknown error'}`);
655
+ }
656
+ }
657
+ /**
658
+ * Check if the index has any vectors
659
+ * @returns true if the index contains vectors, false if empty
660
+ * Now async due to worker-based VectorIndex implementation
661
+ */
662
+ async hasVectors() {
663
+ const count = await this.vectorIndex.getCurrentCount();
664
+ return count > 0;
596
665
  }
597
666
  }
598
667
  //# sourceMappingURL=index-manager.js.map
@@ -59,7 +59,8 @@ export { CrossEncoderReranker, createTextRerankFunction } from './text/reranker.
59
59
  export { countTokens } from './text/tokenizer.js';
60
60
  export type { RerankingStrategyType, RerankingConfig } from './core/reranking-config.js';
61
61
  export { validateRerankingStrategy, validateRerankingConfig, getDefaultRerankingConfig, isStrategySupported, getSupportedStrategies, RerankingConfigBuilder, DEFAULT_TEXT_RERANKING_CONFIG, DEFAULT_MULTIMODAL_RERANKING_CONFIG } from './core/reranking-config.js';
62
- export { openDatabase, initializeSchema, insertDocument, insertChunk, upsertDocument, getChunksByEmbeddingIds, type DatabaseConnection } from './core/db.js';
62
+ export { openDatabase, initializeSchema, insertDocument, insertChunk, upsertDocument, getChunksByEmbeddingIds, resetDatabase, hasDatabaseData, type DatabaseConnection, type DatabaseResetOptions, type DatabaseResetResult } from './core/db.js';
63
+ export { KnowledgeBaseManager, type KnowledgeBaseResetOptions, type KnowledgeBaseResetResult } from './core/knowledge-base-manager.js';
63
64
  export { IndexManager } from './index-manager.js';
64
65
  export { VectorIndex } from './core/vector-index.js';
65
66
  export { config, getModelDefaults, type CoreConfig, type ExtensibleConfig, type ModelDefaults, EXIT_CODES, ConfigurationError, getDefaultModelCachePath, handleUnrecoverableError, logError } from './core/config.js';
package/dist/cjs/index.js CHANGED
@@ -83,7 +83,9 @@ export { validateRerankingStrategy, validateRerankingConfig, getDefaultReranking
83
83
  // CORE INFRASTRUCTURE (FOR ADVANCED USERS)
84
84
  // =============================================================================
85
85
  // Database operations
86
- export { openDatabase, initializeSchema, insertDocument, insertChunk, upsertDocument, getChunksByEmbeddingIds } from './core/db.js';
86
+ export { openDatabase, initializeSchema, insertDocument, insertChunk, upsertDocument, getChunksByEmbeddingIds, resetDatabase, hasDatabaseData } from './core/db.js';
87
+ // Knowledge Base Manager (for reset operations)
88
+ export { KnowledgeBaseManager } from './core/knowledge-base-manager.js';
87
89
  // Vector index management
88
90
  export { IndexManager } from './index-manager.js';
89
91
  export { VectorIndex } from './core/vector-index.js';
@@ -80,7 +80,7 @@ async function validateModeConfiguration(options) {
80
80
  */
81
81
  export async function runIngest(path, options = {}) {
82
82
  try {
83
- // Handle --rebuild-if-needed flag immediately to prevent dimension mismatch error
83
+ // Handle --force-rebuild flag immediately to prevent dimension mismatch errors
84
84
  // Validate path exists
85
85
  const resolvedPath = resolve(path);
86
86
  if (!existsSync(resolvedPath)) {
@@ -159,26 +159,52 @@ export async function runIngest(path, options = {}) {
159
159
  factoryOptions.mode = options.mode;
160
160
  console.log(`Using processing mode: ${options.mode}`);
161
161
  }
162
- if (options.rebuildIfNeeded) {
162
+ if (options.forceRebuild) {
163
163
  factoryOptions.forceRebuild = true;
164
- console.log('Force rebuild enabled due to rebuildIfNeeded option');
165
- // Delete old index file immediately to prevent dimension mismatch errors
166
- const indexPath = process.env.RAG_INDEX_FILE || './vector-index.bin';
167
- const { existsSync, unlinkSync } = await import('fs');
168
- if (existsSync(indexPath)) {
169
- try {
170
- unlinkSync(indexPath);
171
- console.log('🗑️ Removed old index file to prevent dimension mismatch');
172
- }
173
- catch (error) {
174
- console.warn(`⚠️ Could not remove old index file: ${error}`);
175
- }
176
- }
164
+ console.log('Force rebuild enabled (--force-rebuild)');
177
165
  }
178
166
  // Validate mode-specific model and strategy combinations
179
167
  await validateModeConfiguration(factoryOptions);
180
168
  const dbPath = process.env.RAG_DB_FILE || './db.sqlite';
181
169
  const indexPath = process.env.RAG_INDEX_FILE || './vector-index.bin';
170
+ // --force-rebuild: Always delete DB (and sidecars) and index to guarantee a clean rebuild.
171
+ if (options.forceRebuild) {
172
+ try {
173
+ const { existsSync: fsExistsSync, unlinkSync } = await import('fs');
174
+ console.log('🗑️ Deleting existing database and index to perform a clean rebuild...');
175
+ // Remove WAL/SHM if present (common on SQLite with WAL journaling).
176
+ const sidecars = [`${dbPath}-wal`, `${dbPath}-shm`];
177
+ for (const p of sidecars) {
178
+ if (fsExistsSync(p)) {
179
+ try {
180
+ unlinkSync(p);
181
+ }
182
+ catch (e) {
183
+ console.warn(`⚠️ Could not remove SQLite sidecar file (${p}):`, e);
184
+ }
185
+ }
186
+ }
187
+ if (fsExistsSync(dbPath)) {
188
+ try {
189
+ unlinkSync(dbPath);
190
+ }
191
+ catch (e) {
192
+ console.warn(`⚠️ Could not remove database file (${dbPath}):`, e);
193
+ }
194
+ }
195
+ if (fsExistsSync(indexPath)) {
196
+ try {
197
+ unlinkSync(indexPath);
198
+ }
199
+ catch (e) {
200
+ console.warn(`⚠️ Could not remove index file (${indexPath}):`, e);
201
+ }
202
+ }
203
+ }
204
+ catch (error) {
205
+ console.warn('⚠️ Could not delete existing database/index for clean rebuild:', error instanceof Error ? error.message : String(error));
206
+ }
207
+ }
182
208
  // Setup graceful cleanup
183
209
  setupCLICleanup(dbPath);
184
210
  // Check if database is busy before starting
@@ -218,6 +244,22 @@ export async function runIngest(path, options = {}) {
218
244
  console.log(`Processing rate: ${chunksPerSecond} chunks/second`);
219
245
  }
220
246
  console.log('\nIngestion completed successfully!');
247
+ // Run VACUUM to compact the SQLite database after ingestion
248
+ try {
249
+ const { openDatabase } = await import('../core/db.js');
250
+ const vacuumDb = await openDatabase(dbPath);
251
+ try {
252
+ console.log('Running VACUUM to optimize database size...');
253
+ await vacuumDb.run('VACUUM');
254
+ console.log('VACUUM completed successfully.');
255
+ }
256
+ finally {
257
+ await vacuumDb.close();
258
+ }
259
+ }
260
+ catch (vacuumError) {
261
+ console.warn('⚠️ VACUUM operation failed or was skipped:', vacuumError instanceof Error ? vacuumError.message : String(vacuumError));
262
+ }
221
263
  // Display mode-specific information
222
264
  const mode = options.mode || 'text';
223
265
  if (mode === 'multimodal') {
@@ -397,6 +439,22 @@ export async function runRebuild() {
397
439
  console.log('All embeddings have been regenerated with the current model.');
398
440
  console.log('');
399
441
  console.log('You can now search your documents using: raglite search "your query"');
442
+ // Run VACUUM to compact the SQLite database after rebuild
443
+ try {
444
+ const { openDatabase } = await import('../core/db.js');
445
+ const vacuumDb = await openDatabase(dbPath);
446
+ try {
447
+ console.log('Running VACUUM to optimize database size after rebuild...');
448
+ await vacuumDb.run('VACUUM');
449
+ console.log('VACUUM completed successfully.');
450
+ }
451
+ finally {
452
+ await vacuumDb.close();
453
+ }
454
+ }
455
+ catch (vacuumError) {
456
+ console.warn('⚠️ VACUUM operation failed or was skipped:', vacuumError instanceof Error ? vacuumError.message : String(vacuumError));
457
+ }
400
458
  }
401
459
  finally {
402
460
  await db.close();
@@ -0,0 +1,5 @@
1
+ /**
2
+ * Launch the UI server
3
+ */
4
+ export declare function runUI(options?: any): Promise<void>;
5
+ //# sourceMappingURL=ui-server.d.ts.map
@@ -0,0 +1,152 @@
1
+ import { fileURLToPath } from 'url';
2
+ import { dirname, join } from 'path';
3
+ import { spawn } from 'child_process';
4
+ import fs from 'fs';
5
+ const __filename = fileURLToPath(import.meta.url);
6
+ const __dirname = dirname(__filename);
7
+ /**
8
+ * Get the project root directory
9
+ * When built, CLI is at dist/esm/cli/ui-server.js, so go up 3 levels
10
+ * When running from source, CLI is at src/cli/ui-server.ts, so go up 2 levels
11
+ */
12
+ function getProjectRoot() {
13
+ // Try going up 3 levels first (for built version)
14
+ const builtPath = join(__dirname, '../../..');
15
+ if (fs.existsSync(join(builtPath, 'package.json'))) {
16
+ return builtPath;
17
+ }
18
+ // Fallback: go up 2 levels (for source execution)
19
+ return join(__dirname, '../..');
20
+ }
21
+ /**
22
+ * Launch the UI server
23
+ */
24
+ export async function runUI(options = {}) {
25
+ const port = options.port || 3000;
26
+ const backendPort = options.backendPort || 3001;
27
+ console.log('🚀 Launching RAG-lite TS UI...');
28
+ // Resolve UI paths from project root
29
+ const projectRoot = getProjectRoot();
30
+ const backendBuiltPath = join(projectRoot, 'ui', 'backend', 'dist', 'index.js');
31
+ const backendSourcePath = join(projectRoot, 'ui', 'backend', 'src', 'index.ts');
32
+ const frontendBuiltPath = join(projectRoot, 'ui', 'frontend', 'dist');
33
+ const frontendSourcePath = join(projectRoot, 'ui', 'frontend');
34
+ // Check if built files exist
35
+ const useBuiltBackend = fs.existsSync(backendBuiltPath);
36
+ const useBuiltFrontend = fs.existsSync(frontendBuiltPath);
37
+ if (!useBuiltBackend && !fs.existsSync(backendSourcePath)) {
38
+ console.error(`❌ UI backend not found at: ${backendSourcePath}`);
39
+ console.error(' Make sure the UI is set up in the ui/ directory.');
40
+ process.exit(1);
41
+ }
42
+ if (!useBuiltFrontend && !fs.existsSync(frontendSourcePath)) {
43
+ console.error(`❌ UI frontend not found at: ${frontendSourcePath}`);
44
+ console.error(' Make sure the UI is set up in the ui/ directory.');
45
+ process.exit(1);
46
+ }
47
+ // Pass the working directory where 'raglite ui' was called to the backend
48
+ const workingDir = process.cwd();
49
+ // Built mode: single server on port (UI + API). Dev mode: backend on backendPort, frontend on port.
50
+ const effectiveBackendPort = useBuiltFrontend ? port : backendPort;
51
+ console.log(`📡 Starting backend on port ${effectiveBackendPort}...`);
52
+ // Start backend server - use built version if available
53
+ const backendCommand = useBuiltBackend ? 'node' : 'npx';
54
+ const backendArgs = useBuiltBackend
55
+ ? [backendBuiltPath]
56
+ : ['tsx', backendSourcePath];
57
+ const backendProcess = spawn(backendCommand, backendArgs, {
58
+ stdio: 'pipe',
59
+ env: {
60
+ ...process.env,
61
+ PORT: effectiveBackendPort.toString(),
62
+ RAG_WORKING_DIR: workingDir,
63
+ UI_FRONTEND_DIST: useBuiltFrontend ? frontendBuiltPath : undefined
64
+ },
65
+ shell: true
66
+ });
67
+ backendProcess.on('error', (err) => {
68
+ console.error('❌ Failed to start backend process:', err);
69
+ process.exit(1);
70
+ });
71
+ // Forward backend output with prefix
72
+ backendProcess.stdout?.on('data', (data) => {
73
+ process.stdout.write(`[Backend] ${data}`);
74
+ });
75
+ backendProcess.stderr?.on('data', (data) => {
76
+ process.stderr.write(`[Backend] ${data}`);
77
+ });
78
+ // Only start frontend dev server if built version doesn't exist
79
+ let frontendProcess = null;
80
+ if (!useBuiltFrontend) {
81
+ console.log(`🎨 Starting frontend dev server on port ${port}...`);
82
+ frontendProcess = spawn('npm', ['run', 'dev'], {
83
+ cwd: frontendSourcePath,
84
+ stdio: 'pipe',
85
+ env: {
86
+ ...process.env,
87
+ VITE_API_URL: `http://localhost:${effectiveBackendPort}`
88
+ },
89
+ shell: true
90
+ });
91
+ frontendProcess.on('error', (err) => {
92
+ console.error('❌ Failed to start frontend process:', err);
93
+ backendProcess.kill();
94
+ process.exit(1);
95
+ });
96
+ // Forward frontend output with prefix
97
+ frontendProcess.stdout?.on('data', (data) => {
98
+ process.stdout.write(`[Frontend] ${data}`);
99
+ });
100
+ frontendProcess.stderr?.on('data', (data) => {
101
+ process.stderr.write(`[Frontend] ${data}`);
102
+ });
103
+ }
104
+ else {
105
+ console.log(`🎨 Using built frontend from ${frontendBuiltPath}`);
106
+ console.log(` Frontend will be served by backend on port ${effectiveBackendPort}`);
107
+ }
108
+ // Wait a bit for servers to start
109
+ await new Promise(resolve => setTimeout(resolve, 2000));
110
+ console.log(`\n✨ UI Access:`);
111
+ if (useBuiltFrontend) {
112
+ console.log(` Frontend & Backend: http://localhost:${port}`);
113
+ }
114
+ else {
115
+ console.log(` Frontend: http://localhost:${port}`);
116
+ console.log(` Backend: http://localhost:${effectiveBackendPort}`);
117
+ }
118
+ console.log(`\n💡 Press Ctrl+C to stop both servers\n`);
119
+ // Keep the process alive and handle cleanup
120
+ return new Promise((resolve) => {
121
+ const cleanup = () => {
122
+ console.log('\n🛑 Shutting down servers...');
123
+ backendProcess.kill();
124
+ if (frontendProcess) {
125
+ frontendProcess.kill();
126
+ }
127
+ resolve();
128
+ };
129
+ process.on('SIGINT', cleanup);
130
+ process.on('SIGTERM', cleanup);
131
+ // Handle process exits
132
+ backendProcess.on('exit', (code) => {
133
+ if (code !== 0 && code !== null) {
134
+ console.error(`\n❌ Backend process exited with code ${code}`);
135
+ if (frontendProcess) {
136
+ frontendProcess.kill();
137
+ }
138
+ resolve();
139
+ }
140
+ });
141
+ if (frontendProcess) {
142
+ frontendProcess.on('exit', (code) => {
143
+ if (code !== 0 && code !== null) {
144
+ console.error(`\n❌ Frontend process exited with code ${code}`);
145
+ backendProcess.kill();
146
+ resolve();
147
+ }
148
+ });
149
+ }
150
+ });
151
+ }
152
+ //# sourceMappingURL=ui-server.js.map
package/dist/esm/cli.js CHANGED
@@ -6,8 +6,18 @@ import { EXIT_CODES, ConfigurationError } from './core/config.js';
6
6
  // Get package.json for version info
7
7
  const __filename = fileURLToPath(import.meta.url);
8
8
  const __dirname = dirname(__filename);
9
- const packageJsonPath = join(__dirname, '..', 'package.json');
10
- const packageJson = JSON.parse(readFileSync(packageJsonPath, 'utf-8'));
9
+ // When built, CLI is at dist/esm/cli.js, so go up two levels to root
10
+ // When running from source, CLI is at src/cli.ts, so go up one level to root
11
+ const packageJsonPath = join(__dirname, '..', '..', 'package.json');
12
+ let packageJson;
13
+ try {
14
+ packageJson = JSON.parse(readFileSync(packageJsonPath, 'utf-8'));
15
+ }
16
+ catch {
17
+ // Fallback: try one level up (for source execution)
18
+ const fallbackPath = join(__dirname, '..', 'package.json');
19
+ packageJson = JSON.parse(readFileSync(fallbackPath, 'utf-8'));
20
+ }
11
21
  /**
12
22
  * Display version information
13
23
  */
@@ -28,6 +38,7 @@ Usage:
28
38
  Commands:
29
39
  ingest <path> Ingest documents from file or directory
30
40
  search <query> Search indexed documents (text or image)
41
+ ui Launch the web interface
31
42
  rebuild Rebuild the vector index
32
43
  version Show version information
33
44
  help Show this help message
@@ -43,6 +54,7 @@ Examples:
43
54
  raglite search "red car" --content-type image # Search only image results
44
55
  raglite search ./photo.jpg # Search with image (multimodal mode only)
45
56
  raglite search ./image.png --top-k 5 # Find similar images
57
+ raglite ui # Launch web interface
46
58
 
47
59
  raglite rebuild # Rebuild the entire index
48
60
 
@@ -55,7 +67,7 @@ Options for search:
55
67
  Options for ingest:
56
68
  --model <name> Use specific embedding model
57
69
  --mode <mode> Processing mode: 'text' (default) or 'multimodal'
58
- --rebuild-if-needed Automatically rebuild if model mismatch detected (WARNING: rebuilds entire index)
70
+ --force-rebuild Wipe DB+index and rebuild from scratch (DESTRUCTIVE)
59
71
  --path-strategy <strategy> Path storage strategy: 'relative' (default) or 'absolute'
60
72
  --path-base <path> Base directory for relative paths (defaults to current directory)
61
73
 
@@ -111,8 +123,8 @@ function parseArgs() {
111
123
  else if (optionName === 'no-rerank') {
112
124
  options.rerank = false;
113
125
  }
114
- else if (optionName === 'rebuild-if-needed') {
115
- options.rebuildIfNeeded = true;
126
+ else if (optionName === 'force-rebuild') {
127
+ options.forceRebuild = true;
116
128
  }
117
129
  else if (optionName === 'help') {
118
130
  return { command: 'help', args: [], options: {} };
@@ -169,7 +181,7 @@ function validateArgs(command, args, options) {
169
181
  console.error('Options:');
170
182
  console.error(' --model <name> Use specific embedding model');
171
183
  console.error(' --mode <mode> Processing mode: text (default) or multimodal');
172
- console.error(' --rebuild-if-needed Automatically rebuild if model mismatch detected');
184
+ console.error(' --force-rebuild Wipe DB+index and rebuild from scratch (DESTRUCTIVE)');
173
185
  console.error('');
174
186
  console.error('The path can be either a file (.md or .txt) or a directory.');
175
187
  process.exit(EXIT_CODES.INVALID_ARGUMENTS);
@@ -201,6 +213,9 @@ function validateArgs(command, args, options) {
201
213
  case 'rebuild':
202
214
  // No arguments required
203
215
  break;
216
+ case 'ui':
217
+ // No arguments required
218
+ break;
204
219
  case 'version':
205
220
  // No validation needed
206
221
  break;
@@ -412,6 +427,10 @@ async function main() {
412
427
  const { runRebuild } = await import('./cli/indexer.js');
413
428
  await runRebuild();
414
429
  break;
430
+ case 'ui':
431
+ const { runUI } = await import('./cli/ui-server.js');
432
+ await runUI(options);
433
+ break;
415
434
  default:
416
435
  console.error(`Error: Unknown command '${command}'`);
417
436
  process.exit(1);
@@ -27,10 +27,12 @@ export class BinaryIndexFormat {
27
27
  * @param data Index data to serialize
28
28
  */
29
29
  static async save(indexPath, data) {
30
- // Calculate total size
30
+ // Use actual vector count to ensure accurate file size
31
+ const actualVectorCount = data.vectors.length;
32
+ // Calculate total size based on actual vectors
31
33
  const headerSize = 24; // 6 uint32 fields
32
34
  const vectorSize = 4 + (data.dimensions * 4); // id + vector
33
- const totalSize = headerSize + (data.currentSize * vectorSize);
35
+ const totalSize = headerSize + (actualVectorCount * vectorSize);
34
36
  const buffer = new ArrayBuffer(totalSize);
35
37
  const view = new DataView(buffer);
36
38
  let offset = 0;
@@ -45,7 +47,8 @@ export class BinaryIndexFormat {
45
47
  offset += 4;
46
48
  view.setUint32(offset, data.seed, true);
47
49
  offset += 4;
48
- view.setUint32(offset, data.currentSize, true);
50
+ // Write actual vector count in header
51
+ view.setUint32(offset, actualVectorCount, true);
49
52
  offset += 4;
50
53
  // Write vectors
51
54
  for (const item of data.vectors) {
@@ -210,4 +210,60 @@ export declare function updateStorageStats(connection: DatabaseConnection, stats
210
210
  filesystemRefs?: number;
211
211
  lastCleanup?: Date;
212
212
  }): Promise<void>;
213
+ /**
214
+ * Result of a database reset operation
215
+ */
216
+ export interface DatabaseResetResult {
217
+ /** Whether the reset was successful */
218
+ success: boolean;
219
+ /** Number of documents deleted */
220
+ documentsDeleted: number;
221
+ /** Number of chunks deleted */
222
+ chunksDeleted: number;
223
+ /** Number of content metadata entries deleted */
224
+ contentMetadataDeleted: number;
225
+ /** Whether system_info was preserved or cleared */
226
+ systemInfoCleared: boolean;
227
+ /** Time taken for the reset operation in milliseconds */
228
+ resetTimeMs: number;
229
+ }
230
+ /**
231
+ * Options for database reset operation
232
+ */
233
+ export interface DatabaseResetOptions {
234
+ /** Whether to preserve system_info (mode, model configuration) - default: false */
235
+ preserveSystemInfo?: boolean;
236
+ /** Whether to run VACUUM after deletion to reclaim space - default: true */
237
+ runVacuum?: boolean;
238
+ }
239
+ /**
240
+ * Reset the database by deleting all data while keeping the schema intact.
241
+ * This is a safer alternative to file deletion that avoids file locking issues on Windows.
242
+ *
243
+ * This function:
244
+ * 1. Deletes all rows from chunks, documents, content_metadata tables
245
+ * 2. Optionally clears system_info (mode/model configuration)
246
+ * 3. Resets storage_stats counters
247
+ * 4. Optionally runs VACUUM to reclaim disk space
248
+ *
249
+ * @param connection - Database connection object
250
+ * @param options - Reset options
251
+ * @returns Promise resolving to reset result statistics
252
+ *
253
+ * @example
254
+ * ```typescript
255
+ * const db = await openDatabase('./db.sqlite');
256
+ * const result = await resetDatabase(db, { preserveSystemInfo: false });
257
+ * console.log(`Deleted ${result.documentsDeleted} documents and ${result.chunksDeleted} chunks`);
258
+ * ```
259
+ */
260
+ export declare function resetDatabase(connection: DatabaseConnection, options?: DatabaseResetOptions): Promise<DatabaseResetResult>;
261
+ /**
262
+ * Check if the database has any data (documents, chunks, or content)
263
+ * Useful for determining if a reset is needed
264
+ *
265
+ * @param connection - Database connection object
266
+ * @returns Promise resolving to true if database has data, false if empty
267
+ */
268
+ export declare function hasDatabaseData(connection: DatabaseConnection): Promise<boolean>;
213
269
  //# sourceMappingURL=db.d.ts.map